author    | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-08-05 17:04:01 -0300
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-08-05 17:04:01 -0300
commit    | 57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree      | 5e910f0e82173f4ef4f51111366a3f1299037a7b /arch/powerpc/platforms
Initial import
Diffstat (limited to 'arch/powerpc/platforms')
341 files changed, 97908 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
new file mode 100644
index 000000000..6e287f129
--- /dev/null
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -0,0 +1,161 @@
+config ACADIA
+	bool "Acadia"
+	depends on 40x
+	default n
+	select PPC40x_SIMPLE
+	select 405EZ
+	help
+	  This option enables support for the AMCC 405EZ Acadia evaluation board.
+
+config EP405
+	bool "EP405/EP405PC"
+	depends on 40x
+	default n
+	select 405GP
+	select PCI
+	help
+	  This option enables support for the EP405/EP405PC boards.
+
+config HOTFOOT
+	bool "Hotfoot"
+	depends on 40x
+	default n
+	select PPC40x_SIMPLE
+	select PCI
+	help
+	  This option enables support for the ESTEEM 195E Hotfoot board.
+
+config KILAUEA
+	bool "Kilauea"
+	depends on 40x
+	default n
+	select 405EX
+	select PPC40x_SIMPLE
+	select PPC4xx_PCI_EXPRESS
+	select PCI_MSI
+	select PPC4xx_MSI
+	help
+	  This option enables support for the AMCC PPC405EX evaluation board.
+
+config MAKALU
+	bool "Makalu"
+	depends on 40x
+	default n
+	select 405EX
+	select PCI
+	select PPC4xx_PCI_EXPRESS
+	select PPC40x_SIMPLE
+	help
+	  This option enables support for the AMCC PPC405EX board.
+
+config WALNUT
+	bool "Walnut"
+	depends on 40x
+	default y
+	select 405GP
+	select PCI
+	select OF_RTC
+	help
+	  This option enables support for the IBM PPC405GP evaluation board.
+
+config XILINX_VIRTEX_GENERIC_BOARD
+	bool "Generic Xilinx Virtex board"
+	depends on 40x
+	default n
+	select XILINX_VIRTEX_II_PRO
+	select XILINX_VIRTEX_4_FX
+	help
+	  This option enables generic support for Xilinx Virtex based boards.
+
+	  The generic virtex board support matches any device tree which
+	  specifies 'xilinx,virtex' in its compatible field. This includes
+	  the Xilinx ML3xx and ML4xx reference designs using the powerpc
+	  core.
+
+	  Most Virtex designs should use this unless it needs to do some
+	  special configuration at board probe time.
+
+config OBS600
+	bool "OpenBlockS 600"
+	depends on 40x
+	default n
+	select 405EX
+	select PPC40x_SIMPLE
+	help
+	  This option enables support for PlatHome OpenBlockS 600 server
+
+
+config PPC40x_SIMPLE
+	bool "Simple PowerPC 40x board support"
+	depends on 40x
+	default n
+	help
+	  This option enables the simple PowerPC 40x platform support.
+
+# OAK doesn't exist but wanted to keep this around for any future 403GCX boards
+config 403GCX
+	bool
+	#depends on OAK
+	select IBM405_ERR51
+
+config 405GP
+	bool
+	select IBM405_ERR77
+	select IBM405_ERR51
+	select IBM_EMAC_ZMII
+
+config 405EX
+	bool
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+
+config 405EZ
+	bool
+	select IBM_EMAC_NO_FLOW_CTRL
+	select IBM_EMAC_MAL_CLR_ICINTSTAT
+	select IBM_EMAC_MAL_COMMON_ERR
+
+config XILINX_VIRTEX
+	bool
+	select DEFAULT_UIMAGE
+
+config XILINX_VIRTEX_II_PRO
+	bool
+	select XILINX_VIRTEX
+	select IBM405_ERR77
+	select IBM405_ERR51
+
+config XILINX_VIRTEX_4_FX
+	bool
+	select XILINX_VIRTEX
+
+config STB03xxx
+	bool
+	select IBM405_ERR77
+	select IBM405_ERR51
+
+config PPC4xx_GPIO
+	bool "PPC4xx GPIO support"
+	depends on 40x
+	select ARCH_REQUIRE_GPIOLIB
+	help
+	  Enable gpiolib support for ppc40x based boards
+
+# 40x errata/workaround config symbols, selected by the CPU models above
+
+# All 405-based cores up until the 405GPR and 405EP have this errata.
+config IBM405_ERR77
+	bool
+
+# All 40x-based cores, up until the 405GPR and 405EP have this errata.
+config IBM405_ERR51
+	bool
+
+config APM8018X
+	bool "APM8018X"
+	depends on 40x
+	default n
+	select PPC40x_SIMPLE
+	help
+	  This option enables support for the AppliedMicro APM8018X evaluation
+	  board.
diff --git a/arch/powerpc/platforms/40x/Makefile b/arch/powerpc/platforms/40x/Makefile
new file mode 100644
index 000000000..88c22de0c
--- /dev/null
+++ b/arch/powerpc/platforms/40x/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_WALNUT) += walnut.o
+obj-$(CONFIG_XILINX_VIRTEX_GENERIC_BOARD) += virtex.o
+obj-$(CONFIG_EP405) += ep405.o
+obj-$(CONFIG_PPC40x_SIMPLE) += ppc40x_simple.o
diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c
new file mode 100644
index 000000000..ddc12a192
--- /dev/null
+++ b/arch/powerpc/platforms/40x/ep405.c
@@ -0,0 +1,125 @@
+/*
+ * Architecture- / platform-specific boot-time initialization code for
+ * IBM PowerPC 4xx based boards. Adapted from original
+ * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek
+ * <dan@net4x.com>.
+ *
+ * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu>
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 IBM Corporation
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Adapted to EP405 by Ben. Herrenschmidt <benh@kernel.crashing.org>
+ *
+ * TODO: Wire up the PCI IRQ mux and the southbridge interrupts
+ *
+ * 2002 (c) MontaVista, Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+
+static struct device_node *bcsr_node;
+static void __iomem *bcsr_regs;
+
+/* BCSR registers */
+#define BCSR_ID 0
+#define BCSR_PCI_CTRL 1
+#define BCSR_FLASH_NV_POR_CTRL 2
+#define BCSR_FENET_UART_CTRL 3
+#define BCSR_PCI_IRQ 4
+#define BCSR_XIRQ_SELECT 5
+#define BCSR_XIRQ_ROUTING 6
+#define BCSR_XIRQ_STATUS 7
+#define BCSR_XIRQ_STATUS2 8
+#define BCSR_SW_STAT_LED_CTRL 9
+#define BCSR_GPIO_IRQ_PAR_CTRL 10
+/* there's more, can't be bothered typing them tho */
+
+
+static const struct of_device_id ep405_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb3", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},
+};
+
+static int __init ep405_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ep405_of_bus, NULL);
+
+	return 0;
+}
+machine_device_initcall(ep405, ep405_device_probe);
+
+static void __init ep405_init_bcsr(void)
+{
+	const u8 *irq_routing;
+	int i;
+
+	/* Find the bloody thing & map it */
+	bcsr_node = of_find_compatible_node(NULL, NULL, "ep405-bcsr");
+	if (bcsr_node == NULL) {
+		printk(KERN_ERR "EP405 BCSR not found !\n");
+		return;
+	}
+	bcsr_regs = of_iomap(bcsr_node, 0);
+	if (bcsr_regs == NULL) {
+		printk(KERN_ERR "EP405 BCSR failed to map !\n");
+		return;
+	}
+
+	/* Get the irq-routing property and apply the routing to the CPLD */
+	irq_routing = of_get_property(bcsr_node, "irq-routing", NULL);
+	if (irq_routing == NULL)
+		return;
+	for (i = 0; i < 16; i++) {
+		u8 irq = irq_routing[i];
+		out_8(bcsr_regs + BCSR_XIRQ_SELECT, i);
+		out_8(bcsr_regs + BCSR_XIRQ_ROUTING, irq);
+	}
+	in_8(bcsr_regs + BCSR_XIRQ_SELECT);
+	mb();
+	out_8(bcsr_regs + BCSR_GPIO_IRQ_PAR_CTRL, 0xfe);
+}
+
+static void __init ep405_setup_arch(void)
+{
+	/* Find & init the BCSR CPLD */
+	ep405_init_bcsr();
+
+	pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+}
+
+static int __init ep405_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "ep405"))
+		return 0;
+
+	return 1;
+}
+
+define_machine(ep405) {
+	.name = "EP405",
+	.probe = ep405_probe,
+	.setup_arch = ep405_setup_arch,
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c
new file mode 100644
index 000000000..b0c46375d
--- /dev/null
+++ b/arch/powerpc/platforms/40x/ppc40x_simple.c
@@ -0,0 +1,82 @@
+/*
+ * Generic PowerPC 40x platform support
+ *
+ * Copyright 2008 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This implements simple platform support for PowerPC 44x chips. This is
+ * mostly used for eval boards or other simple and "generic" 44x boards. If
+ * your board has custom functions or hardware, then you will likely want to
+ * implement your own board.c file to accommodate it.
+ */
+
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <asm/prom.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/uic.h>
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+static const struct of_device_id ppc40x_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb3", },
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{ .compatible = "simple-bus", },
+	{},
+};
+
+static int __init ppc40x_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ppc40x_of_bus, NULL);
+
+	return 0;
+}
+machine_device_initcall(ppc40x_simple, ppc40x_device_probe);
+
+/* This is the list of boards that can be supported by this simple
+ * platform code. This does _not_ mean the boards are compatible,
+ * as they most certainly are not from a device tree perspective.
+ * However, their differences are handled by the device tree and the
+ * drivers and therefore they don't need custom board support files.
+ *
+ * Again, if your board needs to do things differently then create a
+ * board.c file for it rather than adding it to this list.
+ */
+static const char * const board[] __initconst = {
+	"amcc,acadia",
+	"amcc,haleakala",
+	"amcc,kilauea",
+	"amcc,makalu",
+	"apm,klondike",
+	"est,hotfoot",
+	"plathome,obs600",
+	NULL
+};
+
+static int __init ppc40x_probe(void)
+{
+	if (of_flat_dt_match(of_get_flat_dt_root(), board)) {
+		pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+		return 1;
+	}
+
+	return 0;
+}
+
+define_machine(ppc40x_simple) {
+	.name = "PowerPC 40x Platform",
+	.probe = ppc40x_probe,
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c
new file mode 100644
index 000000000..9aa7ae2f4
--- /dev/null
+++ b/arch/powerpc/platforms/40x/virtex.c
@@ -0,0 +1,56 @@
+/*
+ * Xilinx Virtex (IIpro & 4FX) based board support
+ *
+ * Copyright 2007 Secret Lab Technologies Ltd.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/time.h>
+#include <asm/xilinx_intc.h>
+#include <asm/xilinx_pci.h>
+#include <asm/ppc4xx.h>
+
+static const struct of_device_id xilinx_of_bus_ids[] __initconst = {
+	{ .compatible = "xlnx,plb-v46-1.00.a", },
+	{ .compatible = "xlnx,plb-v34-1.01.a", },
+	{ .compatible = "xlnx,plb-v34-1.02.a", },
+	{ .compatible = "xlnx,opb-v20-1.10.c", },
+	{ .compatible = "xlnx,dcr-v29-1.00.a", },
+	{ .compatible = "xlnx,compound", },
+	{}
+};
+
+static int __init virtex_device_probe(void)
+{
+	of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL);
+
+	return 0;
+}
+machine_device_initcall(virtex, virtex_device_probe);
+
+static int __init virtex_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "xlnx,virtex"))
+		return 0;
+
+	return 1;
+}
+
+define_machine(virtex) {
+	.name = "Xilinx Virtex",
+	.probe = virtex_probe,
+	.setup_arch = xilinx_pci_init,
+	.init_IRQ = xilinx_intc_init_tree,
+	.get_irq = xilinx_intc_get_irq,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c
new file mode 100644
index 000000000..f7ac2d0fc
--- /dev/null
+++ b/arch/powerpc/platforms/40x/walnut.c
@@ -0,0 +1,67 @@
+/*
+ * Architecture- / platform-specific boot-time initialization code for
+ * IBM PowerPC 4xx based boards. Adapted from original
+ * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek
+ * <dan@net4x.com>.
+ *
+ * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu>
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 IBM Corporation
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * 2002 (c) MontaVista, Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+
+static const struct of_device_id walnut_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb3", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},
+};
+
+static int __init walnut_device_probe(void)
+{
+	of_platform_bus_probe(NULL, walnut_of_bus, NULL);
+	of_instantiate_rtc();
+
+	return 0;
+}
+machine_device_initcall(walnut, walnut_device_probe);
+
+static int __init walnut_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "ibm,walnut"))
+		return 0;
+
+	pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+
+	return 1;
+}
+
+define_machine(walnut) {
+	.name = "Walnut",
+	.probe = walnut_probe,
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/44x/44x.h b/arch/powerpc/platforms/44x/44x.h
new file mode 100644
index 000000000..63f703ecd
--- /dev/null
+++ b/arch/powerpc/platforms/44x/44x.h
@@ -0,0 +1,11 @@
+#ifndef __POWERPC_PLATFORMS_44X_44X_H
+#define __POWERPC_PLATFORMS_44X_44X_H
+
+extern u8 as1_readb(volatile u8 __iomem *addr);
+extern void as1_writeb(u8 data, volatile u8 __iomem *addr);
+
+#define GPIO0_OSRH 0xC
+#define GPIO0_TSRH 0x14
+#define GPIO0_ISR1H 0x34
+
+#endif /* __POWERPC_PLATFORMS_44X_44X_H */
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
new file mode 100644
index 000000000..5538e57c3
--- /dev/null
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -0,0 +1,379 @@
+config PPC_47x
+	bool "Support for 47x variant"
+	depends on 44x
+	default n
+	select MPIC
+	help
+	  This option enables support for the 47x family of processors and is
+	  not currently compatible with other 44x or 46x varients
+
+config BAMBOO
+	bool "Bamboo"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 440EP
+	select PCI
+	help
+	  This option enables support for the IBM PPC440EP evaluation board.
+
+config BLUESTONE
+	bool "Bluestone"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select APM821xx
+	select PCI_MSI
+	select PPC4xx_MSI
+	select PPC4xx_PCI_EXPRESS
+	select IBM_EMAC_RGMII
+	help
+	  This option enables support for the APM APM821xx Evaluation board.
+
+config EBONY
+	bool "Ebony"
+	depends on 44x
+	default y
+	select 440GP
+	select PCI
+	select OF_RTC
+	help
+	  This option enables support for the IBM PPC440GP evaluation board.
+
+config SAM440EP
+	bool "Sam440ep"
+	depends on 44x
+	default n
+	select 440EP
+	select PCI
+	help
+	  This option enables support for the ACube Sam440ep board.
+
+config SEQUOIA
+	bool "Sequoia"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 440EPX
+	help
+	  This option enables support for the AMCC PPC440EPX evaluation board.
+
+config TAISHAN
+	bool "Taishan"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 440GX
+	select PCI
+	help
+	  This option enables support for the AMCC PPC440GX "Taishan"
+	  evaluation board.
+
+config KATMAI
+	bool "Katmai"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 440SPe
+	select PCI
+	select PPC4xx_PCI_EXPRESS
+	select PCI_MSI
+	select PPC4xx_MSI
+	help
+	  This option enables support for the AMCC PPC440SPe evaluation board.
+
+config RAINIER
+	bool "Rainier"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 440GRX
+	select PCI
+	help
+	  This option enables support for the AMCC PPC440GRX evaluation board.
+
+config WARP
+	bool "PIKA Warp"
+	depends on 44x
+	default n
+	select 440EP
+	help
+	  This option enables support for the PIKA Warp(tm) Appliance. The Warp
+	  is a small computer replacement with up to 9 ports of FXO/FXS plus VOIP
+	  stations and trunks.
+
+	  See http://www.pikatechnologies.com/ and follow the "PIKA for Computer
+	  Telephony Developers" link for more information.
+
+config ARCHES
+	bool "Arches"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 460EX # Odd since it uses 460GT but the effects are the same
+	select PCI
+	select PPC4xx_PCI_EXPRESS
+	help
+	  This option enables support for the AMCC Dual PPC460GT evaluation board.
+
+config CANYONLANDS
+	bool "Canyonlands"
+	depends on 44x
+	default n
+	select 460EX
+	select PCI
+	select PPC4xx_PCI_EXPRESS
+	select PCI_MSI
+	select PPC4xx_MSI
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII
+	help
+	  This option enables support for the AMCC PPC460EX evaluation board.
+
+config GLACIER
+	bool "Glacier"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 460EX # Odd since it uses 460GT but the effects are the same
+	select PCI
+	select PPC4xx_PCI_EXPRESS
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII
+	help
+	  This option enables support for the AMCC PPC460GT evaluation board.
+
+config REDWOOD
+	bool "Redwood"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 460SX
+	select PCI
+	select PPC4xx_PCI_EXPRESS
+	select PCI_MSI
+	select PPC4xx_MSI
+	help
+	  This option enables support for the AMCC PPC460SX Redwood board.
+
+config EIGER
+	bool "Eiger"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 460SX
+	select PCI
+	select PPC4xx_PCI_EXPRESS
+	select IBM_EMAC_RGMII
+	help
+	  This option enables support for the AMCC PPC460SX evaluation board.
+
+config YOSEMITE
+	bool "Yosemite"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 440EP
+	select PCI
+	help
+	  This option enables support for the AMCC PPC440EP evaluation board.
+
+config ISS4xx
+	bool "ISS 4xx Simulator"
+	depends on (44x || 40x)
+	default n
+	select 405GP if 40x
+	select 440GP if 44x && !PPC_47x
+	select PPC_FPU
+	select OF_RTC
+	help
+	  This option enables support for the IBM ISS simulation environment
+
+config CURRITUCK
+	bool "IBM Currituck (476fpe) Support"
+	depends on PPC_47x
+	default n
+	select SWIOTLB
+	select 476FPE
+	select PPC4xx_PCI_EXPRESS
+	help
+	  This option enables support for the IBM Currituck (476fpe) evaluation board
+
+config AKEBONO
+	bool "IBM Akebono (476gtr) Support"
+	depends on PPC_47x
+	default n
+	select SWIOTLB
+	select 476FPE
+	select PPC4xx_PCI_EXPRESS
+	select PCI_MSI
+	select PPC4xx_HSTA_MSI
+	select I2C
+	select I2C_IBM_IIC
+	select NETDEVICES
+	select ETHERNET
+	select NET_VENDOR_IBM
+	select IBM_EMAC_EMAC4
+	select USB if USB_SUPPORT
+	select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD
+	select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD
+	select MMC_SDHCI
+	select MMC_SDHCI_PLTFM
+	select ATA
+	select SATA_AHCI_PLATFORM
+	help
+	  This option enables support for the IBM Akebono (476gtr) evaluation board
+
+
+config ICON
+	bool "Icon"
+	depends on 44x
+	default n
+	select PPC44x_SIMPLE
+	select 440SPe
+	select PCI
+	select PPC4xx_PCI_EXPRESS
+	help
+	  This option enables support for the AMCC PPC440SPe evaluation board.
+
+config XILINX_VIRTEX440_GENERIC_BOARD
+	bool "Generic Xilinx Virtex 5 FXT board support"
+	depends on 44x
+	default n
+	select XILINX_VIRTEX_5_FXT
+	help
+	  This option enables generic support for Xilinx Virtex based boards
+	  that use a 440 based processor in the Virtex 5 FXT FPGA architecture.
+
+	  The generic virtex board support matches any device tree which
+	  specifies 'xlnx,virtex440' in its compatible field. This includes
+	  the Xilinx ML5xx reference designs using the powerpc core.
+
+	  Most Virtex 5 designs should use this unless it needs to do some
+	  special configuration at board probe time.
+
+config XILINX_ML510
+	bool "Xilinx ML510 extra support"
+	depends on XILINX_VIRTEX440_GENERIC_BOARD
+	select PPC_PCI_CHOICE
+	select XILINX_PCI if PCI
+	select PPC_INDIRECT_PCI if PCI
+	select PPC_I8259 if PCI
+	help
+	  This option enables extra support for features on the Xilinx ML510
+	  board. The ML510 has a PCI bus with ALI south bridge.
+
+config PPC44x_SIMPLE
+	bool "Simple PowerPC 44x board support"
+	depends on 44x
+	default n
+	help
+	  This option enables the simple PowerPC 44x platform support.
+
+config PPC4xx_GPIO
+	bool "PPC4xx GPIO support"
+	depends on 44x
+	select ARCH_REQUIRE_GPIOLIB
+	help
+	  Enable gpiolib support for ppc440 based boards
+
+config PPC4xx_OCM
+	bool "PPC4xx On Chip Memory (OCM) support"
+	depends on 4xx
+	select PPC_LIB_RHEAP
+	help
+	  Enable OCM support for PowerPC 4xx platforms with on chip memory,
+	  OCM provides the fast place for memory access to improve performance.
+
+# 44x specific CPU modules, selected based on the board above.
+config 440EP
+	bool
+	select PPC_FPU
+	select IBM440EP_ERR42
+	select IBM_EMAC_ZMII
+
+config 440EPX
+	bool
+	select PPC_FPU
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII
+	select USB_EHCI_BIG_ENDIAN_MMIO
+	select USB_EHCI_BIG_ENDIAN_DESC
+
+config 440GRX
+	bool
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII
+
+config 440GP
+	bool
+	select IBM_EMAC_ZMII
+
+config 440GX
+	bool
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII #test only
+	select IBM_EMAC_TAH #test only
+
+config 440SP
+	bool
+
+config 440SPe
+	bool
+	select IBM_EMAC_EMAC4
+
+config 460EX
+	bool
+	select PPC_FPU
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_TAH
+
+config 460SX
+	bool
+	select PPC_FPU
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII
+	select IBM_EMAC_TAH
+
+config 476FPE
+	bool
+	select PPC_FPU
+
+config APM821xx
+	bool
+	select PPC_FPU
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_TAH
+
+config 476FPE_ERR46
+	depends on 476FPE
+	bool "Enable linker work around for PPC476FPE errata #46"
+	help
+	  This option enables a work around for an icache bug on 476
+	  that can cause execution of stale instructions when falling
+	  through pages (IBM errata #46). It requires a recent version
+	  of binutils which supports the --ppc476-workaround option.
+
+	  The work around enables the appropriate linker options and
+	  ensures that all module output sections are aligned to 4K
+	  page boundaries. The work around is only required when
+	  building modules.
+
+# 44x errata/workaround config symbols, selected by the CPU models above
+config IBM440EP_ERR42
+	bool
+
+# Xilinx specific config options.
+config XILINX_VIRTEX
+	bool
+	select DEFAULT_UIMAGE
+
+# Xilinx Virtex 5 FXT FPGA architecture, selected by a Xilinx board above
+config XILINX_VIRTEX_5_FXT
+	bool
+	select XILINX_VIRTEX
+
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
new file mode 100644
index 000000000..26d35b594
--- /dev/null
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -0,0 +1,14 @@
+obj-$(CONFIG_44x) += misc_44x.o
+ifneq ($(CONFIG_PPC4xx_CPM),y)
+obj-$(CONFIG_44x) += idle.o
+endif
+obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o
+obj-$(CONFIG_EBONY) += ebony.o
+obj-$(CONFIG_SAM440EP) += sam440ep.o
+obj-$(CONFIG_WARP) += warp.o
+obj-$(CONFIG_XILINX_VIRTEX_5_FXT) += virtex.o
+obj-$(CONFIG_XILINX_ML510) += virtex_ml510.o
+obj-$(CONFIG_ISS4xx) += iss4xx.o
+obj-$(CONFIG_CANYONLANDS)+= canyonlands.o
+obj-$(CONFIG_CURRITUCK) += ppc476.o
+obj-$(CONFIG_AKEBONO) += ppc476.o
diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c
new file mode 100644
index 000000000..22ca5430c
--- /dev/null
+++ b/arch/powerpc/platforms/44x/canyonlands.c
@@ -0,0 +1,134 @@
+/*
+ * This contain platform specific code for APM PPC460EX based Canyonlands
+ * board.
+ *
+ * Copyright (c) 2010, Applied Micro Circuits Corporation
+ * Author: Rupjyoti Sarmah <rsarmah@apm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <asm/udbg.h>
+#include <asm/uic.h>
+#include <linux/of_platform.h>
+#include <linux/delay.h>
+#include "44x.h"
+
+#define BCSR_USB_EN 0x11
+
+static const struct of_device_id ppc460ex_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{ .compatible = "simple-bus", },
+	{},
+};
+
+static int __init ppc460ex_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ppc460ex_of_bus, NULL);
+
+	return 0;
+}
+machine_device_initcall(canyonlands, ppc460ex_device_probe);
+
+/* Using this code only for the Canyonlands board. */
+
+static int __init ppc460ex_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+	if (of_flat_dt_is_compatible(root, "amcc,canyonlands")) {
+		pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+		return 1;
+	}
+	return 0;
+}
+
+/* USB PHY fixup code on Canyonlands kit. */
+
+static int __init ppc460ex_canyonlands_fixup(void)
+{
+	u8 __iomem *bcsr;
+	void __iomem *vaddr;
+	struct device_node *np;
+	int ret = 0;
+
+	np = of_find_compatible_node(NULL, NULL, "amcc,ppc460ex-bcsr");
+	if (!np) {
+		printk(KERN_ERR "failed did not find amcc, ppc460ex bcsr node\n");
+		return -ENODEV;
+	}
+
+	bcsr = of_iomap(np, 0);
+	of_node_put(np);
+
+	if (!bcsr) {
+		printk(KERN_CRIT "Could not remap bcsr\n");
+		ret = -ENODEV;
+		goto err_bcsr;
+	}
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc4xx-gpio");
+	if (!np) {
+		printk(KERN_ERR "failed did not find ibm,ppc4xx-gpio node\n");
+		return -ENODEV;
+	}
+
+	vaddr = of_iomap(np, 0);
+	of_node_put(np);
+
+	if (!vaddr) {
+		printk(KERN_CRIT "Could not get gpio node address\n");
+		ret = -ENODEV;
+		goto err_gpio;
+	}
+	/* Disable USB, through the BCSR7 bits */
+	setbits8(&bcsr[7], BCSR_USB_EN);
+
+	/* Wait for a while after reset */
+	msleep(100);
+
+	/* Enable USB here */
+	clrbits8(&bcsr[7], BCSR_USB_EN);
+
+	/*
+	 * Configure multiplexed gpio16 and gpio19 as alternate1 output
+	 * source after USB reset. In this configuration gpio16 will be
+	 * USB2HStop and gpio19 will be USB2DStop. For more details refer to
+	 * table 34-7 of PPC460EX user manual.
+	 */
+	setbits32((vaddr + GPIO0_OSRH), 0x42000000);
+	setbits32((vaddr + GPIO0_TSRH), 0x42000000);
+err_gpio:
+	iounmap(vaddr);
+err_bcsr:
+	iounmap(bcsr);
+	return ret;
+}
+machine_device_initcall(canyonlands, ppc460ex_canyonlands_fixup);
+define_machine(canyonlands) {
+	.name = "Canyonlands",
+	.probe = ppc460ex_probe,
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/44x/ebony.c b/arch/powerpc/platforms/44x/ebony.c
new file mode 100644
index 000000000..ae8932263
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ebony.c
@@ -0,0 +1,70 @@
+/*
+ * Ebony board specific routines
+ *
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+
+static const struct of_device_id ebony_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},
+};
+
+static int __init ebony_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ebony_of_bus, NULL);
+	of_instantiate_rtc();
+
+	return 0;
+}
+machine_device_initcall(ebony, ebony_device_probe);
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init ebony_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "ibm,ebony"))
+		return 0;
+
+	pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+
+	return 1;
+}
+
+define_machine(ebony) {
+	.name = "Ebony",
+	.probe = ebony_probe,
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/44x/idle.c b/arch/powerpc/platforms/44x/idle.c
new file mode 100644
index 000000000..7a81f921f
--- /dev/null
+++ b/arch/powerpc/platforms/44x/idle.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2008 IBM Corp.
+ *
+ * Based on arch/powerpc/platforms/pasemi/idle.c:
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Added by: Jerone Young <jyoung5@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/of.h>
+#include <linux/kernel.h>
+#include <asm/machdep.h>
+
+static int mode_spin;
+
+static void ppc44x_idle(void)
+{
+	unsigned long msr_save;
+
+	msr_save = mfmsr();
+	/* set wait state MSR */
+	mtmsr(msr_save|MSR_WE|MSR_EE|MSR_CE|MSR_DE);
+	isync();
+	/* return to initial state */
+	mtmsr(msr_save);
+	isync();
+}
+
+int __init ppc44x_idle_init(void)
+{
+	if (!mode_spin) {
+		/* If we are not setting spin mode
+		   then we set to wait mode */
+		ppc_md.power_save = &ppc44x_idle;
+	}
+
+	return 0;
+}
+
+arch_initcall(ppc44x_idle_init);
+
+static int __init idle_param(char *p)
+{
+
+	if (!strcmp("spin", p)) {
+		mode_spin = 1;
+		ppc_md.power_save = NULL;
+	}
+
+	return 0;
+}
+
+early_param("idle", idle_param);
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
new file mode 100644
index 000000000..c7c6758b3
--- /dev/null
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -0,0 +1,168 @@
+/*
+ * PPC476 board specific routines
+ *
+ * Copyright 2010 Torez Smith, IBM Corporation.
+ *
+ * Based on earlier code:
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/mpic.h>
+#include <asm/mmu.h>
+
+static const struct of_device_id iss4xx_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,plb6", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},
+};
+
+static int __init iss4xx_device_probe(void)
+{
+	of_platform_bus_probe(NULL, iss4xx_of_bus, NULL);
+	of_instantiate_rtc();
+
+	return 0;
+}
+machine_device_initcall(iss4xx, iss4xx_device_probe);
+
+/* We can have either UICs or MPICs */
+static void __init iss4xx_init_irq(void)
+{
+	struct device_node *np;
+
+	/* Find top level interrupt controller */
+	for_each_node_with_property(np, "interrupt-controller") {
+		if (of_get_property(np, "interrupts", NULL) == NULL)
+			break;
+	}
+	if (np == NULL)
+		panic("Can't find top level interrupt controller");
+
+	/* Check type and do appropriate initialization */
+	if (of_device_is_compatible(np, "ibm,uic")) {
+		uic_init_tree();
+		ppc_md.get_irq = uic_get_irq;
+#ifdef CONFIG_MPIC
+	} else if (of_device_is_compatible(np, "chrp,open-pic")) {
+		/* The MPIC driver will get everything it needs from the
+		 * device-tree, just pass 0 to all arguments
+		 */
+		struct mpic *mpic = mpic_alloc(np, 0, MPIC_NO_RESET, 0, 0, " MPIC ");
+		BUG_ON(mpic == NULL);
+		mpic_init(mpic);
+		ppc_md.get_irq = mpic_get_irq;
+#endif
+	} else
+		panic("Unrecognized top level interrupt controller");
+}
+
+#ifdef CONFIG_SMP
+static void smp_iss4xx_setup_cpu(int cpu)
+{
+	mpic_setup_this_cpu();
+}
+
+static int smp_iss4xx_kick_cpu(int cpu)
+{
+	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
+	const u64 *spin_table_addr_prop;
+	u32 *spin_table;
+	extern void start_secondary_47x(void);
+
+	BUG_ON(cpunode == NULL);
+
+	/* Assume spin table. We could test for the enable-method in
+	 * the device-tree but currently there's little point as it's
+	 * our only supported method
+	 */
+	spin_table_addr_prop = of_get_property(cpunode, "cpu-release-addr",
+					       NULL);
+	if (spin_table_addr_prop == NULL) {
+		pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", cpu);
+		return -ENOENT;
+	}
+
+	/* Assume it's mapped as part of the linear mapping. This is a bit
+	 * fishy but will work fine for now
+	 */
+	spin_table = (u32 *)__va(*spin_table_addr_prop);
+	pr_debug("CPU%d: Spin table mapped at %p\n", cpu, spin_table);
+
+	spin_table[3] = cpu;
+	smp_wmb();
+	spin_table[1] = __pa(start_secondary_47x);
+	mb();
+
+	return 0;
+}
+
+static struct smp_ops_t iss_smp_ops = {
+	.probe = smp_mpic_probe,
+	.message_pass = smp_mpic_message_pass,
+	.setup_cpu = smp_iss4xx_setup_cpu,
+	.kick_cpu = smp_iss4xx_kick_cpu,
+	.give_timebase = smp_generic_give_timebase,
+	.take_timebase = smp_generic_take_timebase,
+};
+
+static void __init iss4xx_smp_init(void)
+{
+	if (mmu_has_feature(MMU_FTR_TYPE_47x))
+		smp_ops = &iss_smp_ops;
+}
+
+#else /* CONFIG_SMP */
+static void __init iss4xx_smp_init(void) { }
+#endif /* CONFIG_SMP */
+
+static void __init iss4xx_setup_arch(void)
+{
+	iss4xx_smp_init();
+}
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init iss4xx_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "ibm,iss-4xx"))
+		return 0;
+
+	return 1;
+}
+
+define_machine(iss4xx) {
+	.name = "ISS-4xx",
+	.probe = iss4xx_probe,
+	.progress = udbg_progress,
+	.init_IRQ = iss4xx_init_irq,
+	.setup_arch = iss4xx_setup_arch,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/44x/misc_44x.S b/arch/powerpc/platforms/44x/misc_44x.S
new file mode 100644
index 000000000..dc12b8009
--- /dev/null
+++ b/arch/powerpc/platforms/44x/misc_44x.S
@@ -0,0 +1,46 @@
+/*
+ * This file contains miscellaneous low-level functions for PPC 44x.
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+
+	.text
+
+/*
+ * Do an IO access in AS1
+ */
+_GLOBAL(as1_readb)
+	mfmsr	r7
+	ori	r0,r7,MSR_DS
+	sync
+	mtmsr	r0
+	sync
+	isync
+	lbz	r3,0(r3)
+	sync
+	mtmsr	r7
+	sync
+	isync
+	blr
+
+_GLOBAL(as1_writeb)
+	mfmsr	r7
+	ori	r0,r7,MSR_DS
+	sync
+	mtmsr	r0
+	sync
+	isync
+	stb	r3,0(r4)
+	sync
+	mtmsr	r7
+	sync
+	isync
+	blr
diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c
new file mode 100644
index 000000000..573c3d268
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc44x_simple.c
@@ -0,0 +1,91 @@
+/*
+ * Generic PowerPC 44x platform support
+ *
+ * Copyright 2008 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This implements simple platform support for PowerPC 44x chips. This is
+ * mostly used for eval boards or other simple and "generic" 44x boards. If
+ * your board has custom functions or hardware, then you will likely want to
+ * implement your own board.c file to accommodate it.
+ */
+
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <asm/prom.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/uic.h>
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+static const struct of_device_id ppc44x_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{ .compatible = "simple-bus", },
+	{},
+};
+
+static int __init ppc44x_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ppc44x_of_bus, NULL);
+
+	return 0;
+}
+machine_device_initcall(ppc44x_simple, ppc44x_device_probe);
+
+/* This is the list of boards that can be supported by this simple
+ * platform code. This does _not_ mean the boards are compatible,
+ * as they most certainly are not from a device tree perspective.
+ * However, their differences are handled by the device tree and the
+ * drivers and therefore they don't need custom board support files.
+ *
+ * Again, if your board needs to do things differently then create a
+ * board.c file for it rather than adding it to this list.
+ */
+static char *board[] __initdata = {
+	"amcc,arches",
+	"amcc,bamboo",
+	"apm,bluestone",
+	"amcc,glacier",
+	"ibm,ebony",
+	"amcc,eiger",
+	"amcc,katmai",
+	"amcc,rainier",
+	"amcc,redwood",
+	"amcc,sequoia",
+	"amcc,taishan",
+	"amcc,yosemite",
+	"mosaixtech,icon"
+};
+
+static int __init ppc44x_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+	int i = 0;
+
+	for (i = 0; i < ARRAY_SIZE(board); i++) {
+		if (of_flat_dt_is_compatible(root, board[i])) {
+			pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+define_machine(ppc44x_simple) {
+	.name = "PowerPC 44x Platform",
+	.probe = ppc44x_probe,
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c
new file mode 100644
index 000000000..c11ce6516
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc476.c
@@ -0,0 +1,299 @@
+/*
+ * PowerPC 476FPE board specific routines
+ *
+ * Copyright © 2013 Tony Breeds IBM Corporation
+ * Copyright © 2013 Alistair Popple IBM Corporation
+ *
+ * Based on earlier code:
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ * Copyright © 2011 David Kliekamp IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/mpic.h>
+#include <asm/mmu.h>
+
+#include <linux/pci.h>
+#include <linux/i2c.h>
+
+static const struct of_device_id ppc47x_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,plb6", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},
+};
+
+/* The EEPROM is missing and the default values are bogus. This forces USB in
+ * to EHCI mode */
+static void quirk_ppc_currituck_usb_fixup(struct pci_dev *dev)
+{
+	if (of_machine_is_compatible("ibm,currituck")) {
+		pci_write_config_dword(dev, 0xe0, 0x0114231f);
+		pci_write_config_dword(dev, 0xe4, 0x00006c40);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(0x1033, 0x0035, quirk_ppc_currituck_usb_fixup);
+
+/* Akebono has an AVR microcontroller attached to the I2C bus
+ * which is used to power off/reset the system. */
+
+/* AVR I2C Commands */
+#define AVR_PWRCTL_CMD (0x26)
+
+/* Flags for the power control I2C commands */
+#define AVR_PWRCTL_PWROFF (0x01)
+#define AVR_PWRCTL_RESET (0x02)
+
+static struct i2c_client *avr_i2c_client;
+static void avr_halt_system(int pwrctl_flags)
+{
+	/* Request the AVR to reset the system */
+	i2c_smbus_write_byte_data(avr_i2c_client,
+				  AVR_PWRCTL_CMD, pwrctl_flags);
+
+	/* Wait for system to be reset */
+	while (1)
+		;
+}
+
+static void avr_power_off_system(void)
+{
+	avr_halt_system(AVR_PWRCTL_PWROFF);
+}
+
+static void avr_reset_system(char *cmd)
+{
+	avr_halt_system(AVR_PWRCTL_RESET);
+}
+
+static int avr_probe(struct i2c_client *client,
+		     const struct i2c_device_id *id)
+{
+	avr_i2c_client = client;
+	ppc_md.restart = avr_reset_system;
+	pm_power_off = avr_power_off_system;
+	return 0;
+}
+
+static const struct i2c_device_id avr_id[] = {
+	{ "akebono-avr", 0 },
+	{ }
+};
+
+static struct i2c_driver avr_driver = {
+	.driver = {
+		.name = "akebono-avr",
+	},
+	.probe = avr_probe,
+	.id_table = avr_id,
+};
+
+static int __init ppc47x_device_probe(void)
+{
+	i2c_add_driver(&avr_driver);
+	of_platform_bus_probe(NULL, ppc47x_of_bus, NULL);
+
+	return 0;
+}
+machine_device_initcall(ppc47x, ppc47x_device_probe);
+
+static void __init ppc47x_init_irq(void)
+{
+	struct device_node *np;
+
+	/* Find top level interrupt controller */
+	for_each_node_with_property(np, "interrupt-controller") {
+		if (of_get_property(np, "interrupts", NULL) == NULL)
+			break;
+	}
+	if (np == NULL)
+		panic("Can't find top level interrupt controller");
+
+	/* Check type and do appropriate initialization */
+	if (of_device_is_compatible(np, "chrp,open-pic")) {
+		/* The MPIC driver will get everything it needs from the
+		 * device-tree, just pass 0 to all arguments
+		 */
+		struct mpic *mpic =
+			mpic_alloc(np, 0, MPIC_NO_RESET, 0, 0, " MPIC ");
+		BUG_ON(mpic == NULL);
+		mpic_init(mpic);
+		ppc_md.get_irq = mpic_get_irq;
+	} else
+		panic("Unrecognized top level interrupt controller");
+}
+
+#ifdef CONFIG_SMP
+static void smp_ppc47x_setup_cpu(int cpu)
+{
+	mpic_setup_this_cpu();
+}
+
+static int smp_ppc47x_kick_cpu(int cpu)
+{
+	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
+	const u64 *spin_table_addr_prop;
+	u32 *spin_table;
+	extern void start_secondary_47x(void);
+
+	BUG_ON(cpunode == NULL);
+
+	/* Assume spin table. We could test for the enable-method in
+	 * the device-tree but currently there's little point as it's
+	 * our only supported method
+	 */
+	spin_table_addr_prop =
+		of_get_property(cpunode, "cpu-release-addr", NULL);
+
+	if (spin_table_addr_prop == NULL) {
+		pr_err("CPU%d: Can't start, missing cpu-release-addr !\n",
+		       cpu);
+		return 1;
+	}
+
+	/* Assume it's mapped as part of the linear mapping. This is a bit
+	 * fishy but will work fine for now
+	 *
+	 * XXX: Is there any reason to assume differently?
+	 */
+	spin_table = (u32 *)__va(*spin_table_addr_prop);
+	pr_debug("CPU%d: Spin table mapped at %p\n", cpu, spin_table);
+
+	spin_table[3] = cpu;
+	smp_wmb();
+	spin_table[1] = __pa(start_secondary_47x);
+	mb();
+
+	return 0;
+}
+
+static struct smp_ops_t ppc47x_smp_ops = {
+	.probe = smp_mpic_probe,
+	.message_pass = smp_mpic_message_pass,
+	.setup_cpu = smp_ppc47x_setup_cpu,
+	.kick_cpu = smp_ppc47x_kick_cpu,
+	.give_timebase = smp_generic_give_timebase,
+	.take_timebase = smp_generic_take_timebase,
+};
+
+static void __init ppc47x_smp_init(void)
+{
+	if (mmu_has_feature(MMU_FTR_TYPE_47x))
+		smp_ops = &ppc47x_smp_ops;
+}
+
+#else /* CONFIG_SMP */
+static void __init ppc47x_smp_init(void) { }
+#endif /* CONFIG_SMP */
+
+static void __init ppc47x_setup_arch(void)
+{
+
+	/* No need to check the DMA config as we /know/ our windows are all of
+	 * RAM. Lets hope that doesn't change */
+	swiotlb_detect_4g();
+
+	ppc47x_smp_init();
+}
+
+static int board_rev = -1;
+static int __init ppc47x_get_board_rev(void)
+{
+	int reg;
+	u8 *fpga;
+	struct device_node *np = NULL;
+
+	if (of_machine_is_compatible("ibm,currituck")) {
+		np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga");
+		reg = 0;
+	} else if (of_machine_is_compatible("ibm,akebono")) {
+		np = of_find_compatible_node(NULL, NULL, "ibm,akebono-fpga");
+		reg = 2;
+	}
+
+	if (!np)
+		goto fail;
+
+	fpga = (u8 *) of_iomap(np, 0);
+	of_node_put(np);
+	if (!fpga)
+		goto fail;
+
+	board_rev = ioread8(fpga + reg) & 0x03;
+	pr_info("%s: Found board revision %d\n", __func__, board_rev);
+	iounmap(fpga);
+	return 0;
+
+fail:
+	pr_info("%s: Unable to find board revision\n", __func__);
+	return 0;
+}
+machine_arch_initcall(ppc47x, ppc47x_get_board_rev);
+
+/* Use USB controller should have been hardware swizzled but it wasn't :( */
+static void ppc47x_pci_irq_fixup(struct pci_dev *dev)
+{
+	if (dev->vendor == 0x1033 && (dev->device == 0x0035 ||
+				      dev->device == 0x00e0)) {
+		if (board_rev == 0) {
+			dev->irq = irq_create_mapping(NULL, 47);
+			pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
+		} else if (board_rev == 2) {
+			dev->irq = irq_create_mapping(NULL, 49);
+			pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
+		} else {
+			pr_alert("%s: Unknown board revision\n", __func__);
+		}
+	}
+}
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init ppc47x_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(root, "ibm,akebono"))
+		return 1;
+
+	if (of_flat_dt_is_compatible(root, "ibm,currituck")) {
+		ppc_md.pci_irq_fixup = ppc47x_pci_irq_fixup;
+		return 1;
+	}
+
+	return 0;
+}
+
+define_machine(ppc47x) {
+	.name = "PowerPC 47x",
+	.probe = ppc47x_probe,
+	.progress = udbg_progress,
+	.init_IRQ = ppc47x_init_irq,
+	.setup_arch = ppc47x_setup_arch,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/44x/ppc476_modules.lds b/arch/powerpc/platforms/44x/ppc476_modules.lds
new file mode 100644
index 000000000..9fec5d34b
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc476_modules.lds
@@ -0,0 +1,15 @@
+SECTIONS
+{
+	.text : ALIGN(4096)
+	{
+		*(.text .text.* .fixup)
+	}
+	.init.text : ALIGN(4096)
+	{
+		*(.init.text .init.text.*)
+	}
+	.exit.text : ALIGN(4096)
+	{
+		*(.exit.text .exit.text.*)
+	}
+}
diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c
new file mode 100644
index 000000000..3ee4a03c1
--- /dev/null
+++ b/arch/powerpc/platforms/44x/sam440ep.c
@@ -0,0 +1,79 @@
+/*
+ * Sam440ep board specific routines based off bamboo.c code
+ * original copyrights below
+ *
+ * Wade Farnsworth <wfarnsworth@mvista.com>
+ * Copyright 2004 MontaVista Software Inc.
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ * Copyright 2007 IBM Corporation
+ *
+ * Modified from bamboo.c for sam440ep:
+ * Copyright 2008 Giuseppe Coviello <gicoviello@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <linux/i2c.h>
+
+static const struct of_device_id sam440ep_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},
+};
+
+static int __init sam440ep_device_probe(void)
+{
+	of_platform_bus_probe(NULL, sam440ep_of_bus, NULL);
+
+	return 0;
+}
+machine_device_initcall(sam440ep, sam440ep_device_probe);
+
+static int __init sam440ep_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "acube,sam440ep"))
+		return 0;
+
+	pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+
+	return 1;
+}
+
+define_machine(sam440ep) {
+	.name = "Sam440ep",
+	.probe = sam440ep_probe,
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
+
+static struct i2c_board_info sam440ep_rtc_info = {
+	.type = "m41st85",
+	.addr = 0x68,
+	.irq = -1,
+};
+
+static int sam440ep_setup_rtc(void)
+{
+	return i2c_register_board_info(0, &sam440ep_rtc_info, 1);
+}
+machine_device_initcall(sam440ep, sam440ep_setup_rtc);
diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c
new file mode 100644
index 000000000..ad272c17c
--- /dev/null
+++ b/arch/powerpc/platforms/44x/virtex.c
@@ -0,0 +1,62 @@
+/*
+ * Xilinx Virtex 5FXT based board support, derived from
+ * the Xilinx Virtex (IIpro & 4FX) based board support
+ *
+ * Copyright 2007 Secret Lab Technologies Ltd.
+ * Copyright 2008 Xilinx, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/time.h>
+#include <asm/xilinx_intc.h>
+#include <asm/xilinx_pci.h>
+#include <asm/reg.h>
+#include <asm/ppc4xx.h>
+#include "44x.h"
+
+static const struct of_device_id xilinx_of_bus_ids[] __initconst = {
+	{ .compatible = "simple-bus", },
+	{ .compatible = "xlnx,plb-v46-1.00.a", },
+	{ .compatible = "xlnx,plb-v46-1.02.a", },
+	{ .compatible = "xlnx,plb-v34-1.01.a", },
+	{ .compatible = "xlnx,plb-v34-1.02.a", },
+	{ .compatible = "xlnx,opb-v20-1.10.c", },
+	{ .compatible = "xlnx,dcr-v29-1.00.a", },
+	{ .compatible = "xlnx,compound", },
+	{}
+};
+
+static int __init virtex_device_probe(void)
+{
+	of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL);
+
+	return 0;
+}
+machine_device_initcall(virtex, virtex_device_probe);
+
+static int __init virtex_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "xlnx,virtex440"))
+		return 0;
+
+	return 1;
+}
+
+define_machine(virtex) {
+	.name = "Xilinx Virtex440",
+	.probe = virtex_probe,
+	.setup_arch = xilinx_pci_init,
+	.init_IRQ = xilinx_intc_init_tree,
+	.get_irq = xilinx_intc_get_irq,
+	.calibrate_decr = generic_calibrate_decr,
+	.restart = ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/virtex_ml510.c b/arch/powerpc/platforms/44x/virtex_ml510.c
new file mode 100644
index 000000000..1fdb87486
--- /dev/null
+++ b/arch/powerpc/platforms/44x/virtex_ml510.c
@@ -0,0 +1,29 @@
+#include <asm/i8259.h>
+#include <linux/pci.h>
+#include "44x.h"
+
+/**
+ * ml510_ail_quirk
+ */
+static void ml510_ali_quirk(struct pci_dev *dev)
+{
+	/* Enable the IDE controller */
+	pci_write_config_byte(dev, 0x58, 0x4c);
+	/* Assign irq 14 to the primary ide channel */
+	pci_write_config_byte(dev, 0x44, 0x0d);
+	/* Assign irq 15 to the secondary ide channel */
+	pci_write_config_byte(dev, 0x75, 0x0f);
+	/* Set the ide controller in native mode */
+	pci_write_config_byte(dev, 0x09, 0xff);
+
+	/* INTB = disabled, INTA = disabled */
+	pci_write_config_byte(dev, 0x48, 0x00);
+	/* INTD = disabled, INTC = disabled */
+	pci_write_config_byte(dev, 0x4a, 0x00);
+	/* Audio = INT7, Modem = disabled. */
+	pci_write_config_byte(dev, 0x4b, 0x60);
+	/* USB = INT7 */
+	pci_write_config_byte(dev, 0x74, 0x06);
+}
+DECLARE_PCI_FIXUP_EARLY(0x10b9, 0x1533, ml510_ali_quirk);
+
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
new file mode 100644
index 000000000..501333cf4
--- /dev/null
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -0,0 +1,320 @@
+/*
+ * PIKA Warp(tm) board specific routines
+ *
+ * Copyright (c) 2008-2009 PIKA Technologies
+ * Sean MacLennan <smaclennan@pikatech.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/kthread.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/of_gpio.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/dma.h>
+
+
+static const struct of_device_id warp_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{},
+};
+
+static int __init warp_device_probe(void)
+{
+	of_platform_bus_probe(NULL, warp_of_bus, NULL);
+	return 0;
+}
+machine_device_initcall(warp, warp_device_probe);
+
+static int __init warp_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "pika,warp"))
+		return 0;
+
+	/* For __dma_alloc_coherent */
+	ISA_DMA_THRESHOLD = ~0L;
+
+	return 1;
+}
+
+define_machine(warp) {
+	.name = "Warp",
+	.probe = warp_probe,
+	.progress = udbg_progress,
+	.init_IRQ = uic_init_tree,
+	.get_irq = uic_get_irq,
+	.restart = ppc4xx_reset_system,
+	.calibrate_decr = generic_calibrate_decr,
+};
+
+
+static int __init warp_post_info(void)
+{
+	struct device_node *np;
+	void __iomem *fpga;
+	u32 post1, post2;
+
+	/* Sighhhh... POST information is in the sd area. */
+	np = of_find_compatible_node(NULL, NULL, "pika,fpga-sd");
+	if (np == NULL)
+		return -ENOENT;
+
+	fpga = of_iomap(np, 0);
+	of_node_put(np);
+	if (fpga == NULL)
+		return -ENOENT;
+
+	post1 = in_be32(fpga + 0x40);
+	post2 = in_be32(fpga + 0x44);
+
+	iounmap(fpga);
+
+	if (post1 || post2)
+		printk(KERN_INFO "Warp POST %08x %08x\n", post1, post2);
+	else
+		printk(KERN_INFO "Warp POST OK\n");
+
+	return 0;
+}
+
+
+#ifdef CONFIG_SENSORS_AD7414
+
+static LIST_HEAD(dtm_shutdown_list);
+static void __iomem *dtm_fpga;
+static unsigned green_led, red_led;
+
+
+struct dtm_shutdown {
+	struct list_head list;
+	void (*func)(void *arg);
+	void *arg;
+};
+
+
+int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg)
+{
+	struct dtm_shutdown *shutdown;
+
+	shutdown = kmalloc(sizeof(struct dtm_shutdown), GFP_KERNEL);
+	if (shutdown == NULL)
+		return -ENOMEM;
+
+	shutdown->func = func;
+	shutdown->arg = arg;
+
+	list_add(&shutdown->list, &dtm_shutdown_list);
+
+	return 0;
+}
+
+int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg)
+{
+	struct dtm_shutdown *shutdown;
+
+	list_for_each_entry(shutdown, &dtm_shutdown_list, list)
+		if (shutdown->func == func && shutdown->arg == arg) {
+			list_del(&shutdown->list);
+			kfree(shutdown);
+			return 0;
+		}
+
+	return -EINVAL;
+}
+
+static irqreturn_t temp_isr(int irq, void *context)
+{
+	struct dtm_shutdown *shutdown;
+	int value = 1;
+
+	local_irq_disable();
+
+	gpio_set_value(green_led, 0);
+
+	/* Run through the shutdown list.
*/ + list_for_each_entry(shutdown, &dtm_shutdown_list, list) + shutdown->func(shutdown->arg); + + printk(KERN_EMERG "\n\nCritical Temperature Shutdown\n\n"); + + while (1) { + if (dtm_fpga) { + unsigned reset = in_be32(dtm_fpga + 0x14); + out_be32(dtm_fpga + 0x14, reset); + } + + gpio_set_value(red_led, value); + value ^= 1; + mdelay(500); + } + + /* Not reached */ + return IRQ_HANDLED; +} + +static int pika_setup_leds(void) +{ + struct device_node *np, *child; + + np = of_find_compatible_node(NULL, NULL, "gpio-leds"); + if (!np) { + printk(KERN_ERR __FILE__ ": Unable to find leds\n"); + return -ENOENT; + } + + for_each_child_of_node(np, child) + if (strcmp(child->name, "green") == 0) + green_led = of_get_gpio(child, 0); + else if (strcmp(child->name, "red") == 0) + red_led = of_get_gpio(child, 0); + + of_node_put(np); + + return 0; +} + +static void pika_setup_critical_temp(struct device_node *np, + struct i2c_client *client) +{ + int irq, rc; + + /* Do this before enabling critical temp interrupt since we + * may immediately interrupt. + */ + pika_setup_leds(); + + /* These registers are in 1 degree increments. */ + i2c_smbus_write_byte_data(client, 2, 65); /* Thigh */ + i2c_smbus_write_byte_data(client, 3, 0); /* Tlow */ + + irq = irq_of_parse_and_map(np, 0); + if (irq == NO_IRQ) { + printk(KERN_ERR __FILE__ ": Unable to get ad7414 irq\n"); + return; + } + + rc = request_irq(irq, temp_isr, 0, "ad7414", NULL); + if (rc) { + printk(KERN_ERR __FILE__ + ": Unable to request ad7414 irq %d = %d\n", irq, rc); + return; + } +} + +static inline void pika_dtm_check_fan(void __iomem *fpga) +{ + static int fan_state; + u32 fan = in_be32(fpga + 0x34) & (1 << 14); + + if (fan_state != fan) { + fan_state = fan; + if (fan) + printk(KERN_WARNING "Fan rotation error detected." + " Please check hardware.\n"); + } +} + +static int pika_dtm_thread(void __iomem *fpga) +{ + struct device_node *np; + struct i2c_client *client; + + np = of_find_compatible_node(NULL, NULL, "adi,ad7414"); + if (np == NULL) + return -ENOENT; + + client = of_find_i2c_device_by_node(np); + if (client == NULL) { + of_node_put(np); + return -ENOENT; + } + + pika_setup_critical_temp(np, client); + + of_node_put(np); + + printk(KERN_INFO "Warp DTM thread running.\n"); + + while (!kthread_should_stop()) { + int val; + + val = i2c_smbus_read_word_data(client, 0); + if (val < 0) + dev_dbg(&client->dev, "DTM read temp failed.\n"); + else { + s16 temp = swab16(val); + out_be32(fpga + 0x20, temp); + } + + pika_dtm_check_fan(fpga); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + } + + return 0; +} + +static int __init pika_dtm_start(void) +{ + struct task_struct *dtm_thread; + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "pika,fpga"); + if (np == NULL) + return -ENOENT; + + dtm_fpga = of_iomap(np, 0); + of_node_put(np); + if (dtm_fpga == NULL) + return -ENOENT; + + /* Must get post info before thread starts. 
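+         * (warp_post_info() maps and reads the FPGA POST registers on
+         * its own; in builds without CONFIG_SENSORS_AD7414 it runs as
+         * the late initcall itself, see the #else branch below.)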
*/ + warp_post_info(); + + dtm_thread = kthread_run(pika_dtm_thread, dtm_fpga, "pika-dtm"); + if (IS_ERR(dtm_thread)) { + iounmap(dtm_fpga); + return PTR_ERR(dtm_thread); + } + + return 0; +} +machine_late_initcall(warp, pika_dtm_start); + +#else /* !CONFIG_SENSORS_AD7414 */ + +int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg) +{ + return 0; +} + +int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg) +{ + return 0; +} + +machine_late_initcall(warp, warp_post_info); + +#endif + +EXPORT_SYMBOL(pika_dtm_register_shutdown); +EXPORT_SYMBOL(pika_dtm_unregister_shutdown); diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig new file mode 100644 index 000000000..5aa3f4b53 --- /dev/null +++ b/arch/powerpc/platforms/512x/Kconfig @@ -0,0 +1,36 @@ +config PPC_MPC512x + bool "512x-based boards" + depends on 6xx + select COMMON_CLK + select FSL_SOC + select IPIC + select PPC_PCI_CHOICE + select FSL_PCI if PCI + select ARCH_WANT_OPTIONAL_GPIOLIB + select USB_EHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_DESC + +config MPC5121_ADS + bool "Freescale MPC5121E ADS" + depends on PPC_MPC512x + select DEFAULT_UIMAGE + help + This option enables support for the MPC5121E ADS board. + +config MPC512x_GENERIC + bool "Generic support for simple MPC512x based boards" + depends on PPC_MPC512x + select DEFAULT_UIMAGE + help + This option enables support for simple MPC512x based boards + which do not need custom platform specific setup. + + Compatible boards include: Protonic LVT base boards (ZANMCU + and VICVT2), Freescale MPC5125 Tower system. + +config PDM360NG + bool "ifm PDM360NG board" + depends on PPC_MPC512x + select DEFAULT_UIMAGE + help + This option enables support for the PDM360NG board. diff --git a/arch/powerpc/platforms/512x/Makefile b/arch/powerpc/platforms/512x/Makefile new file mode 100644 index 000000000..01693121a --- /dev/null +++ b/arch/powerpc/platforms/512x/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the Freescale PowerPC 512x linux kernel. +# +obj-$(CONFIG_COMMON_CLK) += clock-commonclk.o +obj-y += mpc512x_shared.o +obj-$(CONFIG_MPC5121_ADS) += mpc5121_ads.o mpc5121_ads_cpld.o +obj-$(CONFIG_MPC512x_GENERIC) += mpc512x_generic.o +obj-$(CONFIG_PDM360NG) += pdm360ng.o diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c new file mode 100644 index 000000000..f691bcabd --- /dev/null +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -0,0 +1,1220 @@ +/* + * Copyright (C) 2013 DENX Software Engineering + * + * Gerhard Sittig, <gsi@denx.de> + * + * common clock driver support for the MPC512x platform + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/bitops.h> +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/device.h> +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> + +#include <asm/mpc5121.h> +#include <dt-bindings/clock/mpc512x-clock.h> + +#include "mpc512x.h" /* our public mpc5121_clk_init() API */ + +/* helpers to keep the MCLK intermediates "somewhere" in our table */ +enum { + MCLK_IDX_MUX0, + MCLK_IDX_EN0, + MCLK_IDX_DIV0, + MCLK_MAX_IDX, +}; + +#define NR_PSCS 12 +#define NR_MSCANS 4 +#define NR_SPDIFS 1 +#define NR_OUTCLK 4 +#define NR_MCLKS (NR_PSCS + NR_MSCANS + NR_SPDIFS + NR_OUTCLK) + +/* extend the public set of clocks by adding internal slots for management */ +enum { + /* arrange for adjacent numbers after the public set */ + MPC512x_CLK_START_PRIVATE = MPC512x_CLK_LAST_PUBLIC, + /* clocks which aren't announced to the public */ + MPC512x_CLK_DDR, + MPC512x_CLK_MEM, + MPC512x_CLK_IIM, + /* intermediates in div+gate combos or fractional dividers */ + MPC512x_CLK_DDR_UG, + MPC512x_CLK_SDHC_x4, + MPC512x_CLK_SDHC_UG, + MPC512x_CLK_SDHC2_UG, + MPC512x_CLK_DIU_x4, + MPC512x_CLK_DIU_UG, + MPC512x_CLK_MBX_BUS_UG, + MPC512x_CLK_MBX_UG, + MPC512x_CLK_MBX_3D_UG, + MPC512x_CLK_PCI_UG, + MPC512x_CLK_NFC_UG, + MPC512x_CLK_LPC_UG, + MPC512x_CLK_SPDIF_TX_IN, + /* intermediates for the mux+gate+div+mux MCLK generation */ + MPC512x_CLK_MCLKS_FIRST, + MPC512x_CLK_MCLKS_LAST = MPC512x_CLK_MCLKS_FIRST + + NR_MCLKS * MCLK_MAX_IDX, + /* internal, symbolic spec for the number of slots */ + MPC512x_CLK_LAST_PRIVATE, +}; + +/* data required for the OF clock provider registration */ +static struct clk *clks[MPC512x_CLK_LAST_PRIVATE]; +static struct clk_onecell_data clk_data; + +/* CCM register access */ +static struct mpc512x_ccm __iomem *clkregs; +static DEFINE_SPINLOCK(clklock); + +/* SoC variants {{{ */ + +/* + * tell SoC variants apart as they are rather similar yet not identical, + * cache the result in an enum to not repeatedly run the expensive OF test + * + * MPC5123 is an MPC5121 without the MBX graphics accelerator + * + * MPC5125 has many more differences: no MBX, no AXE, no VIU, no SPDIF, + * no PATA, no SATA, no PCI, two FECs (of different compatibility name), + * only 10 PSCs (of different compatibility name), two SDHCs, different + * NFC IP block, output clocks, system PLL status query, different CPMF + * interpretation, no CFM, different fourth PSC/CAN mux0 input -- yet + * those differences can get folded into this clock provider support + * code and don't warrant a separate highly redundant implementation + */ + +static enum soc_type { + MPC512x_SOC_MPC5121, + MPC512x_SOC_MPC5123, + MPC512x_SOC_MPC5125, +} soc; + +static void mpc512x_clk_determine_soc(void) +{ + if (of_machine_is_compatible("fsl,mpc5121")) { + soc = MPC512x_SOC_MPC5121; + return; + } + if (of_machine_is_compatible("fsl,mpc5123")) { + soc = MPC512x_SOC_MPC5123; + return; + } + if (of_machine_is_compatible("fsl,mpc5125")) { + soc = MPC512x_SOC_MPC5125; + return; + } +} + +static bool soc_has_mbx(void) +{ + if (soc == MPC512x_SOC_MPC5121) + return true; + return false; +} + +static bool soc_has_axe(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_viu(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_spdif(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_pata(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return 
false; + return true; +} + +static bool soc_has_sata(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_pci(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_fec2(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static int soc_max_pscnum(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return 10; + return 12; +} + +static bool soc_has_sdhc2(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool soc_has_nfc_5125(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool soc_has_outclk(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool soc_has_cpmf_0_bypass(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool soc_has_mclk_mux0_canin(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +/* }}} SoC variants */ +/* common clk API wrappers {{{ */ + +/* convenience wrappers around the common clk API */ +static inline struct clk *mpc512x_clk_fixed(const char *name, int rate) +{ + return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); +} + +static inline struct clk *mpc512x_clk_factor( + const char *name, const char *parent_name, + int mul, int div) +{ + int clkflags; + + clkflags = CLK_SET_RATE_PARENT; + return clk_register_fixed_factor(NULL, name, parent_name, clkflags, + mul, div); +} + +static inline struct clk *mpc512x_clk_divider( + const char *name, const char *parent_name, u8 clkflags, + u32 __iomem *reg, u8 pos, u8 len, int divflags) +{ + return clk_register_divider(NULL, name, parent_name, clkflags, + reg, pos, len, divflags, &clklock); +} + +static inline struct clk *mpc512x_clk_divtable( + const char *name, const char *parent_name, + u32 __iomem *reg, u8 pos, u8 len, + const struct clk_div_table *divtab) +{ + u8 divflags; + + divflags = 0; + return clk_register_divider_table(NULL, name, parent_name, 0, + reg, pos, len, divflags, + divtab, &clklock); +} + +static inline struct clk *mpc512x_clk_gated( + const char *name, const char *parent_name, + u32 __iomem *reg, u8 pos) +{ + int clkflags; + + clkflags = CLK_SET_RATE_PARENT; + return clk_register_gate(NULL, name, parent_name, clkflags, + reg, pos, 0, &clklock); +} + +static inline struct clk *mpc512x_clk_muxed(const char *name, + const char **parent_names, int parent_count, + u32 __iomem *reg, u8 pos, u8 len) +{ + int clkflags; + u8 muxflags; + + clkflags = CLK_SET_RATE_PARENT; + muxflags = 0; + return clk_register_mux(NULL, name, + parent_names, parent_count, clkflags, + reg, pos, len, muxflags, &clklock); +} + +/* }}} common clk API wrappers */ + +/* helper to isolate a bit field from a register */ +static inline int get_bit_field(uint32_t __iomem *reg, uint8_t pos, uint8_t len) +{ + uint32_t val; + + val = in_be32(reg); + val >>= pos; + val &= (1 << len) - 1; + return val; +} + +/* get the SPMF and translate it into the "sys pll" multiplier */ +static int get_spmf_mult(void) +{ + static int spmf_to_mult[] = { + 68, 1, 12, 16, 20, 24, 28, 32, + 36, 40, 44, 48, 52, 56, 60, 64, + }; + int spmf; + + spmf = get_bit_field(&clkregs->spmr, 24, 4); + return spmf_to_mult[spmf]; +} + +/* + * get the SYS_DIV value and translate it into a divide factor + * + * values returned from here are a multiple of the real factor since the + * divide ratio is fractional + */ +static int get_sys_div_x2(void) +{ + static int sysdiv_code_to_x2[] = { 
+ 4, 5, 6, 7, 8, 9, 10, 14, + 12, 16, 18, 22, 20, 24, 26, 30, + 28, 32, 34, 38, 36, 40, 42, 46, + 44, 48, 50, 54, 52, 56, 58, 62, + 60, 64, 66, + }; + int divcode; + + divcode = get_bit_field(&clkregs->scfr2, 26, 6); + return sysdiv_code_to_x2[divcode]; +} + +/* + * get the CPMF value and translate it into a multiplier factor + * + * values returned from here are a multiple of the real factor since the + * multiplier ratio is fractional + */ +static int get_cpmf_mult_x2(void) +{ + static int cpmf_to_mult_x36[] = { + /* 0b000 is "times 36" */ + 72, 2, 2, 3, 4, 5, 6, 7, + }; + static int cpmf_to_mult_0by[] = { + /* 0b000 is "bypass" */ + 2, 2, 2, 3, 4, 5, 6, 7, + }; + + int *cpmf_to_mult; + int cpmf; + + cpmf = get_bit_field(&clkregs->spmr, 16, 4); + if (soc_has_cpmf_0_bypass()) + cpmf_to_mult = cpmf_to_mult_0by; + else + cpmf_to_mult = cpmf_to_mult_x36; + return cpmf_to_mult[cpmf]; +} + +/* + * some of the clock dividers do scale in a linear way, yet not all of + * their bit combinations are legal; use a divider table to get a + * resulting set of applicable divider values + */ + +/* applies to the IPS_DIV, and PCI_DIV values */ +static struct clk_div_table divtab_2346[] = { + { .val = 2, .div = 2, }, + { .val = 3, .div = 3, }, + { .val = 4, .div = 4, }, + { .val = 6, .div = 6, }, + { .div = 0, }, +}; + +/* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */ +static struct clk_div_table divtab_1234[] = { + { .val = 1, .div = 1, }, + { .val = 2, .div = 2, }, + { .val = 3, .div = 3, }, + { .val = 4, .div = 4, }, + { .div = 0, }, +}; + +static int get_freq_from_dt(char *propname) +{ + struct device_node *np; + const unsigned int *prop; + int val; + + val = 0; + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-immr"); + if (np) { + prop = of_get_property(np, propname, NULL); + if (prop) + val = *prop; + of_node_put(np); + } + return val; +} + +static void mpc512x_clk_preset_data(void) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(clks); i++) + clks[i] = ERR_PTR(-ENODEV); +} + +/* + * - receives the "bus frequency" from the caller (that's the IPS clock + * rate, the historical source of clock information) + * - fetches the system PLL multiplier and divider values as well as the + * IPS divider value from hardware + * - determines the REF clock rate either from the XTAL/OSC spec (if + * there is a device tree node describing the oscillator) or from the + * IPS bus clock (supported for backwards compatibility, such that + * setups without XTAL/OSC specs keep working) + * - creates the "ref" clock item in the clock tree, such that + * subsequent code can create the remainder of the hierarchy (REF -> + * SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div + * values + */ +static void mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq, + int *sys_mul, int *sys_div, + int *ips_div) +{ + struct clk *osc_clk; + int calc_freq; + + /* fetch mul/div factors from the hardware */ + *sys_mul = get_spmf_mult(); + *sys_mul *= 2; /* compensate for the fractional divider */ + *sys_div = get_sys_div_x2(); + *ips_div = get_bit_field(&clkregs->scfr1, 23, 3); + + /* lookup the oscillator clock for its rate */ + osc_clk = of_clk_get_by_name(np, "osc"); + + /* + * either descend from OSC to REF (and in bypassing verify the + * IPS rate), or backtrack from IPS and multiplier values that + * were fetched from hardware to REF and thus to the OSC value + * + * in either case the REF clock gets created here and the + * remainder of the clock tree can get spanned from there + */ + if 
(!IS_ERR(osc_clk)) { + clks[MPC512x_CLK_REF] = mpc512x_clk_factor("ref", "osc", 1, 1); + calc_freq = clk_get_rate(clks[MPC512x_CLK_REF]); + calc_freq *= *sys_mul; + calc_freq /= *sys_div; + calc_freq /= 2; + calc_freq /= *ips_div; + if (bus_freq && calc_freq != bus_freq) + pr_warn("calc rate %d != OF spec %d\n", + calc_freq, bus_freq); + } else { + calc_freq = bus_freq; /* start with IPS */ + calc_freq *= *ips_div; /* IPS -> CSB */ + calc_freq *= 2; /* CSB -> SYS */ + calc_freq *= *sys_div; /* SYS -> PLL out */ + calc_freq /= *sys_mul; /* PLL out -> REF == OSC */ + clks[MPC512x_CLK_REF] = mpc512x_clk_fixed("ref", calc_freq); + } +} + +/* MCLK helpers {{{ */ + +/* + * helper code for the MCLK subtree setup + * + * the overview in section 5.2.4 of the MPC5121e Reference Manual rev4 + * suggests that all instances of the "PSC clock generation" are equal, + * and that one might re-use the PSC setup for MSCAN clock generation + * (section 5.2.5) as well, at least the logic if not the data for + * description + * + * the details (starting at page 5-20) show differences in the specific + * inputs of the first mux stage ("can clk in", "spdif tx"), and the + * factual non-availability of the second mux stage (it's present yet + * only one input is valid) + * + * the MSCAN clock related registers (starting at page 5-35) all + * reference "spdif clk" at the first mux stage and don't mention any + * "can clk" at all, which somehow is unexpected + * + * TODO re-check the document, and clarify whether the RM is correct in + * the overview or in the details, and whether the difference is a + * clipboard induced error or results from chip revisions + * + * it turns out that the RM rev4 as of 2012-06 talks about "can" for the + * PSCs while RM rev3 as of 2008-10 talks about "spdif", so I guess that + * first a doc update is required which better reflects reality in the + * SoC before the implementation should follow while no questions remain + */ + +/* + * note that this declaration raises a checkpatch warning, but + * it's the very data type dictated by <linux/clk-provider.h>, + * "fixing" this warning will break compilation + */ +static const char *parent_names_mux0_spdif[] = { + "sys", "ref", "psc-mclk-in", "spdif-tx", +}; + +static const char *parent_names_mux0_canin[] = { + "sys", "ref", "psc-mclk-in", "can-clk-in", +}; + +enum mclk_type { + MCLK_TYPE_PSC, + MCLK_TYPE_MSCAN, + MCLK_TYPE_SPDIF, + MCLK_TYPE_OUTCLK, +}; + +struct mclk_setup_data { + enum mclk_type type; + bool has_mclk1; + const char *name_mux0; + const char *name_en0; + const char *name_div0; + const char *parent_names_mux1[2]; + const char *name_mclk; +}; + +#define MCLK_SETUP_DATA_PSC(id) { \ + MCLK_TYPE_PSC, 0, \ + "psc" #id "-mux0", \ + "psc" #id "-en0", \ + "psc" #id "_mclk_div", \ + { "psc" #id "_mclk_div", "dummy", }, \ + "psc" #id "_mclk", \ +} + +#define MCLK_SETUP_DATA_MSCAN(id) { \ + MCLK_TYPE_MSCAN, 0, \ + "mscan" #id "-mux0", \ + "mscan" #id "-en0", \ + "mscan" #id "_mclk_div", \ + { "mscan" #id "_mclk_div", "dummy", }, \ + "mscan" #id "_mclk", \ +} + +#define MCLK_SETUP_DATA_SPDIF { \ + MCLK_TYPE_SPDIF, 1, \ + "spdif-mux0", \ + "spdif-en0", \ + "spdif_mclk_div", \ + { "spdif_mclk_div", "spdif-rx", }, \ + "spdif_mclk", \ +} + +#define MCLK_SETUP_DATA_OUTCLK(id) { \ + MCLK_TYPE_OUTCLK, 0, \ + "out" #id "-mux0", \ + "out" #id "-en0", \ + "out" #id "_mclk_div", \ + { "out" #id "_mclk_div", "dummy", }, \ + "out" #id "_clk", \ +} + +static struct mclk_setup_data mclk_psc_data[] = { + MCLK_SETUP_DATA_PSC(0), + 
MCLK_SETUP_DATA_PSC(1), + MCLK_SETUP_DATA_PSC(2), + MCLK_SETUP_DATA_PSC(3), + MCLK_SETUP_DATA_PSC(4), + MCLK_SETUP_DATA_PSC(5), + MCLK_SETUP_DATA_PSC(6), + MCLK_SETUP_DATA_PSC(7), + MCLK_SETUP_DATA_PSC(8), + MCLK_SETUP_DATA_PSC(9), + MCLK_SETUP_DATA_PSC(10), + MCLK_SETUP_DATA_PSC(11), +}; + +static struct mclk_setup_data mclk_mscan_data[] = { + MCLK_SETUP_DATA_MSCAN(0), + MCLK_SETUP_DATA_MSCAN(1), + MCLK_SETUP_DATA_MSCAN(2), + MCLK_SETUP_DATA_MSCAN(3), +}; + +static struct mclk_setup_data mclk_spdif_data[] = { + MCLK_SETUP_DATA_SPDIF, +}; + +static struct mclk_setup_data mclk_outclk_data[] = { + MCLK_SETUP_DATA_OUTCLK(0), + MCLK_SETUP_DATA_OUTCLK(1), + MCLK_SETUP_DATA_OUTCLK(2), + MCLK_SETUP_DATA_OUTCLK(3), +}; + +/* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */ +static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) +{ + size_t clks_idx_pub, clks_idx_int; + u32 __iomem *mccr_reg; /* MCLK control register (mux, en, div) */ + int div; + + /* derive a few parameters from the component type and index */ + switch (entry->type) { + case MCLK_TYPE_PSC: + clks_idx_pub = MPC512x_CLK_PSC0_MCLK + idx; + clks_idx_int = MPC512x_CLK_MCLKS_FIRST + + (idx) * MCLK_MAX_IDX; + mccr_reg = &clkregs->psc_ccr[idx]; + break; + case MCLK_TYPE_MSCAN: + clks_idx_pub = MPC512x_CLK_MSCAN0_MCLK + idx; + clks_idx_int = MPC512x_CLK_MCLKS_FIRST + + (NR_PSCS + idx) * MCLK_MAX_IDX; + mccr_reg = &clkregs->mscan_ccr[idx]; + break; + case MCLK_TYPE_SPDIF: + clks_idx_pub = MPC512x_CLK_SPDIF_MCLK; + clks_idx_int = MPC512x_CLK_MCLKS_FIRST + + (NR_PSCS + NR_MSCANS) * MCLK_MAX_IDX; + mccr_reg = &clkregs->spccr; + break; + case MCLK_TYPE_OUTCLK: + clks_idx_pub = MPC512x_CLK_OUT0_CLK + idx; + clks_idx_int = MPC512x_CLK_MCLKS_FIRST + + (NR_PSCS + NR_MSCANS + NR_SPDIFS + idx) + * MCLK_MAX_IDX; + mccr_reg = &clkregs->out_ccr[idx]; + break; + default: + return; + } + + /* + * this was grabbed from the PPC_CLOCK implementation, which + * enforced a specific MCLK divider while the clock was gated + * during setup (that's a documented hardware requirement) + * + * the PPC_CLOCK implementation might even have violated the + * "MCLK <= IPS" constraint, the fixed divider value of 1 + * results in a divider of 2 and thus MCLK = SYS/2 which equals + * CSB which is greater than IPS; the serial port setup may have + * adjusted the divider which the clock setup might have left in + * an undesirable state + * + * initial setup is: + * - MCLK 0 from SYS + * - MCLK DIV such to not exceed the IPS clock + * - MCLK 0 enabled + * - MCLK 1 from MCLK DIV + */ + div = clk_get_rate(clks[MPC512x_CLK_SYS]); + div /= clk_get_rate(clks[MPC512x_CLK_IPS]); + out_be32(mccr_reg, (0 << 16)); + out_be32(mccr_reg, (0 << 16) | ((div - 1) << 17)); + out_be32(mccr_reg, (1 << 16) | ((div - 1) << 17)); + + /* + * create the 'struct clk' items of the MCLK's clock subtree + * + * note that by design we always create all nodes and won't take + * shortcuts here, because + * - the "internal" MCLK_DIV and MCLK_OUT signal in turn are + * selectable inputs to the CFM while those who "actually use" + * the PSC/MSCAN/SPDIF (serial drivers et al) need the MCLK + * for their bitrate + * - in the absence of "aliases" for clocks we need to create + * individial 'struct clk' items for whatever might get + * referenced or looked up, even if several of those items are + * identical from the logical POV (their rate value) + * - for easier future maintenance and for better reflection of + * the SoC's documentation, it appears appropriate to generate + 
 * clock items even for those muxers which actually are NOPs
+         *   (those with two inputs of which one is reserved)
+         */
+        clks[clks_idx_int + MCLK_IDX_MUX0] = mpc512x_clk_muxed(
+                        entry->name_mux0,
+                        soc_has_mclk_mux0_canin()
+                                ? &parent_names_mux0_canin[0]
+                                : &parent_names_mux0_spdif[0],
+                        ARRAY_SIZE(parent_names_mux0_spdif),
+                        mccr_reg, 14, 2);
+        clks[clks_idx_int + MCLK_IDX_EN0] = mpc512x_clk_gated(
+                        entry->name_en0, entry->name_mux0,
+                        mccr_reg, 16);
+        clks[clks_idx_int + MCLK_IDX_DIV0] = mpc512x_clk_divider(
+                        entry->name_div0,
+                        entry->name_en0, CLK_SET_RATE_GATE,
+                        mccr_reg, 17, 15, 0);
+        if (entry->has_mclk1) {
+                clks[clks_idx_pub] = mpc512x_clk_muxed(
+                                entry->name_mclk,
+                                &entry->parent_names_mux1[0],
+                                ARRAY_SIZE(entry->parent_names_mux1),
+                                mccr_reg, 7, 1);
+        } else {
+                clks[clks_idx_pub] = mpc512x_clk_factor(
+                                entry->name_mclk,
+                                entry->parent_names_mux1[0],
+                                1, 1);
+        }
+}
+
+/* }}} MCLK helpers */
+
+static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
+{
+        int sys_mul, sys_div, ips_div;
+        int mul, div;
+        size_t mclk_idx;
+        int freq;
+
+        /*
+         * developer's notes:
+         * - consider whether to handle clocks which have both gates and
+         *   dividers via intermediates or by means of composites
+         * - fractional dividers appear to not map well to composites
+         *   since they can be seen as a fixed multiplier and an
+         *   adjustable divider, while composites can only combine at
+         *   most one of a mux, div, and gate each into one 'struct clk'
+         *   item
+         * - PSC/MSCAN/SPDIF clock generation OTOH already is very
+         *   specific and cannot get mapped to composites (at least not
+         *   a single one, maybe two of them), and some of these
+         *   intermediate clock signals get referenced elsewhere (e.g.
+         *   in the clock frequency measurement, CFM) and thus need
+         *   publicly available names
+         * - the current source layout appropriately reflects the
+         *   hardware setup, and it works, so it's questionable whether
+         *   further changes will result in a big enough benefit
+         */
+
+        /* regardless of whether XTAL/OSC exists, have REF created */
+        mpc512x_clk_setup_ref_clock(np, busfreq, &sys_mul, &sys_div, &ips_div);
+
+        /* now setup the REF -> SYS -> CSB -> IPS hierarchy */
+        clks[MPC512x_CLK_SYS] = mpc512x_clk_factor("sys", "ref",
+                                                   sys_mul, sys_div);
+        clks[MPC512x_CLK_CSB] = mpc512x_clk_factor("csb", "sys", 1, 2);
+        clks[MPC512x_CLK_IPS] = mpc512x_clk_divtable("ips", "csb",
+                                                     &clkregs->scfr1, 23, 3,
+                                                     divtab_2346);
+        /* now setup anything below SYS and CSB and IPS */
+
+        clks[MPC512x_CLK_DDR_UG] = mpc512x_clk_factor("ddr-ug", "sys", 1, 2);
+
+        /*
+         * the Reference Manual discusses that for SDHC only even divide
+         * ratios are supported because clock domain synchronization
+         * between 'per' and 'ipg' is broken;
+         * keep the divider's bit 0 cleared (per reset value), and only
+         * allow setting up the divider's bits 7:1, which means that
+         * only even divide ratios can get configured upon rate changes;
+         * keep the "x4" name because this bit shift hack is an internal
+         * implementation detail, the "fractional divider with quarters"
+         * semantics remains
+         */
+        clks[MPC512x_CLK_SDHC_x4] = mpc512x_clk_factor("sdhc-x4", "csb", 2, 1);
+        clks[MPC512x_CLK_SDHC_UG] = mpc512x_clk_divider("sdhc-ug", "sdhc-x4", 0,
+                                                        &clkregs->scfr2, 1, 7,
+                                                        CLK_DIVIDER_ONE_BASED);
+        if (soc_has_sdhc2()) {
+                clks[MPC512x_CLK_SDHC2_UG] = mpc512x_clk_divider(
+                                "sdhc2-ug", "sdhc-x4", 0, &clkregs->scfr2,
+                                9, 7, CLK_DIVIDER_ONE_BASED);
+        }
+
+        clks[MPC512x_CLK_DIU_x4] = mpc512x_clk_factor("diu-x4", "csb", 4, 1);
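+        /*
+         * worked example of the x4-plus-divider scheme, with
+         * hypothetical numbers: at a CSB rate of 200MHz the "diu-x4"
+         * intermediate runs at 800MHz, and a one-based divider value
+         * of 33 in the 8-bit SCFR1 field yields 800MHz / 33, which is
+         * approx. 24.24MHz, i.e. an effective CSB divide ratio of
+         * 33/4 = 8.25 -- the "fractional divider with quarters"
+         * semantics at work
+         */
+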
clks[MPC512x_CLK_DIU_UG] = mpc512x_clk_divider("diu-ug", "diu-x4", 0, + &clkregs->scfr1, 0, 8, + CLK_DIVIDER_ONE_BASED); + + /* + * the "power architecture PLL" was setup from data which was + * sampled from the reset config word, at this point in time the + * configuration can be considered fixed and read only (i.e. no + * longer adjustable, or no longer in need of adjustment), which + * is why we don't register a PLL here but assume fixed factors + */ + mul = get_cpmf_mult_x2(); + div = 2; /* compensate for the fractional factor */ + clks[MPC512x_CLK_E300] = mpc512x_clk_factor("e300", "csb", mul, div); + + if (soc_has_mbx()) { + clks[MPC512x_CLK_MBX_BUS_UG] = mpc512x_clk_factor( + "mbx-bus-ug", "csb", 1, 2); + clks[MPC512x_CLK_MBX_UG] = mpc512x_clk_divtable( + "mbx-ug", "mbx-bus-ug", &clkregs->scfr1, + 14, 3, divtab_1234); + clks[MPC512x_CLK_MBX_3D_UG] = mpc512x_clk_factor( + "mbx-3d-ug", "mbx-ug", 1, 1); + } + if (soc_has_pci()) { + clks[MPC512x_CLK_PCI_UG] = mpc512x_clk_divtable( + "pci-ug", "csb", &clkregs->scfr1, + 20, 3, divtab_2346); + } + if (soc_has_nfc_5125()) { + /* + * XXX TODO implement 5125 NFC clock setup logic, + * with high/low period counters in clkregs->scfr3, + * currently there are no users so it's ENOIMPL + */ + clks[MPC512x_CLK_NFC_UG] = ERR_PTR(-ENOTSUPP); + } else { + clks[MPC512x_CLK_NFC_UG] = mpc512x_clk_divtable( + "nfc-ug", "ips", &clkregs->scfr1, + 8, 3, divtab_1234); + } + clks[MPC512x_CLK_LPC_UG] = mpc512x_clk_divtable("lpc-ug", "ips", + &clkregs->scfr1, 11, 3, + divtab_1234); + + clks[MPC512x_CLK_LPC] = mpc512x_clk_gated("lpc", "lpc-ug", + &clkregs->sccr1, 30); + clks[MPC512x_CLK_NFC] = mpc512x_clk_gated("nfc", "nfc-ug", + &clkregs->sccr1, 29); + if (soc_has_pata()) { + clks[MPC512x_CLK_PATA] = mpc512x_clk_gated( + "pata", "ips", &clkregs->sccr1, 28); + } + /* for PSCs there is a "registers" gate and a bitrate MCLK subtree */ + for (mclk_idx = 0; mclk_idx < soc_max_pscnum(); mclk_idx++) { + char name[12]; + snprintf(name, sizeof(name), "psc%d", mclk_idx); + clks[MPC512x_CLK_PSC0 + mclk_idx] = mpc512x_clk_gated( + name, "ips", &clkregs->sccr1, 27 - mclk_idx); + mpc512x_clk_setup_mclk(&mclk_psc_data[mclk_idx], mclk_idx); + } + clks[MPC512x_CLK_PSC_FIFO] = mpc512x_clk_gated("psc-fifo", "ips", + &clkregs->sccr1, 15); + if (soc_has_sata()) { + clks[MPC512x_CLK_SATA] = mpc512x_clk_gated( + "sata", "ips", &clkregs->sccr1, 14); + } + clks[MPC512x_CLK_FEC] = mpc512x_clk_gated("fec", "ips", + &clkregs->sccr1, 13); + if (soc_has_pci()) { + clks[MPC512x_CLK_PCI] = mpc512x_clk_gated( + "pci", "pci-ug", &clkregs->sccr1, 11); + } + clks[MPC512x_CLK_DDR] = mpc512x_clk_gated("ddr", "ddr-ug", + &clkregs->sccr1, 10); + if (soc_has_fec2()) { + clks[MPC512x_CLK_FEC2] = mpc512x_clk_gated( + "fec2", "ips", &clkregs->sccr1, 9); + } + + clks[MPC512x_CLK_DIU] = mpc512x_clk_gated("diu", "diu-ug", + &clkregs->sccr2, 31); + if (soc_has_axe()) { + clks[MPC512x_CLK_AXE] = mpc512x_clk_gated( + "axe", "csb", &clkregs->sccr2, 30); + } + clks[MPC512x_CLK_MEM] = mpc512x_clk_gated("mem", "ips", + &clkregs->sccr2, 29); + clks[MPC512x_CLK_USB1] = mpc512x_clk_gated("usb1", "csb", + &clkregs->sccr2, 28); + clks[MPC512x_CLK_USB2] = mpc512x_clk_gated("usb2", "csb", + &clkregs->sccr2, 27); + clks[MPC512x_CLK_I2C] = mpc512x_clk_gated("i2c", "ips", + &clkregs->sccr2, 26); + /* MSCAN differs from PSC with just one gate for multiple components */ + clks[MPC512x_CLK_BDLC] = mpc512x_clk_gated("bdlc", "ips", + &clkregs->sccr2, 25); + for (mclk_idx = 0; mclk_idx < ARRAY_SIZE(mclk_mscan_data); mclk_idx++) + 
mpc512x_clk_setup_mclk(&mclk_mscan_data[mclk_idx], mclk_idx); + clks[MPC512x_CLK_SDHC] = mpc512x_clk_gated("sdhc", "sdhc-ug", + &clkregs->sccr2, 24); + /* there is only one SPDIF component, which shares MCLK support code */ + if (soc_has_spdif()) { + clks[MPC512x_CLK_SPDIF] = mpc512x_clk_gated( + "spdif", "ips", &clkregs->sccr2, 23); + mpc512x_clk_setup_mclk(&mclk_spdif_data[0], 0); + } + if (soc_has_mbx()) { + clks[MPC512x_CLK_MBX_BUS] = mpc512x_clk_gated( + "mbx-bus", "mbx-bus-ug", &clkregs->sccr2, 22); + clks[MPC512x_CLK_MBX] = mpc512x_clk_gated( + "mbx", "mbx-ug", &clkregs->sccr2, 21); + clks[MPC512x_CLK_MBX_3D] = mpc512x_clk_gated( + "mbx-3d", "mbx-3d-ug", &clkregs->sccr2, 20); + } + clks[MPC512x_CLK_IIM] = mpc512x_clk_gated("iim", "csb", + &clkregs->sccr2, 19); + if (soc_has_viu()) { + clks[MPC512x_CLK_VIU] = mpc512x_clk_gated( + "viu", "csb", &clkregs->sccr2, 18); + } + if (soc_has_sdhc2()) { + clks[MPC512x_CLK_SDHC2] = mpc512x_clk_gated( + "sdhc-2", "sdhc2-ug", &clkregs->sccr2, 17); + } + + if (soc_has_outclk()) { + size_t idx; /* used as mclk_idx, just to trim line length */ + for (idx = 0; idx < ARRAY_SIZE(mclk_outclk_data); idx++) + mpc512x_clk_setup_mclk(&mclk_outclk_data[idx], idx); + } + + /* + * externally provided clocks (when implemented in hardware, + * device tree may specify values which otherwise were unknown) + */ + freq = get_freq_from_dt("psc_mclk_in"); + if (!freq) + freq = 25000000; + clks[MPC512x_CLK_PSC_MCLK_IN] = mpc512x_clk_fixed("psc_mclk_in", freq); + if (soc_has_mclk_mux0_canin()) { + freq = get_freq_from_dt("can_clk_in"); + clks[MPC512x_CLK_CAN_CLK_IN] = mpc512x_clk_fixed( + "can_clk_in", freq); + } else { + freq = get_freq_from_dt("spdif_tx_in"); + clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed( + "spdif_tx_in", freq); + freq = get_freq_from_dt("spdif_rx_in"); + clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed( + "spdif_rx_in", freq); + } + + /* fixed frequency for AC97, always 24.567MHz */ + clks[MPC512x_CLK_AC97] = mpc512x_clk_fixed("ac97", 24567000); + + /* + * pre-enable those "internal" clock items which never get + * claimed by any peripheral driver, to not have the clock + * subsystem disable them late at startup + */ + clk_prepare_enable(clks[MPC512x_CLK_DUMMY]); + clk_prepare_enable(clks[MPC512x_CLK_E300]); /* PowerPC CPU */ + clk_prepare_enable(clks[MPC512x_CLK_DDR]); /* DRAM */ + clk_prepare_enable(clks[MPC512x_CLK_MEM]); /* SRAM */ + clk_prepare_enable(clks[MPC512x_CLK_IPS]); /* SoC periph */ + clk_prepare_enable(clks[MPC512x_CLK_LPC]); /* boot media */ +} + +/* + * registers the set of public clocks (those listed in the dt-bindings/ + * header file) for OF lookups, keeps the intermediates private to us + */ +static void mpc5121_clk_register_of_provider(struct device_node *np) +{ + clk_data.clks = clks; + clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1; /* _not_ ARRAY_SIZE() */ + of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data); +} + +/* + * temporary support for the period of time between introduction of CCF + * support and the adjustment of peripheral drivers to OF based lookups + */ +static void mpc5121_clk_provide_migration_support(void) +{ + + /* + * pre-enable those clock items which are not yet appropriately + * acquired by their peripheral driver + * + * the PCI clock cannot get acquired by its peripheral driver, + * because for this platform the driver won't probe(), instead + * initialization is done from within the .setup_arch() routine + * at a point in time where the clock provider has not been + * setup yet and thus 
isn't available yet + * + * so we "pre-enable" the clock here, to not have the clock + * subsystem automatically disable this item in a late init call + * + * this PCI clock pre-enable workaround only applies when there + * are device tree nodes for PCI and thus the peripheral driver + * has attached to bridges, otherwise the PCI clock remains + * unused and so it gets disabled + */ + clk_prepare_enable(clks[MPC512x_CLK_PSC3_MCLK]);/* serial console */ + if (of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci")) + clk_prepare_enable(clks[MPC512x_CLK_PCI]); +} + +/* + * those macros are not exactly pretty, but they encapsulate a lot + * of copy'n'paste heavy code which is even more ugly, and reduce + * the potential for inconsistencies in those many code copies + */ +#define FOR_NODES(compatname) \ + for_each_compatible_node(np, NULL, compatname) + +#define NODE_PREP do { \ + of_address_to_resource(np, 0, &res); \ + snprintf(devname, sizeof(devname), "%08x.%s", res.start, np->name); \ +} while (0) + +#define NODE_CHK(clkname, clkitem, regnode, regflag) do { \ + struct clk *clk; \ + clk = of_clk_get_by_name(np, clkname); \ + if (IS_ERR(clk)) { \ + clk = clkitem; \ + clk_register_clkdev(clk, clkname, devname); \ + if (regnode) \ + clk_register_clkdev(clk, clkname, np->name); \ + did_register |= DID_REG_ ## regflag; \ + pr_debug("clock alias name '%s' for dev '%s' pointer %p\n", \ + clkname, devname, clk); \ + } else { \ + clk_put(clk); \ + } \ +} while (0) + +/* + * register source code provided fallback results for clock lookups, + * these get consulted when OF based clock lookup fails (that is in the + * case of not yet adjusted device tree data, where clock related specs + * are missing) + */ +static void mpc5121_clk_provide_backwards_compat(void) +{ + enum did_reg_flags { + DID_REG_PSC = BIT(0), + DID_REG_PSCFIFO = BIT(1), + DID_REG_NFC = BIT(2), + DID_REG_CAN = BIT(3), + DID_REG_I2C = BIT(4), + DID_REG_DIU = BIT(5), + DID_REG_VIU = BIT(6), + DID_REG_FEC = BIT(7), + DID_REG_USB = BIT(8), + DID_REG_PATA = BIT(9), + }; + + int did_register; + struct device_node *np; + struct resource res; + int idx; + char devname[32]; + + did_register = 0; + + FOR_NODES(mpc512x_select_psc_compat()) { + NODE_PREP; + idx = (res.start >> 8) & 0xf; + NODE_CHK("ipg", clks[MPC512x_CLK_PSC0 + idx], 0, PSC); + NODE_CHK("mclk", clks[MPC512x_CLK_PSC0_MCLK + idx], 0, PSC); + } + + FOR_NODES("fsl,mpc5121-psc-fifo") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_PSC_FIFO], 1, PSCFIFO); + } + + FOR_NODES("fsl,mpc5121-nfc") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_NFC], 0, NFC); + } + + FOR_NODES("fsl,mpc5121-mscan") { + NODE_PREP; + idx = 0; + idx += (res.start & 0x2000) ? 2 : 0; + idx += (res.start & 0x0080) ? 
1 : 0; + NODE_CHK("ipg", clks[MPC512x_CLK_BDLC], 0, CAN); + NODE_CHK("mclk", clks[MPC512x_CLK_MSCAN0_MCLK + idx], 0, CAN); + } + + /* + * do register the 'ips', 'sys', and 'ref' names globally + * instead of inside each individual CAN node, as there is no + * potential for a name conflict (in contrast to 'ipg' and 'mclk') + */ + if (did_register & DID_REG_CAN) { + clk_register_clkdev(clks[MPC512x_CLK_IPS], "ips", NULL); + clk_register_clkdev(clks[MPC512x_CLK_SYS], "sys", NULL); + clk_register_clkdev(clks[MPC512x_CLK_REF], "ref", NULL); + } + + FOR_NODES("fsl,mpc5121-i2c") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_I2C], 0, I2C); + } + + /* + * workaround for the fact that the I2C driver does an "anonymous" + * lookup (NULL name spec, which yields the first clock spec) for + * which we cannot register an alias -- a _global_ 'ipg' alias that + * is not bound to any device name and returns the I2C clock item + * is not a good idea + * + * so we have the lookup in the peripheral driver fail, which is + * silent and non-fatal, and pre-enable the clock item here such + * that register access is possible + * + * see commit b3bfce2b "i2c: mpc: cleanup clock API use" for + * details, adjusting s/NULL/"ipg"/ in i2c-mpc.c would make this + * workaround obsolete + */ + if (did_register & DID_REG_I2C) + clk_prepare_enable(clks[MPC512x_CLK_I2C]); + + FOR_NODES("fsl,mpc5121-diu") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_DIU], 1, DIU); + } + + FOR_NODES("fsl,mpc5121-viu") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_VIU], 0, VIU); + } + + /* + * note that 2771399a "fs_enet: cleanup clock API use" did use the + * "per" string for the clock lookup in contrast to the "ipg" name + * which most other nodes are using -- this is not a fatal thing + * but just something to keep in mind when doing compatibility + * registration, it's a non-issue with up-to-date device tree data + */ + FOR_NODES("fsl,mpc5121-fec") { + NODE_PREP; + NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC); + } + FOR_NODES("fsl,mpc5121-fec-mdio") { + NODE_PREP; + NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC); + } + /* + * MPC5125 has two FECs: FEC1 at 0x2800, FEC2 at 0x4800; + * the clock items don't "form an array" since FEC2 was + * added only later and was not allowed to shift all other + * clock item indices, so the numbers aren't adjacent + */ + FOR_NODES("fsl,mpc5125-fec") { + NODE_PREP; + if (res.start & 0x4000) + idx = MPC512x_CLK_FEC2; + else + idx = MPC512x_CLK_FEC; + NODE_CHK("per", clks[idx], 0, FEC); + } + + FOR_NODES("fsl,mpc5121-usb2-dr") { + NODE_PREP; + idx = (res.start & 0x4000) ? 1 : 0; + NODE_CHK("ipg", clks[MPC512x_CLK_USB1 + idx], 0, USB); + } + + FOR_NODES("fsl,mpc5121-pata") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_PATA], 0, PATA); + } + + /* + * try to collapse diagnostics into a single line of output yet + * provide a full list of what is missing, to avoid noise in the + * absence of up-to-date device tree data -- backwards + * compatibility to old DTBs is a requirement, updates may be + * desirable or preferrable but are not at all mandatory + */ + if (did_register) { + pr_notice("device tree lacks clock specs, adding fallbacks (0x%x,%s%s%s%s%s%s%s%s%s%s)\n", + did_register, + (did_register & DID_REG_PSC) ? " PSC" : "", + (did_register & DID_REG_PSCFIFO) ? " PSCFIFO" : "", + (did_register & DID_REG_NFC) ? " NFC" : "", + (did_register & DID_REG_CAN) ? " CAN" : "", + (did_register & DID_REG_I2C) ? " I2C" : "", + (did_register & DID_REG_DIU) ? " DIU" : "", + (did_register & DID_REG_VIU) ? 
" VIU" : "", + (did_register & DID_REG_FEC) ? " FEC" : "", + (did_register & DID_REG_USB) ? " USB" : "", + (did_register & DID_REG_PATA) ? " PATA" : ""); + } else { + pr_debug("device tree has clock specs, no fallbacks added\n"); + } +} + +/* + * The "fixed-clock" nodes (which includes the oscillator node if the board's + * DT provides one) has already been scanned by the of_clk_init() in + * time_init(). + */ +int __init mpc5121_clk_init(void) +{ + struct device_node *clk_np; + int busfreq; + + /* map the clock control registers */ + clk_np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock"); + if (!clk_np) + return -ENODEV; + clkregs = of_iomap(clk_np, 0); + WARN_ON(!clkregs); + + /* determine the SoC variant we run on */ + mpc512x_clk_determine_soc(); + + /* invalidate all not yet registered clock slots */ + mpc512x_clk_preset_data(); + + /* + * add a dummy clock for those situations where a clock spec is + * required yet no real clock is involved + */ + clks[MPC512x_CLK_DUMMY] = mpc512x_clk_fixed("dummy", 0); + + /* + * have all the real nodes in the clock tree populated from REF + * down to all leaves, either starting from the OSC node or from + * a REF root that was created from the IPS bus clock input + */ + busfreq = get_freq_from_dt("bus-frequency"); + mpc512x_clk_setup_clock_tree(clk_np, busfreq); + + /* register as an OF clock provider */ + mpc5121_clk_register_of_provider(clk_np); + + /* + * unbreak not yet adjusted peripheral drivers during migration + * towards fully operational common clock support, and allow + * operation in the absence of clock related device tree specs + */ + mpc5121_clk_provide_migration_support(); + mpc5121_clk_provide_backwards_compat(); + + return 0; +} diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c new file mode 100644 index 000000000..3e90ece10 --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2007, 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: John Rigby, <jrigby@freescale.com>, Thur Mar 29 2007 + * + * Description: + * MPC5121 ADS board setup + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ + +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/of_platform.h> + +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/prom.h> +#include <asm/time.h> + +#include <sysdev/fsl_pci.h> + +#include "mpc512x.h" +#include "mpc5121_ads.h" + +static void __init mpc5121_ads_setup_arch(void) +{ +#ifdef CONFIG_PCI + struct device_node *np; +#endif + printk(KERN_INFO "MPC5121 ADS board from Freescale Semiconductor\n"); + /* + * cpld regs are needed early + */ + mpc5121_ads_cpld_map(); + +#ifdef CONFIG_PCI + for_each_compatible_node(np, "pci", "fsl,mpc5121-pci") + mpc83xx_add_bridge(np); +#endif + + mpc512x_setup_arch(); +} + +static void __init mpc5121_ads_init_IRQ(void) +{ + mpc512x_init_IRQ(); + mpc5121_ads_cpld_pic_init(); +} + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc5121_ads_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,mpc5121ads"); +} + +define_machine(mpc5121_ads) { + .name = "MPC5121 ADS", + .probe = mpc5121_ads_probe, + .setup_arch = mpc5121_ads_setup_arch, + .init = mpc512x_init, + .init_early = mpc512x_init_early, + .init_IRQ = mpc5121_ads_init_IRQ, + .get_irq = ipic_get_irq, + .calibrate_decr = generic_calibrate_decr, + .restart = mpc512x_restart, +}; diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.h b/arch/powerpc/platforms/512x/mpc5121_ads.h new file mode 100644 index 000000000..662076cfe --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc5121_ads.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Prototypes for ADS5121 specific code + */ + +#ifndef __MPC512ADS_H__ +#define __MPC512ADS_H__ +extern void __init mpc5121_ads_cpld_map(void); +extern void __init mpc5121_ads_cpld_pic_init(void); +#endif /* __MPC512ADS_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c new file mode 100644 index 000000000..ca3a062ed --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: John Rigby, <jrigby@freescale.com> + * + * Description: + * MPC5121ADS CPLD irq handling + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <asm/prom.h> + +static struct device_node *cpld_pic_node; +static struct irq_domain *cpld_pic_host; + +/* + * Bits to ignore in the misc_status register + * 0x10 touch screen pendown is hard routed to irq1 + * 0x02 pci status is read from pci status register + */ +#define MISC_IGNORE 0x12 + +/* + * Nothing to ignore in pci status register + */ +#define PCI_IGNORE 0x00 + +struct cpld_pic { + u8 pci_mask; + u8 pci_status; + u8 route; + u8 misc_mask; + u8 misc_status; + u8 misc_control; +}; + +static struct cpld_pic __iomem *cpld_regs; + +static void __iomem * +irq_to_pic_mask(unsigned int irq) +{ + return irq <= 7 ? &cpld_regs->pci_mask : &cpld_regs->misc_mask; +} + +static unsigned int +irq_to_pic_bit(unsigned int irq) +{ + return 1 << (irq & 0x7); +} + +static void +cpld_mask_irq(struct irq_data *d) +{ + unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d); + void __iomem *pic_mask = irq_to_pic_mask(cpld_irq); + + out_8(pic_mask, + in_8(pic_mask) | irq_to_pic_bit(cpld_irq)); +} + +static void +cpld_unmask_irq(struct irq_data *d) +{ + unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d); + void __iomem *pic_mask = irq_to_pic_mask(cpld_irq); + + out_8(pic_mask, + in_8(pic_mask) & ~irq_to_pic_bit(cpld_irq)); +} + +static struct irq_chip cpld_pic = { + .name = "CPLD PIC", + .irq_mask = cpld_mask_irq, + .irq_ack = cpld_mask_irq, + .irq_unmask = cpld_unmask_irq, +}; + +static int +cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, + u8 __iomem *maskp) +{ + int cpld_irq; + u8 status = in_8(statusp); + u8 mask = in_8(maskp); + + /* ignore don't cares and masked irqs */ + status |= (ignore | mask); + + if (status == 0xff) + return NO_IRQ; + + cpld_irq = ffz(status) + offset; + + return irq_linear_revmap(cpld_pic_host, cpld_irq); +} + +static void +cpld_pic_cascade(unsigned int irq, struct irq_desc *desc) +{ + irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, + &cpld_regs->pci_mask); + if (irq != NO_IRQ) { + generic_handle_irq(irq); + return; + } + + irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, + &cpld_regs->misc_mask); + if (irq != NO_IRQ) { + generic_handle_irq(irq); + return; + } +} + +static int +cpld_pic_host_match(struct irq_domain *h, struct device_node *node) +{ + return cpld_pic_node == node; +} + +static int +cpld_pic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &cpld_pic, handle_level_irq); + return 0; +} + +static const struct irq_domain_ops cpld_pic_host_ops = { + .match = cpld_pic_host_match, + .map = cpld_pic_host_map, +}; + +void __init +mpc5121_ads_cpld_map(void) +{ + struct device_node *np = NULL; + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121ads-cpld-pic"); + if (!np) { + printk(KERN_ERR "CPLD PIC init: can not find cpld-pic node\n"); + return; + } + + cpld_regs = of_iomap(np, 0); + of_node_put(np); +} + +void __init +mpc5121_ads_cpld_pic_init(void) +{ + unsigned int cascade_irq; + struct device_node *np = NULL; + + pr_debug("cpld_ic_init\n"); + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121ads-cpld-pic"); + if (!np) { + printk(KERN_ERR "CPLD PIC init: can not find cpld-pic node\n"); + return; + } + + if (!cpld_regs) + goto end; + + cascade_irq = irq_of_parse_and_map(np, 0); + if (cascade_irq == NO_IRQ) + goto end; + + /* + * statically route touch screen pendown through 1 + * and 
ignore it here + * route all others through our cascade irq + */ + out_8(&cpld_regs->route, 0xfd); + out_8(&cpld_regs->pci_mask, 0xff); + /* unmask pci ints in misc mask */ + out_8(&cpld_regs->misc_mask, ~(MISC_IGNORE)); + + cpld_pic_node = of_node_get(np); + + cpld_pic_host = irq_domain_add_linear(np, 16, &cpld_pic_host_ops, NULL); + if (!cpld_pic_host) { + printk(KERN_ERR "CPLD PIC: failed to allocate irq host!\n"); + goto end; + } + + irq_set_chained_handler(cascade_irq, cpld_pic_cascade); +end: + of_node_put(np); +} diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h new file mode 100644 index 000000000..cc97f022d --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc512x.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Prototypes for MPC512x shared code + */ + +#ifndef __MPC512X_H__ +#define __MPC512X_H__ +extern void __init mpc512x_init_IRQ(void); +extern void __init mpc512x_init_early(void); +extern void __init mpc512x_init(void); +extern void __init mpc512x_setup_arch(void); +extern int __init mpc5121_clk_init(void); +extern const char *mpc512x_select_psc_compat(void); +extern const char *mpc512x_select_reset_compat(void); +extern void mpc512x_restart(char *cmd); + +#endif /* __MPC512X_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c new file mode 100644 index 000000000..ce7140878 --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc512x_generic.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: John Rigby, <jrigby@freescale.com> + * + * Description: + * MPC512x SoC setup + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/kernel.h> +#include <linux/of_platform.h> + +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/prom.h> +#include <asm/time.h> + +#include "mpc512x.h" + +/* + * list of supported boards + */ +static const char * const board[] __initconst = { + "prt,prtlvt", + "fsl,mpc5125ads", + "ifm,ac14xx", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc512x_generic_probe(void) +{ + return of_flat_dt_match(of_get_flat_dt_root(), board); +} + +define_machine(mpc512x_generic) { + .name = "MPC512x generic", + .probe = mpc512x_generic_probe, + .init = mpc512x_init, + .init_early = mpc512x_init_early, + .setup_arch = mpc512x_setup_arch, + .init_IRQ = mpc512x_init_IRQ, + .get_irq = ipic_get_irq, + .calibrate_decr = generic_calibrate_decr, + .restart = mpc512x_restart, +}; diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c new file mode 100644 index 000000000..711f3d352 --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -0,0 +1,510 @@ +/* + * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved. 
+ * + * Author: John Rigby <jrigby@freescale.com> + * + * Description: + * MPC512x Shared code + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/clk.h> +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/of_platform.h> +#include <linux/fsl-diu-fb.h> +#include <linux/memblock.h> +#include <sysdev/fsl_soc.h> + +#include <asm/cacheflush.h> +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/prom.h> +#include <asm/time.h> +#include <asm/mpc5121.h> +#include <asm/mpc52xx_psc.h> + +#include "mpc512x.h" + +static struct mpc512x_reset_module __iomem *reset_module_base; + +static void __init mpc512x_restart_init(void) +{ + struct device_node *np; + const char *reset_compat; + + reset_compat = mpc512x_select_reset_compat(); + np = of_find_compatible_node(NULL, NULL, reset_compat); + if (!np) + return; + + reset_module_base = of_iomap(np, 0); + of_node_put(np); +} + +void mpc512x_restart(char *cmd) +{ + if (reset_module_base) { + /* Enable software reset "RSTE" */ + out_be32(&reset_module_base->rpr, 0x52535445); + /* Set software hard reset */ + out_be32(&reset_module_base->rcr, 0x2); + } else { + pr_err("Restart module not mapped.\n"); + } + for (;;) + ; +} + +struct fsl_diu_shared_fb { + u8 gamma[0x300]; /* 32-bit aligned! */ + struct diu_ad ad0; /* 32-bit aligned! */ + phys_addr_t fb_phys; + size_t fb_len; + bool in_use; +}; + +/* receives a pixel clock spec in pico seconds, adjusts the DIU clock rate */ +static void mpc512x_set_pixel_clock(unsigned int pixclock) +{ + struct device_node *np; + struct clk *clk_diu; + unsigned long epsilon, minpixclock, maxpixclock; + unsigned long offset, want, got, delta; + + /* lookup and enable the DIU clock */ + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-diu"); + if (!np) { + pr_err("Could not find DIU device tree node.\n"); + return; + } + clk_diu = of_clk_get(np, 0); + if (IS_ERR(clk_diu)) { + /* backwards compat with device trees that lack clock specs */ + clk_diu = clk_get_sys(np->name, "ipg"); + } + of_node_put(np); + if (IS_ERR(clk_diu)) { + pr_err("Could not lookup DIU clock.\n"); + return; + } + if (clk_prepare_enable(clk_diu)) { + pr_err("Could not enable DIU clock.\n"); + return; + } + + /* + * convert the picoseconds spec into the desired clock rate, + * determine the acceptable clock range for the monitor (+/- 5%), + * do the calculation in steps to avoid integer overflow + */ + pr_debug("DIU pixclock in ps - %u\n", pixclock); + pixclock = (1000000000 / pixclock) * 1000; + pr_debug("DIU pixclock freq - %u\n", pixclock); + epsilon = pixclock / 20; /* pixclock * 0.05 */ + pr_debug("DIU deviation - %lu\n", epsilon); + minpixclock = pixclock - epsilon; + maxpixclock = pixclock + epsilon; + pr_debug("DIU minpixclock - %lu\n", minpixclock); + pr_debug("DIU maxpixclock - %lu\n", maxpixclock); + + /* + * check whether the DIU supports the desired pixel clock + * + * - simply request the desired clock and see what the + * platform's clock driver will make of it, assuming that it + * will setup the best approximation of the requested value + * - try other candidate frequencies in the order of decreasing + * preference (i.e. 
with increasing distance from the desired + * pixel clock, and checking the lower frequency before the + * higher frequency to not overload the hardware) until the + * first match is found -- any potential subsequent match + * would only be as good as the former match or typically + * would be less preferrable + * + * the offset increment of pixelclock divided by 64 is an + * arbitrary choice -- it's simple to calculate, in the typical + * case we expect the first check to succeed already, in the + * worst case seven frequencies get tested (the exact center and + * three more values each to the left and to the right) before + * the 5% tolerance window is exceeded, resulting in fast enough + * execution yet high enough probability of finding a suitable + * value, while the error rate will be in the order of single + * percents + */ + for (offset = 0; offset <= epsilon; offset += pixclock / 64) { + want = pixclock - offset; + pr_debug("DIU checking clock - %lu\n", want); + clk_set_rate(clk_diu, want); + got = clk_get_rate(clk_diu); + delta = abs(pixclock - got); + if (delta < epsilon) + break; + if (!offset) + continue; + want = pixclock + offset; + pr_debug("DIU checking clock - %lu\n", want); + clk_set_rate(clk_diu, want); + got = clk_get_rate(clk_diu); + delta = abs(pixclock - got); + if (delta < epsilon) + break; + } + if (offset <= epsilon) { + pr_debug("DIU clock accepted - %lu\n", want); + pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n", + pixclock, got, delta, epsilon); + return; + } + pr_warn("DIU pixclock auto search unsuccessful\n"); + + /* + * what is the most appropriate action to take when the search + * for an available pixel clock which is acceptable to the + * monitor has failed? disable the DIU (clock) or just provide + * a "best effort"? we go with the latter + */ + pr_warn("DIU pixclock best effort fallback (backend's choice)\n"); + clk_set_rate(clk_diu, pixclock); + got = clk_get_rate(clk_diu); + delta = abs(pixclock - got); + pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n", + pixclock, got, delta, epsilon); +} + +static enum fsl_diu_monitor_port +mpc512x_valid_monitor_port(enum fsl_diu_monitor_port port) +{ + return FSL_DIU_PORT_DVI; +} + +static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb; + +static inline void mpc512x_free_bootmem(struct page *page) +{ + BUG_ON(PageTail(page)); + BUG_ON(atomic_read(&page->_count) > 1); + free_reserved_page(page); +} + +static void mpc512x_release_bootmem(void) +{ + unsigned long addr = diu_shared_fb.fb_phys & PAGE_MASK; + unsigned long size = diu_shared_fb.fb_len; + unsigned long start, end; + + if (diu_shared_fb.in_use) { + start = PFN_UP(addr); + end = PFN_DOWN(addr + size); + + for (; start < end; start++) + mpc512x_free_bootmem(pfn_to_page(start)); + + diu_shared_fb.in_use = false; + } + diu_ops.release_bootmem = NULL; +} + +/* + * Check if DIU was pre-initialized. If so, perform steps + * needed to continue displaying through the whole boot process. + * Move area descriptor and gamma table elsewhere, they are + * destroyed by bootmem allocator otherwise. The frame buffer + * address range will be reserved in setup_arch() after bootmem + * allocator is up. 
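+ * The copies live in the permanently allocated diu_shared_fb and are
+ * flushed from the data cache before their physical addresses get
+ * written back to the DIU, since the controller fetches the area
+ * descriptor and gamma table from memory by DMA.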
+ */
+static void __init mpc512x_init_diu(void)
+{
+	struct device_node *np;
+	struct diu __iomem *diu_reg;
+	phys_addr_t desc;
+	void __iomem *vaddr;
+	unsigned long mode, pix_fmt, res, bpp;
+	unsigned long dst;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-diu");
+	if (!np) {
+		pr_err("No DIU node\n");
+		return;
+	}
+
+	diu_reg = of_iomap(np, 0);
+	of_node_put(np);
+	if (!diu_reg) {
+		pr_err("Can't map DIU\n");
+		return;
+	}
+
+	mode = in_be32(&diu_reg->diu_mode);
+	if (mode == MFB_MODE0) {
+		pr_info("%s: DIU OFF\n", __func__);
+		goto out;
+	}
+
+	desc = in_be32(&diu_reg->desc[0]);
+	vaddr = ioremap(desc, sizeof(struct diu_ad));
+	if (!vaddr) {
+		pr_err("Can't map DIU area desc.\n");
+		goto out;
+	}
+	memcpy(&diu_shared_fb.ad0, vaddr, sizeof(struct diu_ad));
+	/* flush fb area descriptor */
+	dst = (unsigned long)&diu_shared_fb.ad0;
+	flush_dcache_range(dst, dst + sizeof(struct diu_ad) - 1);
+
+	res = in_be32(&diu_reg->disp_size);
+	pix_fmt = in_le32(vaddr);
+	bpp = ((pix_fmt >> 16) & 0x3) + 1;
+	diu_shared_fb.fb_phys = in_le32(vaddr + 4);
+	diu_shared_fb.fb_len = ((res & 0xfff0000) >> 16) * (res & 0xfff) * bpp;
+	diu_shared_fb.in_use = true;
+	iounmap(vaddr);
+
+	desc = in_be32(&diu_reg->gamma);
+	vaddr = ioremap(desc, sizeof(diu_shared_fb.gamma));
+	if (!vaddr) {
+		pr_err("Can't map DIU gamma table.\n");
+		diu_shared_fb.in_use = false;
+		goto out;
+	}
+	memcpy(&diu_shared_fb.gamma, vaddr, sizeof(diu_shared_fb.gamma));
+	/* flush gamma table */
+	dst = (unsigned long)&diu_shared_fb.gamma;
+	flush_dcache_range(dst, dst + sizeof(diu_shared_fb.gamma) - 1);
+
+	iounmap(vaddr);
+	out_be32(&diu_reg->gamma, virt_to_phys(&diu_shared_fb.gamma));
+	out_be32(&diu_reg->desc[1], 0);
+	out_be32(&diu_reg->desc[2], 0);
+	out_be32(&diu_reg->desc[0], virt_to_phys(&diu_shared_fb.ad0));
+
+out:
+	iounmap(diu_reg);
+}
+
+static void __init mpc512x_setup_diu(void)
+{
+	int ret;
+
+	/*
+	 * We do not allocate and configure a new area for the bitmap
+	 * buffer because that would require copying the bitmap data
+	 * (splash image) and so negatively affect boot time. Instead
+	 * we reserve the already configured frame buffer area so that
+	 * it won't be destroyed. The starting address of the area to
+	 * reserve, and also its length, is passed to memblock_reserve().
+	 * The area is freed on the first open of fbdev, when the splash
+	 * image is no longer needed.
+	 */
+	if (diu_shared_fb.in_use) {
+		ret = memblock_reserve(diu_shared_fb.fb_phys,
+				       diu_shared_fb.fb_len);
+		if (ret) {
+			pr_err("%s: reserve bootmem failed\n", __func__);
+			diu_shared_fb.in_use = false;
+		}
+	}
+
+	diu_ops.set_pixel_clock = mpc512x_set_pixel_clock;
+	diu_ops.valid_monitor_port = mpc512x_valid_monitor_port;
+	diu_ops.release_bootmem = mpc512x_release_bootmem;
+}
+
+void __init mpc512x_init_IRQ(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-ipic");
+	if (!np)
+		return;
+
+	ipic_init(np, 0);
+	of_node_put(np);
+
+	/*
+	 * Initialize the default interrupt mapping priorities,
+	 * in case the boot rom changed something on us.
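+	 *
+	 * (ipic_set_default_priority() is expected to rewrite the IPIC
+	 * priority registers with their reset defaults, giving a known
+	 * interrupt-priority state regardless of what the firmware
+	 * left behind.)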
+ */ + ipic_set_default_priority(); +} + +/* + * Nodes to do bus probe on, soc and localbus + */ +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "fsl,mpc5121-immr", }, + { .compatible = "fsl,mpc5121-localbus", }, + { .compatible = "fsl,mpc5121-mbx", }, + { .compatible = "fsl,mpc5121-nfc", }, + { .compatible = "fsl,mpc5121-sram", }, + { .compatible = "fsl,mpc5121-pci", }, + { .compatible = "gpio-leds", }, + {}, +}; + +static void __init mpc512x_declare_of_platform_devices(void) +{ + if (of_platform_bus_probe(NULL, of_bus_ids, NULL)) + printk(KERN_ERR __FILE__ ": " + "Error while probing of_platform bus\n"); +} + +#define DEFAULT_FIFO_SIZE 16 + +const char *mpc512x_select_psc_compat(void) +{ + if (of_machine_is_compatible("fsl,mpc5121")) + return "fsl,mpc5121-psc"; + + if (of_machine_is_compatible("fsl,mpc5125")) + return "fsl,mpc5125-psc"; + + return NULL; +} + +const char *mpc512x_select_reset_compat(void) +{ + if (of_machine_is_compatible("fsl,mpc5121")) + return "fsl,mpc5121-reset"; + + if (of_machine_is_compatible("fsl,mpc5125")) + return "fsl,mpc5125-reset"; + + return NULL; +} + +static unsigned int __init get_fifo_size(struct device_node *np, + char *prop_name) +{ + const unsigned int *fp; + + fp = of_get_property(np, prop_name, NULL); + if (fp) + return *fp; + + pr_warning("no %s property in %s node, defaulting to %d\n", + prop_name, np->full_name, DEFAULT_FIFO_SIZE); + + return DEFAULT_FIFO_SIZE; +} + +#define FIFOC(_base) ((struct mpc512x_psc_fifo __iomem *) \ + ((u32)(_base) + sizeof(struct mpc52xx_psc))) + +/* Init PSC FIFO space for TX and RX slices */ +static void __init mpc512x_psc_fifo_init(void) +{ + struct device_node *np; + void __iomem *psc; + unsigned int tx_fifo_size; + unsigned int rx_fifo_size; + const char *psc_compat; + int fifobase = 0; /* current fifo address in 32 bit words */ + + psc_compat = mpc512x_select_psc_compat(); + if (!psc_compat) { + pr_err("%s: no compatible devices found\n", __func__); + return; + } + + for_each_compatible_node(np, NULL, psc_compat) { + tx_fifo_size = get_fifo_size(np, "fsl,tx-fifo-size"); + rx_fifo_size = get_fifo_size(np, "fsl,rx-fifo-size"); + + /* size in register is in 4 byte units */ + tx_fifo_size /= 4; + rx_fifo_size /= 4; + if (!tx_fifo_size) + tx_fifo_size = 1; + if (!rx_fifo_size) + rx_fifo_size = 1; + + psc = of_iomap(np, 0); + if (!psc) { + pr_err("%s: Can't map %s device\n", + __func__, np->full_name); + continue; + } + + /* FIFO space is 4KiB, check if requested size is available */ + if ((fifobase + tx_fifo_size + rx_fifo_size) > 0x1000) { + pr_err("%s: no fifo space available for %s\n", + __func__, np->full_name); + iounmap(psc); + /* + * chances are that another device requests less + * fifo space, so we continue. 
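+			 *
+			 * (for a sense of scale, assuming the 16-byte
+			 * default: each PSC then takes 4 + 4 = 8 words
+			 * of the shared space, so the 0x1000 limit
+			 * checked above is only reached with much larger
+			 * fsl,tx-fifo-size / fsl,rx-fifo-size values
+			 * from the device tree)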
+ */ + continue; + } + + /* set tx and rx fifo size registers */ + out_be32(&FIFOC(psc)->txsz, (fifobase << 16) | tx_fifo_size); + fifobase += tx_fifo_size; + out_be32(&FIFOC(psc)->rxsz, (fifobase << 16) | rx_fifo_size); + fifobase += rx_fifo_size; + + /* reset and enable the slices */ + out_be32(&FIFOC(psc)->txcmd, 0x80); + out_be32(&FIFOC(psc)->txcmd, 0x01); + out_be32(&FIFOC(psc)->rxcmd, 0x80); + out_be32(&FIFOC(psc)->rxcmd, 0x01); + + iounmap(psc); + } +} + +void __init mpc512x_init_early(void) +{ + mpc512x_restart_init(); + if (IS_ENABLED(CONFIG_FB_FSL_DIU)) + mpc512x_init_diu(); +} + +void __init mpc512x_init(void) +{ + mpc5121_clk_init(); + mpc512x_declare_of_platform_devices(); + mpc512x_psc_fifo_init(); +} + +void __init mpc512x_setup_arch(void) +{ + if (IS_ENABLED(CONFIG_FB_FSL_DIU)) + mpc512x_setup_diu(); +} + +/** + * mpc512x_cs_config - Setup chip select configuration + * @cs: chip select number + * @val: chip select configuration value + * + * Perform chip select configuration for devices on LocalPlus Bus. + * Intended to dynamically reconfigure the chip select parameters + * for configurable devices on the bus. + */ +int mpc512x_cs_config(unsigned int cs, u32 val) +{ + static struct mpc512x_lpc __iomem *lpc; + struct device_node *np; + + if (cs > 7) + return -EINVAL; + + if (!lpc) { + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-lpc"); + lpc = of_iomap(np, 0); + of_node_put(np); + if (!lpc) + return -ENOMEM; + } + + out_be32(&lpc->cs_cfg[cs], val); + return 0; +} +EXPORT_SYMBOL(mpc512x_cs_config); diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c new file mode 100644 index 000000000..116f2325b --- /dev/null +++ b/arch/powerpc/platforms/512x/pdm360ng.c @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2010 DENX Software Engineering + * + * Anatolij Gustschin, <agust@denx.de> + * + * PDM360NG board setup + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ + +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> + +#include <asm/machdep.h> +#include <asm/ipic.h> + +#include "mpc512x.h" + +#if defined(CONFIG_TOUCHSCREEN_ADS7846) || \ + defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) +#include <linux/interrupt.h> +#include <linux/spi/ads7846.h> +#include <linux/spi/spi.h> +#include <linux/notifier.h> + +static void *pdm360ng_gpio_base; + +static int pdm360ng_get_pendown_state(void) +{ + u32 reg; + + reg = in_be32(pdm360ng_gpio_base + 0xc); + if (reg & 0x40) + setbits32(pdm360ng_gpio_base + 0xc, 0x40); + + reg = in_be32(pdm360ng_gpio_base + 0x8); + + /* return 1 if pen is down */ + return (reg & 0x40) == 0; +} + +static struct ads7846_platform_data pdm360ng_ads7846_pdata = { + .model = 7845, + .get_pendown_state = pdm360ng_get_pendown_state, + .irq_flags = IRQF_TRIGGER_LOW, +}; + +static int __init pdm360ng_penirq_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-gpio"); + if (!np) { + pr_err("%s: Can't find 'mpc5121-gpio' node\n", __func__); + return -ENODEV; + } + + pdm360ng_gpio_base = of_iomap(np, 0); + of_node_put(np); + if (!pdm360ng_gpio_base) { + pr_err("%s: Can't map gpio regs.\n", __func__); + return -ENODEV; + } + out_be32(pdm360ng_gpio_base + 0xc, 0xffffffff); + setbits32(pdm360ng_gpio_base + 0x18, 0x2000); + setbits32(pdm360ng_gpio_base + 0x10, 0x40); + + return 0; +} + +static int pdm360ng_touchscreen_notifier_call(struct notifier_block *nb, + unsigned long event, void *__dev) +{ + struct device *dev = __dev; + + if ((event == BUS_NOTIFY_ADD_DEVICE) && + of_device_is_compatible(dev->of_node, "ti,ads7846")) { + dev->platform_data = &pdm360ng_ads7846_pdata; + return NOTIFY_OK; + } + return NOTIFY_DONE; +} + +static struct notifier_block pdm360ng_touchscreen_nb = { + .notifier_call = pdm360ng_touchscreen_notifier_call, +}; + +static void __init pdm360ng_touchscreen_init(void) +{ + if (pdm360ng_penirq_init()) + return; + + bus_register_notifier(&spi_bus_type, &pdm360ng_touchscreen_nb); +} +#else +static inline void __init pdm360ng_touchscreen_init(void) +{ +} +#endif /* CONFIG_TOUCHSCREEN_ADS7846 */ + +void __init pdm360ng_init(void) +{ + mpc512x_init(); + pdm360ng_touchscreen_init(); +} + +static int __init pdm360ng_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "ifm,pdm360ng"); +} + +define_machine(pdm360ng) { + .name = "PDM360NG", + .probe = pdm360ng_probe, + .setup_arch = mpc512x_setup_arch, + .init = pdm360ng_init, + .init_early = mpc512x_init_early, + .init_IRQ = mpc512x_init_IRQ, + .get_irq = ipic_get_irq, + .calibrate_decr = generic_calibrate_decr, + .restart = mpc512x_restart, +}; diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig new file mode 100644 index 000000000..b625a2c6f --- /dev/null +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -0,0 +1,61 @@ +config PPC_MPC52xx + bool "52xx-based boards" + depends on 6xx + select COMMON_CLK + select PPC_PCI_CHOICE + +config PPC_MPC5200_SIMPLE + bool "Generic support for simple MPC5200 based boards" + depends on PPC_MPC52xx + select DEFAULT_UIMAGE + help + This option enables support for a simple MPC52xx based boards which + do not need a custom platform specific setup. 
Such boards are + supported assuming the following: + + - GPIO pins are configured by the firmware, + - CDM configuration (clocking) is setup correctly by firmware, + - if the 'fsl,has-wdt' property is present in one of the + gpt nodes, then it is safe to use such gpt to reset the board, + - PCI is supported if enabled in the kernel configuration + and if there is a PCI bus node defined in the device tree. + + Boards that are compatible with this generic platform support + are: + intercontrol,digsy-mtc + phytec,pcm030 + phytec,pcm032 + promess,motionpro + schindler,cm5200 + tqc,tqm5200 + +config PPC_EFIKA + bool "bPlan Efika 5k2. MPC5200B based computer" + depends on PPC_MPC52xx + select PPC_RTAS + select RTAS_PROC + select PPC_NATIVE + +config PPC_LITE5200 + bool "Freescale Lite5200 Eval Board" + depends on PPC_MPC52xx + select DEFAULT_UIMAGE + +config PPC_MEDIA5200 + bool "Freescale Media5200 Eval Board" + depends on PPC_MPC52xx + select DEFAULT_UIMAGE + +config PPC_MPC5200_BUGFIX + bool "MPC5200 (L25R) bugfix support" + depends on PPC_MPC52xx + help + Enable workarounds for original MPC5200 errata. This is not required + for MPC5200B based boards. + + It is safe to say 'Y' here + +config PPC_MPC5200_LPBFIFO + tristate "MPC5200 LocalPlus bus FIFO driver" + depends on PPC_MPC52xx && PPC_BESTCOMM + select PPC_BESTCOMM_GEN_BD diff --git a/arch/powerpc/platforms/52xx/Makefile b/arch/powerpc/platforms/52xx/Makefile new file mode 100644 index 000000000..4e6248679 --- /dev/null +++ b/arch/powerpc/platforms/52xx/Makefile @@ -0,0 +1,17 @@ +# +# Makefile for 52xx based boards +# +obj-y += mpc52xx_pic.o mpc52xx_common.o mpc52xx_gpt.o +obj-$(CONFIG_PCI) += mpc52xx_pci.o + +obj-$(CONFIG_PPC_MPC5200_SIMPLE) += mpc5200_simple.o +obj-$(CONFIG_PPC_EFIKA) += efika.o +obj-$(CONFIG_PPC_LITE5200) += lite5200.o +obj-$(CONFIG_PPC_MEDIA5200) += media5200.o + +obj-$(CONFIG_PM) += mpc52xx_sleep.o mpc52xx_pm.o +ifeq ($(CONFIG_PPC_LITE5200),y) + obj-$(CONFIG_PM) += lite5200_sleep.o lite5200_pm.o +endif + +obj-$(CONFIG_PPC_MPC5200_LPBFIFO) += mpc52xx_lpbfifo.o diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c new file mode 100644 index 000000000..6af651e69 --- /dev/null +++ b/arch/powerpc/platforms/52xx/efika.c @@ -0,0 +1,240 @@ +/* + * Efika 5K2 platform code + * Some code really inspired from the lite5200b platform. + * + * Copyright (C) 2006 bplan GmbH + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/init.h> +#include <generated/utsrelease.h> +#include <linux/pci.h> +#include <linux/of.h> +#include <asm/dma.h> +#include <asm/prom.h> +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/rtas.h> +#include <asm/mpc52xx.h> + +#define EFIKA_PLATFORM_NAME "Efika" + + +/* ------------------------------------------------------------------------ */ +/* PCI accesses thru RTAS */ +/* ------------------------------------------------------------------------ */ + +#ifdef CONFIG_PCI + +/* + * Access functions for PCI config space using RTAS calls. 
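+ *
+ * The config address handed to RTAS below packs, as an illustration:
+ *   addr = offset | (devfn << 8) | (local bus << 16) | (hose << 24)
+ * so e.g. devfn 0x08 at offset 0x04 on the first bus of hose 0 is
+ * encoded as 0x00000804.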
+ */ +static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 * val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8) + | (((bus->number - hose->first_busno) & 0xff) << 16) + | (hose->global_number << 24); + int ret = -1; + int rval; + + rval = rtas_call(rtas_token("read-pci-config"), 2, 2, &ret, addr, len); + *val = ret; + return rval ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; +} + +static int rtas_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8) + | (((bus->number - hose->first_busno) & 0xff) << 16) + | (hose->global_number << 24); + int rval; + + rval = rtas_call(rtas_token("write-pci-config"), 3, 1, NULL, + addr, len, val); + return rval ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops rtas_pci_ops = { + .read = rtas_read_config, + .write = rtas_write_config, +}; + + +static void __init efika_pcisetup(void) +{ + const int *bus_range; + int len; + struct pci_controller *hose; + struct device_node *root; + struct device_node *pcictrl; + + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_WARNING EFIKA_PLATFORM_NAME + ": Unable to find the root node\n"); + return; + } + + for (pcictrl = NULL;;) { + pcictrl = of_get_next_child(root, pcictrl); + if ((pcictrl == NULL) || (strcmp(pcictrl->name, "pci") == 0)) + break; + } + + of_node_put(root); + + if (pcictrl == NULL) { + printk(KERN_WARNING EFIKA_PLATFORM_NAME + ": Unable to find the PCI bridge node\n"); + return; + } + + bus_range = of_get_property(pcictrl, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING EFIKA_PLATFORM_NAME + ": Can't get bus-range for %s\n", pcictrl->full_name); + goto out_put; + } + + if (bus_range[1] == bus_range[0]) + printk(KERN_INFO EFIKA_PLATFORM_NAME ": PCI bus %d", + bus_range[0]); + else + printk(KERN_INFO EFIKA_PLATFORM_NAME ": PCI buses %d..%d", + bus_range[0], bus_range[1]); + printk(" controlled by %s\n", pcictrl->full_name); + printk("\n"); + + hose = pcibios_alloc_controller(pcictrl); + if (!hose) { + printk(KERN_WARNING EFIKA_PLATFORM_NAME + ": Can't allocate PCI controller structure for %s\n", + pcictrl->full_name); + goto out_put; + } + + hose->first_busno = bus_range[0]; + hose->last_busno = bus_range[1]; + hose->ops = &rtas_pci_ops; + + pci_process_bridge_OF_ranges(hose, pcictrl, 0); + return; +out_put: + of_node_put(pcictrl); +} + +#else +static void __init efika_pcisetup(void) +{} +#endif + + + +/* ------------------------------------------------------------------------ */ +/* Platform setup */ +/* ------------------------------------------------------------------------ */ + +static void efika_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *revision; + const char *codegendescription; + const char *codegenvendor; + + root = of_find_node_by_path("/"); + if (!root) + return; + + revision = of_get_property(root, "revision", NULL); + codegendescription = of_get_property(root, "CODEGEN,description", NULL); + codegenvendor = of_get_property(root, "CODEGEN,vendor", NULL); + + if (codegendescription) + seq_printf(m, "machine\t\t: %s\n", codegendescription); + else + seq_printf(m, "machine\t\t: Efika\n"); + + if (revision) + seq_printf(m, "revision\t: %s\n", revision); + + if (codegenvendor) + seq_printf(m, "vendor\t\t: 
%s\n", codegenvendor); + + of_node_put(root); +} + +#ifdef CONFIG_PM +static void efika_suspend_prepare(void __iomem *mbar) +{ + u8 pin = 4; /* GPIO_WKUP_4 (GPIO_PSC6_0 - IRDA_RX) */ + u8 level = 1; /* wakeup on high level */ + /* IOW. to wake it up, short pins 1 and 3 on IRDA connector */ + mpc52xx_set_wakeup_gpio(pin, level); +} +#endif + +static void __init efika_setup_arch(void) +{ + rtas_initialize(); + + /* Map important registers from the internal memory map */ + mpc52xx_map_common_devices(); + + efika_pcisetup(); + +#ifdef CONFIG_PM + mpc52xx_suspend.board_suspend_prepare = efika_suspend_prepare; + mpc52xx_pm_init(); +#endif + + if (ppc_md.progress) + ppc_md.progress("Linux/PPC " UTS_RELEASE " running on Efika ;-)\n", 0x0); +} + +static int __init efika_probe(void) +{ + const char *model = of_get_flat_dt_prop(of_get_flat_dt_root(), + "model", NULL); + + if (model == NULL) + return 0; + if (strcmp(model, "EFIKA5K2")) + return 0; + + ISA_DMA_THRESHOLD = ~0L; + DMA_MODE_READ = 0x44; + DMA_MODE_WRITE = 0x48; + + pm_power_off = rtas_power_off; + + return 1; +} + +define_machine(efika) +{ + .name = EFIKA_PLATFORM_NAME, + .probe = efika_probe, + .setup_arch = efika_setup_arch, + .init = mpc52xx_declare_of_platform_devices, + .show_cpuinfo = efika_show_cpuinfo, + .init_IRQ = mpc52xx_init_irq, + .get_irq = mpc52xx_get_irq, + .restart = rtas_restart, + .halt = rtas_halt, + .set_rtc_time = rtas_set_rtc_time, + .get_rtc_time = rtas_get_rtc_time, + .progress = rtas_progress, + .get_boot_time = rtas_get_boot_time, + .calibrate_decr = generic_calibrate_decr, +#ifdef CONFIG_PCI + .phys_mem_access_prot = pci_phys_mem_access_prot, +#endif +}; + diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c new file mode 100644 index 000000000..7492de3cf --- /dev/null +++ b/arch/powerpc/platforms/52xx/lite5200.c @@ -0,0 +1,198 @@ +/* + * Freescale Lite5200 board support + * + * Written by: Grant Likely <grant.likely@secretlab.ca> + * + * Copyright (C) Secret Lab Technologies Ltd. 2006. All rights reserved. + * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved. + * + * Description: + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#undef DEBUG + +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/root_dev.h> +#include <linux/initrd.h> +#include <asm/time.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/mpc52xx.h> + +/* ************************************************************************ + * + * Setup the architecture + * + */ + +/* mpc5200 device tree match tables */ +static const struct of_device_id mpc5200_cdm_ids[] __initconst = { + { .compatible = "fsl,mpc5200-cdm", }, + { .compatible = "mpc5200-cdm", }, + {} +}; + +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { + { .compatible = "fsl,mpc5200-gpio", }, + { .compatible = "mpc5200-gpio", }, + {} +}; + +/* + * Fix clock configuration. + * + * Firmware is supposed to be responsible for this. If you are creating a + * new board port, do *NOT* duplicate this code. 
Fix your boot firmware + * to set it correctly in the first place + */ +static void __init +lite5200_fix_clock_config(void) +{ + struct device_node *np; + struct mpc52xx_cdm __iomem *cdm; + /* Map zones */ + np = of_find_matching_node(NULL, mpc5200_cdm_ids); + cdm = of_iomap(np, 0); + of_node_put(np); + if (!cdm) { + printk(KERN_ERR "%s() failed; expect abnormal behaviour\n", + __func__); + return; + } + + /* Use internal 48 Mhz */ + out_8(&cdm->ext_48mhz_en, 0x00); + out_8(&cdm->fd_enable, 0x01); + if (in_be32(&cdm->rstcfg) & 0x40) /* Assumes 33Mhz clock */ + out_be16(&cdm->fd_counters, 0x0001); + else + out_be16(&cdm->fd_counters, 0x5555); + + /* Unmap the regs */ + iounmap(cdm); +} + +/* + * Fix setting of port_config register. + * + * Firmware is supposed to be responsible for this. If you are creating a + * new board port, do *NOT* duplicate this code. Fix your boot firmware + * to set it correctly in the first place + */ +static void __init +lite5200_fix_port_config(void) +{ + struct device_node *np; + struct mpc52xx_gpio __iomem *gpio; + u32 port_config; + + np = of_find_matching_node(NULL, mpc5200_gpio_ids); + gpio = of_iomap(np, 0); + of_node_put(np); + if (!gpio) { + printk(KERN_ERR "%s() failed. expect abnormal behavior\n", + __func__); + return; + } + + /* Set port config */ + port_config = in_be32(&gpio->port_config); + + port_config &= ~0x00800000; /* 48Mhz internal, pin is GPIO */ + + port_config &= ~0x00007000; /* USB port : Differential mode */ + port_config |= 0x00001000; /* USB 1 only */ + + port_config &= ~0x03000000; /* ATA CS is on csb_4/5 */ + port_config |= 0x01000000; + + pr_debug("port_config: old:%x new:%x\n", + in_be32(&gpio->port_config), port_config); + out_be32(&gpio->port_config, port_config); + + /* Unmap zone */ + iounmap(gpio); +} + +#ifdef CONFIG_PM +static void lite5200_suspend_prepare(void __iomem *mbar) +{ + u8 pin = 1; /* GPIO_WKUP_1 (GPIO_PSC2_4) */ + u8 level = 0; /* wakeup on low level */ + mpc52xx_set_wakeup_gpio(pin, level); + + /* + * power down usb port + * this needs to be called before of-ohci suspend code + */ + + /* set ports to "power switched" and "powered at the same time" + * USB Rh descriptor A: NPS = 0, PSM = 0 */ + out_be32(mbar + 0x1048, in_be32(mbar + 0x1048) & ~0x300); + /* USB Rh status: LPS = 1 - turn off power */ + out_be32(mbar + 0x1050, 0x00000001); +} + +static void lite5200_resume_finish(void __iomem *mbar) +{ + /* USB Rh status: LPSC = 1 - turn on power */ + out_be32(mbar + 0x1050, 0x00010000); +} +#endif + +static void __init lite5200_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("lite5200_setup_arch()", 0); + + /* Map important registers from the internal memory map */ + mpc52xx_map_common_devices(); + + /* Some mpc5200 & mpc5200b related configuration */ + mpc5200_setup_xlb_arbiter(); + + /* Fix things that firmware should have done. 
*/ + lite5200_fix_clock_config(); + lite5200_fix_port_config(); + +#ifdef CONFIG_PM + mpc52xx_suspend.board_suspend_prepare = lite5200_suspend_prepare; + mpc52xx_suspend.board_resume_finish = lite5200_resume_finish; + lite5200_pm_init(); +#endif + + mpc52xx_setup_pci(); +} + +static const char * const board[] __initconst = { + "fsl,lite5200", + "fsl,lite5200b", + NULL, +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init lite5200_probe(void) +{ + return of_flat_dt_match(of_get_flat_dt_root(), board); +} + +define_machine(lite5200) { + .name = "lite5200", + .probe = lite5200_probe, + .setup_arch = lite5200_setup_arch, + .init = mpc52xx_declare_of_platform_devices, + .init_IRQ = mpc52xx_init_irq, + .get_irq = mpc52xx_get_irq, + .restart = mpc52xx_restart, + .calibrate_decr = generic_calibrate_decr, +}; diff --git a/arch/powerpc/platforms/52xx/lite5200_pm.c b/arch/powerpc/platforms/52xx/lite5200_pm.c new file mode 100644 index 000000000..870b70f5d --- /dev/null +++ b/arch/powerpc/platforms/52xx/lite5200_pm.c @@ -0,0 +1,249 @@ +#include <linux/init.h> +#include <linux/suspend.h> +#include <asm/io.h> +#include <asm/time.h> +#include <asm/mpc52xx.h> +#include <asm/switch_to.h> + +/* defined in lite5200_sleep.S and only used here */ +extern void lite5200_low_power(void __iomem *sram, void __iomem *mbar); + +static struct mpc52xx_cdm __iomem *cdm; +static struct mpc52xx_intr __iomem *pic; +static struct mpc52xx_sdma __iomem *bes; +static struct mpc52xx_xlb __iomem *xlb; +static struct mpc52xx_gpio __iomem *gps; +static struct mpc52xx_gpio_wkup __iomem *gpw; +static void __iomem *pci; +static void __iomem *sram; +static const int sram_size = 0x4000; /* 16 kBytes */ +static void __iomem *mbar; + +static suspend_state_t lite5200_pm_target_state; + +static int lite5200_pm_valid(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + return 1; + default: + return 0; + } +} + +static int lite5200_pm_begin(suspend_state_t state) +{ + if (lite5200_pm_valid(state)) { + lite5200_pm_target_state = state; + return 0; + } + return -EINVAL; +} + +static int lite5200_pm_prepare(void) +{ + struct device_node *np; + const struct of_device_id immr_ids[] = { + { .compatible = "fsl,mpc5200-immr", }, + { .compatible = "fsl,mpc5200b-immr", }, + { .type = "soc", .compatible = "mpc5200", }, /* lite5200 */ + { .type = "builtin", .compatible = "mpc5200", }, /* efika */ + {} + }; + u64 regaddr64 = 0; + const u32 *regaddr_p; + + /* deep sleep? 
let mpc52xx code handle that */ + if (lite5200_pm_target_state == PM_SUSPEND_STANDBY) + return mpc52xx_pm_prepare(); + + if (lite5200_pm_target_state != PM_SUSPEND_MEM) + return -EINVAL; + + /* map registers */ + np = of_find_matching_node(NULL, immr_ids); + regaddr_p = of_get_address(np, 0, NULL, NULL); + if (regaddr_p) + regaddr64 = of_translate_address(np, regaddr_p); + of_node_put(np); + + mbar = ioremap((u32) regaddr64, 0xC000); + if (!mbar) { + printk(KERN_ERR "%s:%i Error mapping registers\n", __func__, __LINE__); + return -ENOSYS; + } + + cdm = mbar + 0x200; + pic = mbar + 0x500; + gps = mbar + 0xb00; + gpw = mbar + 0xc00; + pci = mbar + 0xd00; + bes = mbar + 0x1200; + xlb = mbar + 0x1f00; + sram = mbar + 0x8000; + + return 0; +} + +/* save and restore registers not bound to any real devices */ +static struct mpc52xx_cdm scdm; +static struct mpc52xx_intr spic; +static struct mpc52xx_sdma sbes; +static struct mpc52xx_xlb sxlb; +static struct mpc52xx_gpio sgps; +static struct mpc52xx_gpio_wkup sgpw; +static char spci[0x200]; + +static void lite5200_save_regs(void) +{ + _memcpy_fromio(&spic, pic, sizeof(*pic)); + _memcpy_fromio(&sbes, bes, sizeof(*bes)); + _memcpy_fromio(&scdm, cdm, sizeof(*cdm)); + _memcpy_fromio(&sxlb, xlb, sizeof(*xlb)); + _memcpy_fromio(&sgps, gps, sizeof(*gps)); + _memcpy_fromio(&sgpw, gpw, sizeof(*gpw)); + _memcpy_fromio(spci, pci, 0x200); + + _memcpy_fromio(saved_sram, sram, sram_size); +} + +static void lite5200_restore_regs(void) +{ + int i; + _memcpy_toio(sram, saved_sram, sram_size); + + /* PCI Configuration */ + _memcpy_toio(pci, spci, 0x200); + + /* + * GPIOs. Interrupt Master Enable has higher address then other + * registers, so just memcpy is ok. + */ + _memcpy_toio(gpw, &sgpw, sizeof(*gpw)); + _memcpy_toio(gps, &sgps, sizeof(*gps)); + + + /* XLB Arbitrer */ + out_be32(&xlb->snoop_window, sxlb.snoop_window); + out_be32(&xlb->master_priority, sxlb.master_priority); + out_be32(&xlb->master_pri_enable, sxlb.master_pri_enable); + + /* enable */ + out_be32(&xlb->int_enable, sxlb.int_enable); + out_be32(&xlb->config, sxlb.config); + + + /* CDM - Clock Distribution Module */ + out_8(&cdm->ipb_clk_sel, scdm.ipb_clk_sel); + out_8(&cdm->pci_clk_sel, scdm.pci_clk_sel); + + out_8(&cdm->ext_48mhz_en, scdm.ext_48mhz_en); + out_8(&cdm->fd_enable, scdm.fd_enable); + out_be16(&cdm->fd_counters, scdm.fd_counters); + + out_be32(&cdm->clk_enables, scdm.clk_enables); + + out_8(&cdm->osc_disable, scdm.osc_disable); + + out_be16(&cdm->mclken_div_psc1, scdm.mclken_div_psc1); + out_be16(&cdm->mclken_div_psc2, scdm.mclken_div_psc2); + out_be16(&cdm->mclken_div_psc3, scdm.mclken_div_psc3); + out_be16(&cdm->mclken_div_psc6, scdm.mclken_div_psc6); + + + /* BESTCOMM */ + out_be32(&bes->taskBar, sbes.taskBar); + out_be32(&bes->currentPointer, sbes.currentPointer); + out_be32(&bes->endPointer, sbes.endPointer); + out_be32(&bes->variablePointer, sbes.variablePointer); + + out_8(&bes->IntVect1, sbes.IntVect1); + out_8(&bes->IntVect2, sbes.IntVect2); + out_be16(&bes->PtdCntrl, sbes.PtdCntrl); + + for (i=0; i<32; i++) + out_8(&bes->ipr[i], sbes.ipr[i]); + + out_be32(&bes->cReqSelect, sbes.cReqSelect); + out_be32(&bes->task_size0, sbes.task_size0); + out_be32(&bes->task_size1, sbes.task_size1); + out_be32(&bes->MDEDebug, sbes.MDEDebug); + out_be32(&bes->ADSDebug, sbes.ADSDebug); + out_be32(&bes->Value1, sbes.Value1); + out_be32(&bes->Value2, sbes.Value2); + out_be32(&bes->Control, sbes.Control); + out_be32(&bes->Status, sbes.Status); + out_be32(&bes->PTDDebug, sbes.PTDDebug); + + /* 
restore tasks */ + for (i=0; i<16; i++) + out_be16(&bes->tcr[i], sbes.tcr[i]); + + /* enable interrupts */ + out_be32(&bes->IntPend, sbes.IntPend); + out_be32(&bes->IntMask, sbes.IntMask); + + + /* PIC */ + out_be32(&pic->per_pri1, spic.per_pri1); + out_be32(&pic->per_pri2, spic.per_pri2); + out_be32(&pic->per_pri3, spic.per_pri3); + + out_be32(&pic->main_pri1, spic.main_pri1); + out_be32(&pic->main_pri2, spic.main_pri2); + + out_be32(&pic->enc_status, spic.enc_status); + + /* unmask and enable interrupts */ + out_be32(&pic->per_mask, spic.per_mask); + out_be32(&pic->main_mask, spic.main_mask); + out_be32(&pic->ctrl, spic.ctrl); +} + +static int lite5200_pm_enter(suspend_state_t state) +{ + /* deep sleep? let mpc52xx code handle that */ + if (state == PM_SUSPEND_STANDBY) { + return mpc52xx_pm_enter(state); + } + + lite5200_save_regs(); + + /* effectively save FP regs */ + enable_kernel_fp(); + + lite5200_low_power(sram, mbar); + + lite5200_restore_regs(); + + iounmap(mbar); + return 0; +} + +static void lite5200_pm_finish(void) +{ + /* deep sleep? let mpc52xx code handle that */ + if (lite5200_pm_target_state == PM_SUSPEND_STANDBY) + mpc52xx_pm_finish(); +} + +static void lite5200_pm_end(void) +{ + lite5200_pm_target_state = PM_SUSPEND_ON; +} + +static const struct platform_suspend_ops lite5200_pm_ops = { + .valid = lite5200_pm_valid, + .begin = lite5200_pm_begin, + .prepare = lite5200_pm_prepare, + .enter = lite5200_pm_enter, + .finish = lite5200_pm_finish, + .end = lite5200_pm_end, +}; + +int __init lite5200_pm_init(void) +{ + suspend_set_ops(&lite5200_pm_ops); + return 0; +} diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S new file mode 100644 index 000000000..08ab6fefc --- /dev/null +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -0,0 +1,412 @@ +#include <asm/reg.h> +#include <asm/ppc_asm.h> +#include <asm/processor.h> +#include <asm/cache.h> + + +#define SDRAM_CTRL 0x104 +#define SC_MODE_EN (1<<31) +#define SC_CKE (1<<30) +#define SC_REF_EN (1<<28) +#define SC_SOFT_PRE (1<<1) + +#define GPIOW_GPIOE 0xc00 +#define GPIOW_DDR 0xc08 +#define GPIOW_DVO 0xc0c + +#define CDM_CE 0x214 +#define CDM_SDRAM (1<<3) + + +/* helpers... 
beware: r10 and r4 are overwritten */ +#define SAVE_SPRN(reg, addr) \ + mfspr r10, SPRN_##reg; \ + stw r10, ((addr)*4)(r4); + +#define LOAD_SPRN(reg, addr) \ + lwz r10, ((addr)*4)(r4); \ + mtspr SPRN_##reg, r10; \ + sync; \ + isync; + + + .data +registers: + .space 0x5c*4 + .text + +/* ---------------------------------------------------------------------- */ +/* low-power mode with help of M68HLC908QT1 */ + + .globl lite5200_low_power +lite5200_low_power: + + mr r7, r3 /* save SRAM va */ + mr r8, r4 /* save MBAR va */ + + /* setup wakeup address for u-boot at physical location 0x0 */ + lis r3, CONFIG_KERNEL_START@h + lis r4, lite5200_wakeup@h + ori r4, r4, lite5200_wakeup@l + sub r4, r4, r3 + stw r4, 0(r3) + + + /* + * save stuff BDI overwrites + * 0xf0 (0xe0->0x100 gets overwritten when BDI connected; + * even when CONFIG_BDI* is disabled and MMU XLAT commented; heisenbug?)) + * WARNING: self-refresh doesn't seem to work when BDI2000 is connected, + * possibly because BDI sets SDRAM registers before wakeup code does + */ + lis r4, registers@h + ori r4, r4, registers@l + lwz r10, 0xf0(r3) + stw r10, (0x1d*4)(r4) + + /* save registers to r4 [destroys r10] */ + SAVE_SPRN(LR, 0x1c) + bl save_regs + + /* flush caches [destroys r3, r4] */ + bl flush_data_cache + + + /* copy code to sram */ + mr r4, r7 + li r3, (sram_code_end - sram_code)/4 + mtctr r3 + lis r3, sram_code@h + ori r3, r3, sram_code@l +1: + lwz r5, 0(r3) + stw r5, 0(r4) + addi r3, r3, 4 + addi r4, r4, 4 + bdnz 1b + + /* get tb_ticks_per_usec */ + lis r3, tb_ticks_per_usec@h + lwz r11, tb_ticks_per_usec@l(r3) + + /* disable I and D caches */ + mfspr r3, SPRN_HID0 + ori r3, r3, HID0_ICE | HID0_DCE + xori r3, r3, HID0_ICE | HID0_DCE + sync; isync; + mtspr SPRN_HID0, r3 + sync; isync; + + /* jump to sram */ + mtlr r7 + blrl + /* doesn't return */ + + +sram_code: + /* self refresh */ + lwz r4, SDRAM_CTRL(r8) + + /* send NOP (precharge) */ + oris r4, r4, SC_MODE_EN@h /* mode_en */ + stw r4, SDRAM_CTRL(r8) + sync + + ori r4, r4, SC_SOFT_PRE /* soft_pre */ + stw r4, SDRAM_CTRL(r8) + sync + xori r4, r4, SC_SOFT_PRE + + xoris r4, r4, SC_MODE_EN@h /* !mode_en */ + stw r4, SDRAM_CTRL(r8) + sync + + /* delay (for NOP to finish) */ + li r12, 1 + bl udelay + + /* + * mode_en must not be set when enabling self-refresh + * send AR with CKE low (self-refresh) + */ + oris r4, r4, (SC_REF_EN | SC_CKE)@h + xoris r4, r4, (SC_CKE)@h /* ref_en !cke */ + stw r4, SDRAM_CTRL(r8) + sync + + /* delay (after !CKE there should be two cycles) */ + li r12, 1 + bl udelay + + /* disable clock */ + lwz r4, CDM_CE(r8) + ori r4, r4, CDM_SDRAM + xori r4, r4, CDM_SDRAM + stw r4, CDM_CE(r8) + sync + + /* delay a bit */ + li r12, 1 + bl udelay + + + /* turn off with QT chip */ + li r4, 0x02 + stb r4, GPIOW_GPIOE(r8) /* enable gpio_wkup1 */ + sync + + stb r4, GPIOW_DVO(r8) /* "output" high */ + sync + stb r4, GPIOW_DDR(r8) /* output */ + sync + stb r4, GPIOW_DVO(r8) /* output high */ + sync + + /* 10uS delay */ + li r12, 10 + bl udelay + + /* turn off */ + li r4, 0 + stb r4, GPIOW_DVO(r8) /* output low */ + sync + + /* wait until we're offline */ + 1: + b 1b + + + /* local udelay in sram is needed */ + udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ + mullw r12, r12, r11 + mftb r13 /* start */ + addi r12, r13, r12 /* end */ + 1: + mftb r13 /* current */ + cmp cr0, r13, r12 + blt 1b + blr + +sram_code_end: + + + +/* uboot jumps here on resume */ +lite5200_wakeup: + bl restore_regs + + + /* HIDs, MSR */ + LOAD_SPRN(HID1, 0x19) + LOAD_SPRN(HID2, 0x1a) + + + 
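+	/*
+	 * Orientation sketch (descriptive comment only): restore_regs
+	 * ran with the MMU still off, so r4 holds the *physical* address
+	 * of the register save area; the rfi below turns translation on,
+	 * and mmu_on rebases r4 by CONFIG_KERNEL_START before touching
+	 * the remaining saved state.
+	 */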
/* address translation is tricky (see turn_on_mmu) */ + mfmsr r10 + ori r10, r10, MSR_DR | MSR_IR + + + mtspr SPRN_SRR1, r10 + lis r10, mmu_on@h + ori r10, r10, mmu_on@l + mtspr SPRN_SRR0, r10 + sync + rfi +mmu_on: + /* kernel offset (r4 is still set from restore_registers) */ + addis r4, r4, CONFIG_KERNEL_START@h + + + /* restore MSR */ + lwz r10, (4*0x1b)(r4) + mtmsr r10 + sync; isync; + + /* invalidate caches */ + mfspr r10, SPRN_HID0 + ori r5, r10, HID0_ICFI | HID0_DCI + mtspr SPRN_HID0, r5 /* invalidate caches */ + sync; isync; + mtspr SPRN_HID0, r10 + sync; isync; + + /* enable caches */ + lwz r10, (4*0x18)(r4) + mtspr SPRN_HID0, r10 /* restore (enable caches, DPM) */ + /* ^ this has to be after address translation set in MSR */ + sync + isync + + + /* restore 0xf0 (BDI2000) */ + lis r3, CONFIG_KERNEL_START@h + lwz r10, (0x1d*4)(r4) + stw r10, 0xf0(r3) + + LOAD_SPRN(LR, 0x1c) + + + blr + + +/* ---------------------------------------------------------------------- */ +/* boring code: helpers */ + +/* save registers */ +#define SAVE_BAT(n, addr) \ + SAVE_SPRN(DBAT##n##L, addr); \ + SAVE_SPRN(DBAT##n##U, addr+1); \ + SAVE_SPRN(IBAT##n##L, addr+2); \ + SAVE_SPRN(IBAT##n##U, addr+3); + +#define SAVE_SR(n, addr) \ + mfsr r10, n; \ + stw r10, ((addr)*4)(r4); + +#define SAVE_4SR(n, addr) \ + SAVE_SR(n, addr); \ + SAVE_SR(n+1, addr+1); \ + SAVE_SR(n+2, addr+2); \ + SAVE_SR(n+3, addr+3); + +save_regs: + stw r0, 0(r4) + stw r1, 0x4(r4) + stw r2, 0x8(r4) + stmw r11, 0xc(r4) /* 0xc -> 0x5f, (0x18*4-1) */ + + SAVE_SPRN(HID0, 0x18) + SAVE_SPRN(HID1, 0x19) + SAVE_SPRN(HID2, 0x1a) + mfmsr r10 + stw r10, (4*0x1b)(r4) + /*SAVE_SPRN(LR, 0x1c) have to save it before the call */ + /* 0x1d reserved by 0xf0 */ + SAVE_SPRN(RPA, 0x1e) + SAVE_SPRN(SDR1, 0x1f) + + /* save MMU regs */ + SAVE_BAT(0, 0x20) + SAVE_BAT(1, 0x24) + SAVE_BAT(2, 0x28) + SAVE_BAT(3, 0x2c) + SAVE_BAT(4, 0x30) + SAVE_BAT(5, 0x34) + SAVE_BAT(6, 0x38) + SAVE_BAT(7, 0x3c) + + SAVE_4SR(0, 0x40) + SAVE_4SR(4, 0x44) + SAVE_4SR(8, 0x48) + SAVE_4SR(12, 0x4c) + + SAVE_SPRN(SPRG0, 0x50) + SAVE_SPRN(SPRG1, 0x51) + SAVE_SPRN(SPRG2, 0x52) + SAVE_SPRN(SPRG3, 0x53) + SAVE_SPRN(SPRG4, 0x54) + SAVE_SPRN(SPRG5, 0x55) + SAVE_SPRN(SPRG6, 0x56) + SAVE_SPRN(SPRG7, 0x57) + + SAVE_SPRN(IABR, 0x58) + SAVE_SPRN(DABR, 0x59) + SAVE_SPRN(TBRL, 0x5a) + SAVE_SPRN(TBRU, 0x5b) + + blr + + +/* restore registers */ +#define LOAD_BAT(n, addr) \ + LOAD_SPRN(DBAT##n##L, addr); \ + LOAD_SPRN(DBAT##n##U, addr+1); \ + LOAD_SPRN(IBAT##n##L, addr+2); \ + LOAD_SPRN(IBAT##n##U, addr+3); + +#define LOAD_SR(n, addr) \ + lwz r10, ((addr)*4)(r4); \ + mtsr n, r10; + +#define LOAD_4SR(n, addr) \ + LOAD_SR(n, addr); \ + LOAD_SR(n+1, addr+1); \ + LOAD_SR(n+2, addr+2); \ + LOAD_SR(n+3, addr+3); + +restore_regs: + lis r4, registers@h + ori r4, r4, registers@l + + /* MMU is not up yet */ + subis r4, r4, CONFIG_KERNEL_START@h + + lwz r0, 0(r4) + lwz r1, 0x4(r4) + lwz r2, 0x8(r4) + lmw r11, 0xc(r4) + + /* + * these are a bit tricky + * + * 0x18 - HID0 + * 0x19 - HID1 + * 0x1a - HID2 + * 0x1b - MSR + * 0x1c - LR + * 0x1d - reserved by 0xf0 (BDI2000) + */ + LOAD_SPRN(RPA, 0x1e); + LOAD_SPRN(SDR1, 0x1f); + + /* restore MMU regs */ + LOAD_BAT(0, 0x20) + LOAD_BAT(1, 0x24) + LOAD_BAT(2, 0x28) + LOAD_BAT(3, 0x2c) + LOAD_BAT(4, 0x30) + LOAD_BAT(5, 0x34) + LOAD_BAT(6, 0x38) + LOAD_BAT(7, 0x3c) + + LOAD_4SR(0, 0x40) + LOAD_4SR(4, 0x44) + LOAD_4SR(8, 0x48) + LOAD_4SR(12, 0x4c) + + /* rest of regs */ + LOAD_SPRN(SPRG0, 0x50); + LOAD_SPRN(SPRG1, 0x51); + LOAD_SPRN(SPRG2, 0x52); + LOAD_SPRN(SPRG3, 0x53); + 
LOAD_SPRN(SPRG4, 0x54); + LOAD_SPRN(SPRG5, 0x55); + LOAD_SPRN(SPRG6, 0x56); + LOAD_SPRN(SPRG7, 0x57); + + LOAD_SPRN(IABR, 0x58); + LOAD_SPRN(DABR, 0x59); + LOAD_SPRN(TBWL, 0x5a); /* these two have separate R/W regs */ + LOAD_SPRN(TBWU, 0x5b); + + blr + + + +/* cache flushing code. copied from arch/ppc/boot/util.S */ +#define NUM_CACHE_LINES (128*8) + +/* + * Flush data cache + * Do this by just reading lots of stuff into the cache. + */ +flush_data_cache: + lis r3,CONFIG_KERNEL_START@h + ori r3,r3,CONFIG_KERNEL_START@l + li r4,NUM_CACHE_LINES + mtctr r4 +1: + lwz r4,0(r3) + addi r3,r3,L1_CACHE_BYTES /* Next line, please */ + bdnz 1b + blr diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c new file mode 100644 index 000000000..32cae33c4 --- /dev/null +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -0,0 +1,257 @@ +/* + * Support for 'media5200-platform' compatible boards. + * + * Copyright (C) 2008 Secret Lab Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Description: + * This code implements support for the Freescape Media5200 platform + * (built around the MPC5200 SoC). + * + * Notable characteristic of the Media5200 is the presence of an FPGA + * that has all external IRQ lines routed through it. This file implements + * a cascaded interrupt controller driver which attaches itself to the + * Virtual IRQ subsystem after the primary mpc5200 interrupt controller + * is initialized. + * + */ + +#undef DEBUG + +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <asm/time.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/mpc52xx.h> + +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { + { .compatible = "fsl,mpc5200-gpio", }, + { .compatible = "mpc5200-gpio", }, + {} +}; + +/* FPGA register set */ +#define MEDIA5200_IRQ_ENABLE (0x40c) +#define MEDIA5200_IRQ_STATUS (0x410) +#define MEDIA5200_NUM_IRQS (6) +#define MEDIA5200_IRQ_SHIFT (32 - MEDIA5200_NUM_IRQS) + +struct media5200_irq { + void __iomem *regs; + spinlock_t lock; + struct irq_domain *irqhost; +}; +struct media5200_irq media5200_irq; + +static void media5200_irq_unmask(struct irq_data *d) +{ + unsigned long flags; + u32 val; + + spin_lock_irqsave(&media5200_irq.lock, flags); + val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE); + val |= 1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d)); + out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val); + spin_unlock_irqrestore(&media5200_irq.lock, flags); +} + +static void media5200_irq_mask(struct irq_data *d) +{ + unsigned long flags; + u32 val; + + spin_lock_irqsave(&media5200_irq.lock, flags); + val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE); + val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d))); + out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val); + spin_unlock_irqrestore(&media5200_irq.lock, flags); +} + +static struct irq_chip media5200_irq_chip = { + .name = "Media5200 FPGA", + .irq_unmask = media5200_irq_unmask, + .irq_mask = media5200_irq_mask, + .irq_mask_ack = media5200_irq_mask, +}; + +void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + int sub_virq, val; + u32 status, enable; + + /* Mask off the cascaded IRQ */ + 
raw_spin_lock(&desc->lock); + chip->irq_mask(&desc->irq_data); + raw_spin_unlock(&desc->lock); + + /* Ask the FPGA for IRQ status. If 'val' is 0, then no irqs + * are pending. 'ffs()' is 1 based */ + status = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE); + enable = in_be32(media5200_irq.regs + MEDIA5200_IRQ_STATUS); + val = ffs((status & enable) >> MEDIA5200_IRQ_SHIFT); + if (val) { + sub_virq = irq_linear_revmap(media5200_irq.irqhost, val - 1); + /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i subvirq=%i\n", + * __func__, virq, status, enable, val - 1, sub_virq); + */ + generic_handle_irq(sub_virq); + } + + /* Processing done; can reenable the cascade now */ + raw_spin_lock(&desc->lock); + chip->irq_ack(&desc->irq_data); + if (!irqd_irq_disabled(&desc->irq_data)) + chip->irq_unmask(&desc->irq_data); + raw_spin_unlock(&desc->lock); +} + +static int media5200_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + pr_debug("%s: h=%p, virq=%i, hwirq=%i\n", __func__, h, virq, (int)hw); + irq_set_chip_data(virq, &media5200_irq); + irq_set_chip_and_handler(virq, &media5200_irq_chip, handle_level_irq); + irq_set_status_flags(virq, IRQ_LEVEL); + return 0; +} + +static int media5200_irq_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, + unsigned int *out_flags) +{ + if (intsize != 2) + return -1; + + pr_debug("%s: bank=%i, number=%i\n", __func__, intspec[0], intspec[1]); + *out_hwirq = intspec[1]; + *out_flags = IRQ_TYPE_NONE; + return 0; +} + +static const struct irq_domain_ops media5200_irq_ops = { + .map = media5200_irq_map, + .xlate = media5200_irq_xlate, +}; + +/* + * Setup Media5200 IRQ mapping + */ +static void __init media5200_init_irq(void) +{ + struct device_node *fpga_np; + int cascade_virq; + + /* First setup the regular MPC5200 interrupt controller */ + mpc52xx_init_irq(); + + /* Now find the FPGA IRQ */ + fpga_np = of_find_compatible_node(NULL, NULL, "fsl,media5200-fpga"); + if (!fpga_np) + goto out; + pr_debug("%s: found fpga node: %s\n", __func__, fpga_np->full_name); + + media5200_irq.regs = of_iomap(fpga_np, 0); + if (!media5200_irq.regs) + goto out; + pr_debug("%s: mapped to %p\n", __func__, media5200_irq.regs); + + cascade_virq = irq_of_parse_and_map(fpga_np, 0); + if (!cascade_virq) + goto out; + pr_debug("%s: cascaded on virq=%i\n", __func__, cascade_virq); + + /* Disable all FPGA IRQs */ + out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, 0); + + spin_lock_init(&media5200_irq.lock); + + media5200_irq.irqhost = irq_domain_add_linear(fpga_np, + MEDIA5200_NUM_IRQS, &media5200_irq_ops, &media5200_irq); + if (!media5200_irq.irqhost) + goto out; + pr_debug("%s: allocated irqhost\n", __func__); + + irq_set_handler_data(cascade_virq, &media5200_irq); + irq_set_chained_handler(cascade_virq, media5200_irq_cascade); + + return; + + out: + pr_err("Could not find Media5200 FPGA; PCI interrupts will not work\n"); +} + +/* + * Setup the architecture + */ +static void __init media5200_setup_arch(void) +{ + + struct device_node *np; + struct mpc52xx_gpio __iomem *gpio; + u32 port_config; + + if (ppc_md.progress) + ppc_md.progress("media5200_setup_arch()", 0); + + /* Map important registers from the internal memory map */ + mpc52xx_map_common_devices(); + + /* Some mpc5200 & mpc5200b related configuration */ + mpc5200_setup_xlb_arbiter(); + + mpc52xx_setup_pci(); + + np = of_find_matching_node(NULL, mpc5200_gpio_ids); + gpio = of_iomap(np, 0); + of_node_put(np); + if (!gpio) { + 
printk(KERN_ERR "%s() failed. Expect abnormal behavior\n",
+		       __func__);
+		return;
+	}
+
+	/* Set port config */
+	port_config = in_be32(&gpio->port_config);
+
+	port_config &= ~0x03000000;	/* ATA CS is on csb_4/5 */
+	port_config |= 0x01000000;
+
+	out_be32(&gpio->port_config, port_config);
+
+	/* Unmap zone */
+	iounmap(gpio);
+
+}
+
+/* list of the supported boards */
+static const char * const board[] __initconst = {
+	"fsl,media5200",
+	NULL
+};
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init media5200_probe(void)
+{
+	return of_flat_dt_match(of_get_flat_dt_root(), board);
+}
+
+define_machine(media5200_platform) {
+	.name		= "media5200-platform",
+	.probe		= media5200_probe,
+	.setup_arch	= media5200_setup_arch,
+	.init		= mpc52xx_declare_of_platform_devices,
+	.init_IRQ	= media5200_init_irq,
+	.get_irq	= mpc52xx_get_irq,
+	.restart	= mpc52xx_restart,
+	.calibrate_decr	= generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c
new file mode 100644
index 000000000..792a301a0
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c
@@ -0,0 +1,85 @@
+/*
+ * Support for 'mpc5200-simple-platform' compatible boards.
+ *
+ * Written by Marian Balakowicz <m8@semihalf.com>
+ * Copyright (C) 2007 Semihalf
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * Description:
+ * This code implements support for simple MPC52xx based boards which
+ * do not need a custom platform specific setup. Such boards are
+ * supported assuming the following:
+ *
+ * - GPIO pins are configured by the firmware,
+ * - CDM configuration (clocking) is set up correctly by firmware,
+ * - if the 'fsl,has-wdt' property is present in one of the
+ *   gpt nodes, then it is safe to use such gpt to reset the board,
+ * - PCI is supported if enabled in the kernel configuration
+ *   and if there is a PCI bus node defined in the device tree.
+ *
+ * Boards that are compatible with this generic platform support
+ * are listed in a 'board' table.
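+ *
+ * For example (an illustrative fragment, not a complete device tree):
+ * a board whose root node carries
+ *   compatible = "phytec,pcm030";
+ * is matched by mpc5200_simple_probe() below and needs no board
+ * specific code of its own.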
+ */
+
+#undef DEBUG
+#include <asm/time.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/mpc52xx.h>
+
+/*
+ * Setup the architecture
+ */
+static void __init mpc5200_simple_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("mpc5200_simple_setup_arch()", 0);
+
+	/* Map important registers from the internal memory map */
+	mpc52xx_map_common_devices();
+
+	/* Some mpc5200 & mpc5200b related configuration */
+	mpc5200_setup_xlb_arbiter();
+
+	mpc52xx_setup_pci();
+}
+
+/* list of the supported boards */
+static const char *board[] __initdata = {
+	"anonymous,a3m071",
+	"anonymous,a4m072",
+	"anon,charon",
+	"ifm,o2d",
+	"intercontrol,digsy-mtc",
+	"manroland,mucmc52",
+	"manroland,uc101",
+	"phytec,pcm030",
+	"phytec,pcm032",
+	"promess,motionpro",
+	"schindler,cm5200",
+	"tqc,tqm5200",
+	NULL
+};
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init mpc5200_simple_probe(void)
+{
+	return of_flat_dt_match(of_get_flat_dt_root(), board);
+}
+
+define_machine(mpc5200_simple_platform) {
+	.name		= "mpc5200-simple-platform",
+	.probe		= mpc5200_simple_probe,
+	.setup_arch	= mpc5200_simple_setup_arch,
+	.init		= mpc52xx_declare_of_platform_devices,
+	.init_IRQ	= mpc52xx_init_irq,
+	.get_irq	= mpc52xx_get_irq,
+	.restart	= mpc52xx_restart,
+	.calibrate_decr	= generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c
new file mode 100644
index 000000000..26993826a
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c
@@ -0,0 +1,344 @@
+/*
+ *
+ * Utility functions for the Freescale MPC52xx.
+ *
+ * Copyright (C) 2006 Sylvain Munaut <tnt@246tNt.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ */
+
+#undef DEBUG
+
+#include <linux/gpio.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/of_platform.h>
+#include <linux/of_gpio.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/mpc52xx.h>
+
+/* MPC5200 device tree match tables */
+static const struct of_device_id mpc52xx_xlb_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-xlb", },
+	{ .compatible = "mpc5200-xlb", },
+	{}
+};
+static const struct of_device_id mpc52xx_bus_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-immr", },
+	{ .compatible = "fsl,mpc5200b-immr", },
+	{ .compatible = "simple-bus", },
+
+	/* deprecated matches; shouldn't be used in new device trees */
+	{ .compatible = "fsl,lpb", },
+	{ .type = "builtin", .compatible = "mpc5200", }, /* efika */
+	{ .type = "soc", .compatible = "mpc5200", }, /* lite5200 */
+	{}
+};
+
+/*
+ * The mpc52xx_wdt pointer below is mapped in mpc52xx_map_common_devices()
+ * and used in mpc52xx_restart(). A permanent mapping is required because
+ * mpc52xx_restart() can be called from interrupt context, where mapping
+ * the node (which calls ioremap()) is not allowed.
+ */
+static DEFINE_SPINLOCK(mpc52xx_lock);
+static struct mpc52xx_gpt __iomem *mpc52xx_wdt;
+static struct mpc52xx_cdm __iomem *mpc52xx_cdm;
+
+/*
+ * Configure the XLB arbiter settings to match what Linux expects.
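+ *
+ * (Reading the writes below: 0xff in master_pri_enable enables
+ * priority arbitration for all eight XLB masters, and 0x11111111
+ * presumably assigns each master the same 4-bit priority of 1.)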
+ */
+void __init
+mpc5200_setup_xlb_arbiter(void)
+{
+	struct device_node *np;
+	struct mpc52xx_xlb __iomem *xlb;
+
+	np = of_find_matching_node(NULL, mpc52xx_xlb_ids);
+	xlb = of_iomap(np, 0);
+	of_node_put(np);
+	if (!xlb) {
+		printk(KERN_ERR __FILE__ ": "
+			"Error mapping XLB in mpc5200_setup_xlb_arbiter(). "
+			"Expect some abnormal behavior\n");
+		return;
+	}
+
+	/* Configure the XLB Arbiter priorities */
+	out_be32(&xlb->master_pri_enable, 0xff);
+	out_be32(&xlb->master_priority, 0x11111111);
+
+	/*
+	 * Disable XLB pipelining
+	 * (cf. erratum 292; we could do this only just before an ATA PIO
+	 * transaction and re-enable it afterwards ...)
+	 * Not needed on MPC5200B.
+	 */
+	if ((mfspr(SPRN_SVR) & MPC5200_SVR_MASK) == MPC5200_SVR)
+		out_be32(&xlb->config, in_be32(&xlb->config) | MPC52xx_XLB_CFG_PLDIS);
+
+	iounmap(xlb);
+}
+
+/*
+ * These pointers are mapped in mpc52xx_map_common_devices() and
+ * used in mpc5200_psc_ac97_gpio_reset().
+ */
+static DEFINE_SPINLOCK(gpio_lock);
+struct mpc52xx_gpio __iomem *simple_gpio;
+struct mpc52xx_gpio_wkup __iomem *wkup_gpio;
+
+/**
+ * mpc52xx_declare_of_platform_devices: register internal devices and children
+ *					of the localplus bus to the of_platform
+ *					bus.
+ */
+void __init mpc52xx_declare_of_platform_devices(void)
+{
+	/* Find all the 'platform' devices and register them. */
+	if (of_platform_populate(NULL, mpc52xx_bus_ids, NULL, NULL))
+		pr_err(__FILE__ ": Error while populating devices from DT\n");
+}
+
+/*
+ * match tables used by mpc52xx_map_common_devices()
+ */
+static const struct of_device_id mpc52xx_gpt_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-gpt", },
+	{ .compatible = "mpc5200-gpt", }, /* old */
+	{}
+};
+static const struct of_device_id mpc52xx_cdm_ids[] __initconst = {
+	{ .compatible = "fsl,mpc5200-cdm", },
+	{ .compatible = "mpc5200-cdm", }, /* old */
+	{}
+};
+static const struct of_device_id mpc52xx_gpio_simple[] __initconst = {
+	{ .compatible = "fsl,mpc5200-gpio", },
+	{}
+};
+static const struct of_device_id mpc52xx_gpio_wkup[] __initconst = {
+	{ .compatible = "fsl,mpc5200-gpio-wkup", },
+	{}
+};
+
+
+/**
+ * mpc52xx_map_common_devices: iomap devices required by common code
+ */
+void __init
+mpc52xx_map_common_devices(void)
+{
+	struct device_node *np;
+
+	/* mpc52xx_wdt is mapped here and used in mpc52xx_restart,
+	 * possibly from an interrupt context. The wdt is only implemented
+	 * on gpt0, so check the has-wdt property before mapping.
+	 */
+	for_each_matching_node(np, mpc52xx_gpt_ids) {
+		if (of_get_property(np, "fsl,has-wdt", NULL) ||
+		    of_get_property(np, "has-wdt", NULL)) {
+			mpc52xx_wdt = of_iomap(np, 0);
+			of_node_put(np);
+			break;
+		}
+	}
+
+	/* Clock Distribution Module, used by PSC clock setting function */
+	np = of_find_matching_node(NULL, mpc52xx_cdm_ids);
+	mpc52xx_cdm = of_iomap(np, 0);
+	of_node_put(np);
+
+	/* simple_gpio registers */
+	np = of_find_matching_node(NULL, mpc52xx_gpio_simple);
+	simple_gpio = of_iomap(np, 0);
+	of_node_put(np);
+
+	/* wkup_gpio registers */
+	np = of_find_matching_node(NULL, mpc52xx_gpio_wkup);
+	wkup_gpio = of_iomap(np, 0);
+	of_node_put(np);
+}
+
+/**
+ * mpc52xx_set_psc_clkdiv: Set clock divider in the CDM for PSC ports
+ *
+ * @psc_id: id of psc port; must be 1,2,3 or 6
+ * @clkdiv: clock divider value to put into CDM PSC register.
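+ *
+ * Example with an assumed divider value: mpc52xx_set_psc_clkdiv(1, 0x18)
+ * writes 0x8018 (enable bit 0x8000 | divider) to mclken_div_psc1 and
+ * then sets the 0x20 bit in clk_enables, as implemented below.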
+ */ +int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv) +{ + unsigned long flags; + u16 __iomem *reg; + u32 val; + u32 mask; + u32 mclken_div; + + if (!mpc52xx_cdm) + return -ENODEV; + + mclken_div = 0x8000 | (clkdiv & 0x1FF); + switch (psc_id) { + case 1: reg = &mpc52xx_cdm->mclken_div_psc1; mask = 0x20; break; + case 2: reg = &mpc52xx_cdm->mclken_div_psc2; mask = 0x40; break; + case 3: reg = &mpc52xx_cdm->mclken_div_psc3; mask = 0x80; break; + case 6: reg = &mpc52xx_cdm->mclken_div_psc6; mask = 0x10; break; + default: + return -ENODEV; + } + + /* Set the rate and enable the clock */ + spin_lock_irqsave(&mpc52xx_lock, flags); + out_be16(reg, mclken_div); + val = in_be32(&mpc52xx_cdm->clk_enables); + out_be32(&mpc52xx_cdm->clk_enables, val | mask); + spin_unlock_irqrestore(&mpc52xx_lock, flags); + + return 0; +} +EXPORT_SYMBOL(mpc52xx_set_psc_clkdiv); + +/** + * mpc52xx_get_xtal_freq - Get SYS_XTAL_IN frequency for a device + * + * @node: device node + * + * Returns the frequency of the external oscillator clock connected + * to the SYS_XTAL_IN pin, or 0 if it cannot be determined. + */ +unsigned int mpc52xx_get_xtal_freq(struct device_node *node) +{ + u32 val; + unsigned int freq; + + if (!mpc52xx_cdm) + return 0; + + freq = mpc5xxx_get_bus_frequency(node); + if (!freq) + return 0; + + if (in_8(&mpc52xx_cdm->ipb_clk_sel) & 0x1) + freq *= 2; + + val = in_be32(&mpc52xx_cdm->rstcfg); + if (val & (1 << 5)) + freq *= 8; + else + freq *= 4; + if (val & (1 << 6)) + freq /= 12; + else + freq /= 16; + + return freq; +} +EXPORT_SYMBOL(mpc52xx_get_xtal_freq); + +/** + * mpc52xx_restart: ppc_md->restart hook for mpc5200 using the watchdog timer + */ +void +mpc52xx_restart(char *cmd) +{ + local_irq_disable(); + + /* Turn on the watchdog and wait for it to expire. + * It effectively does a reset. */ + if (mpc52xx_wdt) { + out_be32(&mpc52xx_wdt->mode, 0x00000000); + out_be32(&mpc52xx_wdt->count, 0x000000ff); + out_be32(&mpc52xx_wdt->mode, 0x00009004); + } else + printk(KERN_ERR __FILE__ ": " + "mpc52xx_restart: Can't access wdt. 
" + "Restart impossible, system halted.\n"); + + while (1); +} + +#define PSC1_RESET 0x1 +#define PSC1_SYNC 0x4 +#define PSC1_SDATA_OUT 0x1 +#define PSC2_RESET 0x2 +#define PSC2_SYNC (0x4<<4) +#define PSC2_SDATA_OUT (0x1<<4) +#define MPC52xx_GPIO_PSC1_MASK 0x7 +#define MPC52xx_GPIO_PSC2_MASK (0x7<<4) + +/** + * mpc5200_psc_ac97_gpio_reset: Use gpio pins to reset the ac97 bus + * + * @psc: psc number to reset (only psc 1 and 2 support ac97) + */ +int mpc5200_psc_ac97_gpio_reset(int psc_number) +{ + unsigned long flags; + u32 gpio; + u32 mux; + int out; + int reset; + int sync; + + if ((!simple_gpio) || (!wkup_gpio)) + return -ENODEV; + + switch (psc_number) { + case 0: + reset = PSC1_RESET; /* AC97_1_RES */ + sync = PSC1_SYNC; /* AC97_1_SYNC */ + out = PSC1_SDATA_OUT; /* AC97_1_SDATA_OUT */ + gpio = MPC52xx_GPIO_PSC1_MASK; + break; + case 1: + reset = PSC2_RESET; /* AC97_2_RES */ + sync = PSC2_SYNC; /* AC97_2_SYNC */ + out = PSC2_SDATA_OUT; /* AC97_2_SDATA_OUT */ + gpio = MPC52xx_GPIO_PSC2_MASK; + break; + default: + pr_err(__FILE__ ": Unable to determine PSC, no ac97 " + "cold-reset will be performed\n"); + return -ENODEV; + } + + spin_lock_irqsave(&gpio_lock, flags); + + /* Reconfiure pin-muxing to gpio */ + mux = in_be32(&simple_gpio->port_config); + out_be32(&simple_gpio->port_config, mux & (~gpio)); + + /* enable gpio pins for output */ + setbits8(&wkup_gpio->wkup_gpioe, reset); + setbits32(&simple_gpio->simple_gpioe, sync | out); + + setbits8(&wkup_gpio->wkup_ddr, reset); + setbits32(&simple_gpio->simple_ddr, sync | out); + + /* Assert cold reset */ + clrbits32(&simple_gpio->simple_dvo, sync | out); + clrbits8(&wkup_gpio->wkup_dvo, reset); + + /* wait for 1 us */ + udelay(1); + + /* Deassert reset */ + setbits8(&wkup_gpio->wkup_dvo, reset); + + /* wait at least 200ns */ + /* 7 ~= (200ns * timebase) / ns2sec */ + __delay(7); + + /* Restore pin-muxing */ + out_be32(&simple_gpio->port_config, mux); + + spin_unlock_irqrestore(&gpio_lock, flags); + + return 0; +} +EXPORT_SYMBOL(mpc5200_psc_ac97_gpio_reset); diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c new file mode 100644 index 000000000..c949ca055 --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -0,0 +1,798 @@ +/* + * MPC5200 General Purpose Timer device driver + * + * Copyright (c) 2009 Secret Lab Technologies Ltd. + * Copyright (c) 2008 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This file is a driver for the the General Purpose Timer (gpt) devices + * found on the MPC5200 SoC. Each timer has an IO pin which can be used + * for GPIO or can be used to raise interrupts. The timer function can + * be used independently from the IO pin, or it can be used to control + * output signals or measure input signals. + * + * This driver supports the GPIO and IRQ controller functions of the GPT + * device. Timer functions are not yet supported. + * + * The timer gpt0 can be used as watchdog (wdt). If the wdt mode is used, + * this prevents the use of any gpt0 gpt function (i.e. they will fail with + * -EBUSY). Thus, the safety wdt function always has precedence over the gpt + * function. 
If the kernel has been compiled with CONFIG_WATCHDOG_NOWAYOUT, + * this means that gpt0 is locked in wdt mode until the next reboot - this + * may be a requirement in safety applications. + * + * To use the GPIO function, the following two properties must be added + * to the device tree node for the gpt device (typically in the .dts file + * for the board): + * gpio-controller; + * #gpio-cells = < 2 >; + * This driver will register the GPIO pin if it finds the gpio-controller + * property in the device tree. + * + * To use the IRQ controller function, the following two properties must + * be added to the device tree node for the gpt device: + * interrupt-controller; + * #interrupt-cells = < 1 >; + * The IRQ controller binding only uses one cell to specify the interrupt, + * and the IRQ flags are encoded in the cell. A cell is not used to encode + * the IRQ number because the GPT only has a single IRQ source. For flags, + * a value of '1' means rising edge sensitive and '2' means falling edge. + * + * The GPIO and the IRQ controller functions can be used at the same time, + * but in this use case the IO line will only work as an input. Trying to + * use it as a GPIO output will not work. + * + * When using the GPIO line as an output, it can either be driven as normal + * IO, or it can be an Open Collector (OC) output. At the moment it is the + * responsibility of either the bootloader or the platform setup code to set + * the output mode. This driver does not change the output mode setting. + */ + +#include <linux/device.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/of_gpio.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/watchdog.h> +#include <linux/miscdevice.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <asm/div64.h> +#include <asm/mpc52xx.h> + +MODULE_DESCRIPTION("Freescale MPC52xx gpt driver"); +MODULE_AUTHOR("Sascha Hauer, Grant Likely, Albrecht DreĂŸ"); +MODULE_LICENSE("GPL"); + +/** + * struct mpc52xx_gpt - Private data structure for MPC52xx GPT driver + * @dev: pointer to device structure + * @regs: virtual address of GPT registers + * @lock: spinlock to coordinate between different functions. 
+ * @gc: gpio_chip instance structure; used when GPIO is enabled + * @irqhost: Pointer to irq_domain instance; used when IRQ mode is supported + * @wdt_mode: only relevant for gpt0: bit 0 (MPC52xx_GPT_CAN_WDT) indicates + * if the gpt may be used as wdt, bit 1 (MPC52xx_GPT_IS_WDT) indicates + * if the timer is actively used as wdt which blocks gpt functions + */ +struct mpc52xx_gpt_priv { + struct list_head list; /* List of all GPT devices */ + struct device *dev; + struct mpc52xx_gpt __iomem *regs; + spinlock_t lock; + struct irq_domain *irqhost; + u32 ipb_freq; + u8 wdt_mode; + +#if defined(CONFIG_GPIOLIB) + struct gpio_chip gc; +#endif +}; + +LIST_HEAD(mpc52xx_gpt_list); +DEFINE_MUTEX(mpc52xx_gpt_list_mutex); + +#define MPC52xx_GPT_MODE_MS_MASK (0x07) +#define MPC52xx_GPT_MODE_MS_IC (0x01) +#define MPC52xx_GPT_MODE_MS_OC (0x02) +#define MPC52xx_GPT_MODE_MS_PWM (0x03) +#define MPC52xx_GPT_MODE_MS_GPIO (0x04) + +#define MPC52xx_GPT_MODE_GPIO_MASK (0x30) +#define MPC52xx_GPT_MODE_GPIO_OUT_LOW (0x20) +#define MPC52xx_GPT_MODE_GPIO_OUT_HIGH (0x30) + +#define MPC52xx_GPT_MODE_COUNTER_ENABLE (0x1000) +#define MPC52xx_GPT_MODE_CONTINUOUS (0x0400) +#define MPC52xx_GPT_MODE_OPEN_DRAIN (0x0200) +#define MPC52xx_GPT_MODE_IRQ_EN (0x0100) +#define MPC52xx_GPT_MODE_WDT_EN (0x8000) + +#define MPC52xx_GPT_MODE_ICT_MASK (0x030000) +#define MPC52xx_GPT_MODE_ICT_RISING (0x010000) +#define MPC52xx_GPT_MODE_ICT_FALLING (0x020000) +#define MPC52xx_GPT_MODE_ICT_TOGGLE (0x030000) + +#define MPC52xx_GPT_MODE_WDT_PING (0xa5) + +#define MPC52xx_GPT_STATUS_IRQMASK (0x000f) + +#define MPC52xx_GPT_CAN_WDT (1 << 0) +#define MPC52xx_GPT_IS_WDT (1 << 1) + + +/* --------------------------------------------------------------------- + * Cascaded interrupt controller hooks + */ + +static void mpc52xx_gpt_irq_unmask(struct irq_data *d) +{ + struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); + unsigned long flags; + + spin_lock_irqsave(&gpt->lock, flags); + setbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN); + spin_unlock_irqrestore(&gpt->lock, flags); +} + +static void mpc52xx_gpt_irq_mask(struct irq_data *d) +{ + struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); + unsigned long flags; + + spin_lock_irqsave(&gpt->lock, flags); + clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN); + spin_unlock_irqrestore(&gpt->lock, flags); +} + +static void mpc52xx_gpt_irq_ack(struct irq_data *d) +{ + struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); + + out_be32(&gpt->regs->status, MPC52xx_GPT_STATUS_IRQMASK); +} + +static int mpc52xx_gpt_irq_set_type(struct irq_data *d, unsigned int flow_type) +{ + struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); + unsigned long flags; + u32 reg; + + dev_dbg(gpt->dev, "%s: virq=%i type=%x\n", __func__, d->irq, flow_type); + + spin_lock_irqsave(&gpt->lock, flags); + reg = in_be32(&gpt->regs->mode) & ~MPC52xx_GPT_MODE_ICT_MASK; + if (flow_type & IRQF_TRIGGER_RISING) + reg |= MPC52xx_GPT_MODE_ICT_RISING; + if (flow_type & IRQF_TRIGGER_FALLING) + reg |= MPC52xx_GPT_MODE_ICT_FALLING; + out_be32(&gpt->regs->mode, reg); + spin_unlock_irqrestore(&gpt->lock, flags); + + return 0; +} + +static struct irq_chip mpc52xx_gpt_irq_chip = { + .name = "MPC52xx GPT", + .irq_unmask = mpc52xx_gpt_irq_unmask, + .irq_mask = mpc52xx_gpt_irq_mask, + .irq_ack = mpc52xx_gpt_irq_ack, + .irq_set_type = mpc52xx_gpt_irq_set_type, +}; + +void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc) +{ + struct mpc52xx_gpt_priv *gpt = irq_get_handler_data(virq); + int sub_virq; 
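+	/* The GPT exposes a single interrupt source, so hwirq 0 is the
+	 * only mapping that can exist in gpt->irqhost; the revmap below
+	 * is therefore cheap and safe in this chained handler. */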
+ u32 status; + + status = in_be32(&gpt->regs->status) & MPC52xx_GPT_STATUS_IRQMASK; + if (status) { + sub_virq = irq_linear_revmap(gpt->irqhost, 0); + generic_handle_irq(sub_virq); + } +} + +static int mpc52xx_gpt_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct mpc52xx_gpt_priv *gpt = h->host_data; + + dev_dbg(gpt->dev, "%s: h=%p, virq=%i\n", __func__, h, virq); + irq_set_chip_data(virq, gpt); + irq_set_chip_and_handler(virq, &mpc52xx_gpt_irq_chip, handle_edge_irq); + + return 0; +} + +static int mpc52xx_gpt_irq_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, + unsigned int *out_flags) +{ + struct mpc52xx_gpt_priv *gpt = h->host_data; + + dev_dbg(gpt->dev, "%s: flags=%i\n", __func__, intspec[0]); + + if ((intsize < 1) || (intspec[0] > 3)) { + dev_err(gpt->dev, "bad irq specifier in %s\n", ct->full_name); + return -EINVAL; + } + + *out_hwirq = 0; /* The GPT only has 1 IRQ line */ + *out_flags = intspec[0]; + + return 0; +} + +static const struct irq_domain_ops mpc52xx_gpt_irq_ops = { + .map = mpc52xx_gpt_irq_map, + .xlate = mpc52xx_gpt_irq_xlate, +}; + +static void +mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) +{ + int cascade_virq; + unsigned long flags; + u32 mode; + + cascade_virq = irq_of_parse_and_map(node, 0); + if (!cascade_virq) + return; + + gpt->irqhost = irq_domain_add_linear(node, 1, &mpc52xx_gpt_irq_ops, gpt); + if (!gpt->irqhost) { + dev_err(gpt->dev, "irq_domain_add_linear() failed\n"); + return; + } + + irq_set_handler_data(cascade_virq, gpt); + irq_set_chained_handler(cascade_virq, mpc52xx_gpt_irq_cascade); + + /* If the GPT is currently disabled, then change it to be in Input + * Capture mode. If the mode is non-zero, then the pin could be + * already in use for something. */ + spin_lock_irqsave(&gpt->lock, flags); + mode = in_be32(&gpt->regs->mode); + if ((mode & MPC52xx_GPT_MODE_MS_MASK) == 0) + out_be32(&gpt->regs->mode, mode | MPC52xx_GPT_MODE_MS_IC); + spin_unlock_irqrestore(&gpt->lock, flags); + + dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, cascade_virq); +} + + +/* --------------------------------------------------------------------- + * GPIOLIB hooks + */ +#if defined(CONFIG_GPIOLIB) +static inline struct mpc52xx_gpt_priv *gc_to_mpc52xx_gpt(struct gpio_chip *gc) +{ + return container_of(gc, struct mpc52xx_gpt_priv, gc); +} + +static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct mpc52xx_gpt_priv *gpt = gc_to_mpc52xx_gpt(gc); + + return (in_be32(&gpt->regs->status) >> 8) & 1; +} + +static void +mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v) +{ + struct mpc52xx_gpt_priv *gpt = gc_to_mpc52xx_gpt(gc); + unsigned long flags; + u32 r; + + dev_dbg(gpt->dev, "%s: gpio:%d v:%d\n", __func__, gpio, v); + r = v ? 
MPC52xx_GPT_MODE_GPIO_OUT_HIGH : MPC52xx_GPT_MODE_GPIO_OUT_LOW; + + spin_lock_irqsave(&gpt->lock, flags); + clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r); + spin_unlock_irqrestore(&gpt->lock, flags); +} + +static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + struct mpc52xx_gpt_priv *gpt = gc_to_mpc52xx_gpt(gc); + unsigned long flags; + + dev_dbg(gpt->dev, "%s: gpio:%d\n", __func__, gpio); + + spin_lock_irqsave(&gpt->lock, flags); + clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK); + spin_unlock_irqrestore(&gpt->lock, flags); + + return 0; +} + +static int +mpc52xx_gpt_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + mpc52xx_gpt_gpio_set(gc, gpio, val); + return 0; +} + +static void +mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) +{ + int rc; + + /* Only setup GPIO if the device tree claims the GPT is + * a GPIO controller */ + if (!of_find_property(node, "gpio-controller", NULL)) + return; + + gpt->gc.label = kstrdup(node->full_name, GFP_KERNEL); + if (!gpt->gc.label) { + dev_err(gpt->dev, "out of memory\n"); + return; + } + + gpt->gc.ngpio = 1; + gpt->gc.direction_input = mpc52xx_gpt_gpio_dir_in; + gpt->gc.direction_output = mpc52xx_gpt_gpio_dir_out; + gpt->gc.get = mpc52xx_gpt_gpio_get; + gpt->gc.set = mpc52xx_gpt_gpio_set; + gpt->gc.base = -1; + gpt->gc.of_node = node; + + /* Setup external pin in GPIO mode */ + clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK, + MPC52xx_GPT_MODE_MS_GPIO); + + rc = gpiochip_add(&gpt->gc); + if (rc) + dev_err(gpt->dev, "gpiochip_add() failed; rc=%i\n", rc); + + dev_dbg(gpt->dev, "%s() complete.\n", __func__); +} +#else /* defined(CONFIG_GPIOLIB) */ +static void +mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *p, struct device_node *np) { } +#endif /* defined(CONFIG_GPIOLIB) */ + +/*********************************************************************** + * Timer API + */ + +/** + * mpc52xx_gpt_from_irq - Return the GPT device associated with an IRQ number + * @irq: irq of timer. + */ +struct mpc52xx_gpt_priv *mpc52xx_gpt_from_irq(int irq) +{ + struct mpc52xx_gpt_priv *gpt; + struct list_head *pos; + + /* Iterate over the list of timers looking for a matching device */ + mutex_lock(&mpc52xx_gpt_list_mutex); + list_for_each(pos, &mpc52xx_gpt_list) { + gpt = container_of(pos, struct mpc52xx_gpt_priv, list); + if (gpt->irqhost && irq == irq_linear_revmap(gpt->irqhost, 0)) { + mutex_unlock(&mpc52xx_gpt_list_mutex); + return gpt; + } + } + mutex_unlock(&mpc52xx_gpt_list_mutex); + + return NULL; +} +EXPORT_SYMBOL(mpc52xx_gpt_from_irq); + +static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period, + int continuous, int as_wdt) +{ + u32 clear, set; + u64 clocks; + u32 prescale; + unsigned long flags; + + clear = MPC52xx_GPT_MODE_MS_MASK | MPC52xx_GPT_MODE_CONTINUOUS; + set = MPC52xx_GPT_MODE_MS_GPIO | MPC52xx_GPT_MODE_COUNTER_ENABLE; + if (as_wdt) { + clear |= MPC52xx_GPT_MODE_IRQ_EN; + set |= MPC52xx_GPT_MODE_WDT_EN; + } else if (continuous) + set |= MPC52xx_GPT_MODE_CONTINUOUS; + + /* Determine the number of clocks in the requested period. 64 bit + * arithmatic is done here to preserve the precision until the value + * is scaled back down into the u32 range. Period is in 'ns', bus + * frequency is in Hz. 
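+	 * For example (hypothetical numbers): with a 33 MHz IPB clock and a
+	 * requested period of 1 ms, this gives 1000000 * 33000000 / 10^9 =
+	 * 33000 ticks, which fits in 16 bits with a prescaler of 1.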
*/ + clocks = period * (u64)gpt->ipb_freq; + do_div(clocks, 1000000000); /* Scale it down to ns range */ + + /* This device cannot handle a clock count greater than 32 bits */ + if (clocks > 0xffffffff) + return -EINVAL; + + /* Calculate the prescaler and count values from the clocks value. + * 'clocks' is the number of clock ticks in the period. The timer + * has 16 bit precision and a 16 bit prescaler. Prescaler is + * calculated by integer dividing the clocks by 0x10000 (shifting + * down 16 bits) to obtain the smallest possible divisor for clocks + * to get a 16 bit count value. + * + * Note: the prescale register is '1' based, not '0' based. ie. a + * value of '1' means divide the clock by one. 0xffff divides the + * clock by 0xffff. '0x0000' does not divide by zero, but wraps + * around and divides by 0x10000. That is why prescale must be + * a u32 variable, not a u16, for this calculation. */ + prescale = (clocks >> 16) + 1; + do_div(clocks, prescale); + if (clocks > 0xffff) { + pr_err("calculation error; prescale:%x clocks:%llx\n", + prescale, clocks); + return -EINVAL; + } + + /* Set and enable the timer, reject an attempt to use a wdt as gpt */ + spin_lock_irqsave(&gpt->lock, flags); + if (as_wdt) + gpt->wdt_mode |= MPC52xx_GPT_IS_WDT; + else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) { + spin_unlock_irqrestore(&gpt->lock, flags); + return -EBUSY; + } + out_be32(&gpt->regs->count, prescale << 16 | clocks); + clrsetbits_be32(&gpt->regs->mode, clear, set); + spin_unlock_irqrestore(&gpt->lock, flags); + + return 0; +} + +/** + * mpc52xx_gpt_start_timer - Set and enable the GPT timer + * @gpt: Pointer to gpt private data structure + * @period: period of timer in ns; max. ~130s @ 33MHz IPB clock + * @continuous: set to 1 to make timer continuous free running + * + * An interrupt will be generated every time the timer fires + */ +int mpc52xx_gpt_start_timer(struct mpc52xx_gpt_priv *gpt, u64 period, + int continuous) +{ + return mpc52xx_gpt_do_start(gpt, period, continuous, 0); +} +EXPORT_SYMBOL(mpc52xx_gpt_start_timer); + +/** + * mpc52xx_gpt_stop_timer - Stop a gpt + * @gpt: Pointer to gpt private data structure + * + * Returns an error if attempting to stop a wdt + */ +int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt) +{ + unsigned long flags; + + /* reject the operation if the timer is used as watchdog (gpt 0 only) */ + spin_lock_irqsave(&gpt->lock, flags); + if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) { + spin_unlock_irqrestore(&gpt->lock, flags); + return -EBUSY; + } + + clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE); + spin_unlock_irqrestore(&gpt->lock, flags); + return 0; +} +EXPORT_SYMBOL(mpc52xx_gpt_stop_timer); + +/** + * mpc52xx_gpt_timer_period - Read the timer period + * @gpt: Pointer to gpt private data structure + * + * Returns the timer period in ns + */ +u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt) +{ + u64 period; + u64 prescale; + unsigned long flags; + + spin_lock_irqsave(&gpt->lock, flags); + period = in_be32(&gpt->regs->count); + spin_unlock_irqrestore(&gpt->lock, flags); + + prescale = period >> 16; + period &= 0xffff; + if (prescale == 0) + prescale = 0x10000; + period = period * prescale * 1000000000ULL; + do_div(period, (u64)gpt->ipb_freq); + return period; +} +EXPORT_SYMBOL(mpc52xx_gpt_timer_period); + +#if defined(CONFIG_MPC5200_WDT) +/*********************************************************************** + * Watchdog API for gpt0 + */ + +#define WDT_IDENTITY "mpc52xx watchdog on GPT0" + +/* wdt_is_active stores 
whether or not the /dev/watchdog device is opened */ +static unsigned long wdt_is_active; + +/* wdt-capable gpt */ +static struct mpc52xx_gpt_priv *mpc52xx_gpt_wdt; + +/* low-level wdt functions */ +static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt) +{ + unsigned long flags; + + spin_lock_irqsave(&gpt_wdt->lock, flags); + out_8((u8 *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING); + spin_unlock_irqrestore(&gpt_wdt->lock, flags); +} + +/* wdt misc device api */ +static ssize_t mpc52xx_wdt_write(struct file *file, const char __user *data, + size_t len, loff_t *ppos) +{ + struct mpc52xx_gpt_priv *gpt_wdt = file->private_data; + mpc52xx_gpt_wdt_ping(gpt_wdt); + return 0; +} + +static const struct watchdog_info mpc5200_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = WDT_IDENTITY, +}; + +static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct mpc52xx_gpt_priv *gpt_wdt = file->private_data; + int __user *data = (int __user *)arg; + int timeout; + u64 real_timeout; + int ret = 0; + + switch (cmd) { + case WDIOC_GETSUPPORT: + ret = copy_to_user(data, &mpc5200_wdt_info, + sizeof(mpc5200_wdt_info)); + if (ret) + ret = -EFAULT; + break; + + case WDIOC_GETSTATUS: + case WDIOC_GETBOOTSTATUS: + ret = put_user(0, data); + break; + + case WDIOC_KEEPALIVE: + mpc52xx_gpt_wdt_ping(gpt_wdt); + break; + + case WDIOC_SETTIMEOUT: + ret = get_user(timeout, data); + if (ret) + break; + real_timeout = (u64) timeout * 1000000000ULL; + ret = mpc52xx_gpt_do_start(gpt_wdt, real_timeout, 0, 1); + if (ret) + break; + /* fall through and return the timeout */ + + case WDIOC_GETTIMEOUT: + /* we need to round here as to avoid e.g. the following + * situation: + * - timeout requested is 1 second; + * - real timeout @33MHz is 999997090ns + * - the int divide by 10^9 will return 0. 
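+		 * Adding half a second (the 500000000ULL below) before the
+		 * divide rounds to the nearest second: (999997090 +
+		 * 500000000) / 10^9 = 1, as expected.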
+ */ + real_timeout = + mpc52xx_gpt_timer_period(gpt_wdt) + 500000000ULL; + do_div(real_timeout, 1000000000ULL); + timeout = (int) real_timeout; + ret = put_user(timeout, data); + break; + + default: + ret = -ENOTTY; + } + return ret; +} + +static int mpc52xx_wdt_open(struct inode *inode, struct file *file) +{ + int ret; + + /* sanity check */ + if (!mpc52xx_gpt_wdt) + return -ENODEV; + + /* /dev/watchdog can only be opened once */ + if (test_and_set_bit(0, &wdt_is_active)) + return -EBUSY; + + /* Set and activate the watchdog with 30 seconds timeout */ + ret = mpc52xx_gpt_do_start(mpc52xx_gpt_wdt, 30ULL * 1000000000ULL, + 0, 1); + if (ret) { + clear_bit(0, &wdt_is_active); + return ret; + } + + file->private_data = mpc52xx_gpt_wdt; + return nonseekable_open(inode, file); +} + +static int mpc52xx_wdt_release(struct inode *inode, struct file *file) +{ + /* note: releasing the wdt in NOWAYOUT-mode does not stop it */ +#if !defined(CONFIG_WATCHDOG_NOWAYOUT) + struct mpc52xx_gpt_priv *gpt_wdt = file->private_data; + unsigned long flags; + + spin_lock_irqsave(&gpt_wdt->lock, flags); + clrbits32(&gpt_wdt->regs->mode, + MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN); + gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT; + spin_unlock_irqrestore(&gpt_wdt->lock, flags); +#endif + clear_bit(0, &wdt_is_active); + return 0; +} + + +static const struct file_operations mpc52xx_wdt_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .write = mpc52xx_wdt_write, + .unlocked_ioctl = mpc52xx_wdt_ioctl, + .open = mpc52xx_wdt_open, + .release = mpc52xx_wdt_release, +}; + +static struct miscdevice mpc52xx_wdt_miscdev = { + .minor = WATCHDOG_MINOR, + .name = "watchdog", + .fops = &mpc52xx_wdt_fops, +}; + +static int mpc52xx_gpt_wdt_init(void) +{ + int err; + + /* try to register the watchdog misc device */ + err = misc_register(&mpc52xx_wdt_miscdev); + if (err) + pr_err("%s: cannot register watchdog device\n", WDT_IDENTITY); + else + pr_info("%s: watchdog device registered\n", WDT_IDENTITY); + return err; +} + +static int mpc52xx_gpt_wdt_setup(struct mpc52xx_gpt_priv *gpt, + const u32 *period) +{ + u64 real_timeout; + + /* remember the gpt for the wdt operation */ + mpc52xx_gpt_wdt = gpt; + + /* configure the wdt if the device tree contained a timeout */ + if (!period || *period == 0) + return 0; + + real_timeout = (u64) *period * 1000000000ULL; + if (mpc52xx_gpt_do_start(gpt, real_timeout, 0, 1)) + dev_warn(gpt->dev, "starting as wdt failed\n"); + else + dev_info(gpt->dev, "watchdog set to %us timeout\n", *period); + return 0; +} + +#else + +static int mpc52xx_gpt_wdt_init(void) +{ + return 0; +} + +static inline int mpc52xx_gpt_wdt_setup(struct mpc52xx_gpt_priv *gpt, + const u32 *period) +{ + return 0; +} + +#endif /* CONFIG_MPC5200_WDT */ + +/* --------------------------------------------------------------------- + * of_platform bus binding code + */ +static int mpc52xx_gpt_probe(struct platform_device *ofdev) +{ + struct mpc52xx_gpt_priv *gpt; + + gpt = kzalloc(sizeof *gpt, GFP_KERNEL); + if (!gpt) + return -ENOMEM; + + spin_lock_init(&gpt->lock); + gpt->dev = &ofdev->dev; + gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node); + gpt->regs = of_iomap(ofdev->dev.of_node, 0); + if (!gpt->regs) { + kfree(gpt); + return -ENOMEM; + } + + dev_set_drvdata(&ofdev->dev, gpt); + + mpc52xx_gpt_gpio_setup(gpt, ofdev->dev.of_node); + mpc52xx_gpt_irq_setup(gpt, ofdev->dev.of_node); + + mutex_lock(&mpc52xx_gpt_list_mutex); + list_add(&gpt->list, &mpc52xx_gpt_list); + mutex_unlock(&mpc52xx_gpt_list_mutex); 
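+
+	/* From here on the timer is visible to mpc52xx_gpt_from_irq(),
+	 * so all fields used by the timer API must already be valid. */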
+
+	/* check if this device could be a watchdog */
+	if (of_get_property(ofdev->dev.of_node, "fsl,has-wdt", NULL) ||
+	    of_get_property(ofdev->dev.of_node, "has-wdt", NULL)) {
+		const u32 *on_boot_wdt;
+
+		gpt->wdt_mode = MPC52xx_GPT_CAN_WDT;
+		on_boot_wdt = of_get_property(ofdev->dev.of_node,
+					      "fsl,wdt-on-boot", NULL);
+		if (on_boot_wdt) {
+			dev_info(gpt->dev, "used as watchdog\n");
+			gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
+		} else
+			dev_info(gpt->dev, "can function as watchdog\n");
+		mpc52xx_gpt_wdt_setup(gpt, on_boot_wdt);
+	}
+
+	return 0;
+}
+
+static int mpc52xx_gpt_remove(struct platform_device *ofdev)
+{
+	return -EBUSY;
+}
+
+static const struct of_device_id mpc52xx_gpt_match[] = {
+	{ .compatible = "fsl,mpc5200-gpt", },
+
+	/* Deprecated compatible values; don't use for new dts files */
+	{ .compatible = "fsl,mpc5200-gpt-gpio", },
+	{ .compatible = "mpc5200-gpt", },
+	{}
+};
+
+static struct platform_driver mpc52xx_gpt_driver = {
+	.driver = {
+		.name = "mpc52xx-gpt",
+		.of_match_table = mpc52xx_gpt_match,
+	},
+	.probe = mpc52xx_gpt_probe,
+	.remove = mpc52xx_gpt_remove,
+};
+
+static int __init mpc52xx_gpt_init(void)
+{
+	return platform_driver_register(&mpc52xx_gpt_driver);
+}
+
+/* Make sure GPIOs and IRQs get set up before anyone tries to use them */
+subsys_initcall(mpc52xx_gpt_init);
+device_initcall(mpc52xx_gpt_wdt_init);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
new file mode 100644
index 000000000..251dcb90e
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -0,0 +1,580 @@
+/*
+ * LocalPlus Bus FIFO driver for the Freescale MPC52xx.
+ *
+ * Copyright (C) 2009 Secret Lab Technologies Ltd.
+ *
+ * This file is released under the GPLv2
+ *
+ * Todo:
+ * - Add support for multiple requests to be queued.
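+ *
+ * A transfer is described by a struct mpc52xx_lpbfifo_request (declared
+ * in asm/mpc52xx.h, included below) and handed to mpc52xx_lpbfifo_submit();
+ * completion is signalled through the request's callback hook.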
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/mpc52xx.h>
+#include <asm/time.h>
+
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/gen_bd.h>
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("MPC5200 LocalPlus FIFO device driver");
+MODULE_LICENSE("GPL");
+
+#define LPBFIFO_REG_PACKET_SIZE		(0x00)
+#define LPBFIFO_REG_START_ADDRESS	(0x04)
+#define LPBFIFO_REG_CONTROL		(0x08)
+#define LPBFIFO_REG_ENABLE		(0x0C)
+#define LPBFIFO_REG_BYTES_DONE_STATUS	(0x14)
+#define LPBFIFO_REG_FIFO_DATA		(0x40)
+#define LPBFIFO_REG_FIFO_STATUS		(0x44)
+#define LPBFIFO_REG_FIFO_CONTROL	(0x48)
+#define LPBFIFO_REG_FIFO_ALARM		(0x4C)
+
+struct mpc52xx_lpbfifo {
+	struct device *dev;
+	phys_addr_t regs_phys;
+	void __iomem *regs;
+	int irq;
+	spinlock_t lock;
+
+	struct bcom_task *bcom_tx_task;
+	struct bcom_task *bcom_rx_task;
+	struct bcom_task *bcom_cur_task;
+
+	/* Current state data */
+	struct mpc52xx_lpbfifo_request *req;
+	int dma_irqs_enabled;
+};
+
+/* The MPC5200 has only one fifo, so only need one instance structure */
+static struct mpc52xx_lpbfifo lpbfifo;
+
+/**
+ * mpc52xx_lpbfifo_kick - Trigger the next block of data to be transferred
+ */
+static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req)
+{
+	size_t transfer_size = req->size - req->pos;
+	struct bcom_bd *bd;
+	void __iomem *reg;
+	u32 *data;
+	int i;
+	int bit_fields;
+	int dma = !(req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA);
+	int write = req->flags & MPC52XX_LPBFIFO_FLAG_WRITE;
+	int poll_dma = req->flags & MPC52XX_LPBFIFO_FLAG_POLL_DMA;
+
+	/* Set and clear the reset bits; doing so is good practice per the
+	 * User Manual */
+	out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
+
+	/* set master enable bit */
+	out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x00000001);
+	if (!dma) {
+		/* While the FIFO can be set up for transfer sizes as large as
+		 * 16M-1, the FIFO itself is only 512 bytes deep and it does
+		 * not generate interrupts for FIFO full events (only transfer
+		 * complete will raise an IRQ). Therefore when not using
+		 * Bestcomm to drive the FIFO it needs to either be polled, or
+		 * transfers need to be constrained to the size of the fifo.
+		 *
+		 * This driver restricts the size of the transfer
+		 */
+		if (transfer_size > 512)
+			transfer_size = 512;
+
+		/* Load the FIFO with data */
+		if (write) {
+			reg = lpbfifo.regs + LPBFIFO_REG_FIFO_DATA;
+			data = req->data + req->pos;
+			for (i = 0; i < transfer_size; i += 4)
+				out_be32(reg, *data++);
+		}
+
+		/* Unmask both error and completion irqs */
+		out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x00000301);
+	} else {
+		/* Choose the correct direction
+		 *
+		 * Configure the watermarks so DMA will always complete correctly.
+		 * It may be worth experimenting with the ALARM value to see if
+		 * there is a performance impact.
However, if it is wrong there + * is a risk of DMA not transferring the last chunk of data + */ + if (write) { + out_be32(lpbfifo.regs + LPBFIFO_REG_FIFO_ALARM, 0x1e4); + out_8(lpbfifo.regs + LPBFIFO_REG_FIFO_CONTROL, 7); + lpbfifo.bcom_cur_task = lpbfifo.bcom_tx_task; + } else { + out_be32(lpbfifo.regs + LPBFIFO_REG_FIFO_ALARM, 0x1ff); + out_8(lpbfifo.regs + LPBFIFO_REG_FIFO_CONTROL, 0); + lpbfifo.bcom_cur_task = lpbfifo.bcom_rx_task; + + if (poll_dma) { + if (lpbfifo.dma_irqs_enabled) { + disable_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task)); + lpbfifo.dma_irqs_enabled = 0; + } + } else { + if (!lpbfifo.dma_irqs_enabled) { + enable_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task)); + lpbfifo.dma_irqs_enabled = 1; + } + } + } + + bd = bcom_prepare_next_buffer(lpbfifo.bcom_cur_task); + bd->status = transfer_size; + if (!write) { + /* + * In the DMA read case, the DMA doesn't complete, + * possibly due to incorrect watermarks in the ALARM + * and CONTROL regs. For now instead of trying to + * determine the right watermarks that will make this + * work, just increase the number of bytes the FIFO is + * expecting. + * + * When submitting another operation, the FIFO will get + * reset, so the condition of the FIFO waiting for a + * non-existent 4 bytes will get cleared. + */ + transfer_size += 4; /* BLECH! */ + } + bd->data[0] = req->data_phys + req->pos; + bcom_submit_next_buffer(lpbfifo.bcom_cur_task, NULL); + + /* error irq & master enabled bit */ + bit_fields = 0x00000201; + + /* Unmask irqs */ + if (write && (!poll_dma)) + bit_fields |= 0x00000100; /* completion irq too */ + out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, bit_fields); + } + + /* Set transfer size, width, chip select and READ mode */ + out_be32(lpbfifo.regs + LPBFIFO_REG_START_ADDRESS, + req->offset + req->pos); + out_be32(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, transfer_size); + + bit_fields = req->cs << 24 | 0x000008; + if (!write) + bit_fields |= 0x010000; /* read mode */ + out_be32(lpbfifo.regs + LPBFIFO_REG_CONTROL, bit_fields); + + /* Kick it off */ + if (!lpbfifo.req->defer_xfer_start) + out_8(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, 0x01); + if (dma) + bcom_enable(lpbfifo.bcom_cur_task); +} + +/** + * mpc52xx_lpbfifo_irq - IRQ handler for LPB FIFO + * + * On transmit, the dma completion irq triggers before the fifo completion + * triggers. Handle the dma completion here instead of the LPB FIFO Bestcomm + * task completion irq because everything is not really done until the LPB FIFO + * completion irq triggers. + * + * In other words: + * For DMA, on receive, the "Fat Lady" is the bestcom completion irq. on + * transmit, the fifo completion irq is the "Fat Lady". The opera (or in this + * case the DMA/FIFO operation) is not finished until the "Fat Lady" sings. + * + * Reasons for entering this routine: + * 1) PIO mode rx and tx completion irq + * 2) DMA interrupt mode tx completion irq + * 3) DMA polled mode tx + * + * Exit conditions: + * 1) Transfer aborted + * 2) FIFO complete without DMA; more data to do + * 3) FIFO complete without DMA; all data transferred + * 4) FIFO complete using DMA + * + * Condition 1 can occur regardless of whether or not DMA is used. + * It requires executing the callback to report the error and exiting + * immediately. + * + * Condition 2 requires programming the FIFO with the next block of data + * + * Condition 3 requires executing the callback to report completion + * + * Condition 4 means the same as 3, except that we also retrieve the bcom + * buffer so DMA doesn't get clogged up. 
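+ *
+ * (Conditions 2 and 3 are told apart simply by comparing req->pos with
+ * req->size once the completed byte count has been added on.)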
+ * + * To make things trickier, the spinlock must be dropped before + * executing the callback, otherwise we could end up with a deadlock + * or nested spinlock condition. The out path is non-trivial, so + * extra fiddling is done to make sure all paths lead to the same + * outbound code. + */ +static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id) +{ + struct mpc52xx_lpbfifo_request *req; + u32 status = in_8(lpbfifo.regs + LPBFIFO_REG_BYTES_DONE_STATUS); + void __iomem *reg; + u32 *data; + int count, i; + int do_callback = 0; + u32 ts; + unsigned long flags; + int dma, write, poll_dma; + + spin_lock_irqsave(&lpbfifo.lock, flags); + ts = get_tbl(); + + req = lpbfifo.req; + if (!req) { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + pr_err("bogus LPBFIFO IRQ\n"); + return IRQ_HANDLED; + } + + dma = !(req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA); + write = req->flags & MPC52XX_LPBFIFO_FLAG_WRITE; + poll_dma = req->flags & MPC52XX_LPBFIFO_FLAG_POLL_DMA; + + if (dma && !write) { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + pr_err("bogus LPBFIFO IRQ (dma and not writing)\n"); + return IRQ_HANDLED; + } + + if ((status & 0x01) == 0) { + goto out; + } + + /* check abort bit */ + if (status & 0x10) { + out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000); + do_callback = 1; + goto out; + } + + /* Read result from hardware */ + count = in_be32(lpbfifo.regs + LPBFIFO_REG_BYTES_DONE_STATUS); + count &= 0x00ffffff; + + if (!dma && !write) { + /* copy the data out of the FIFO */ + reg = lpbfifo.regs + LPBFIFO_REG_FIFO_DATA; + data = req->data + req->pos; + for (i = 0; i < count; i += 4) + *data++ = in_be32(reg); + } + + /* Update transfer position and count */ + req->pos += count; + + /* Decide what to do next */ + if (req->size - req->pos) + mpc52xx_lpbfifo_kick(req); /* more work to do */ + else + do_callback = 1; + + out: + /* Clear the IRQ */ + out_8(lpbfifo.regs + LPBFIFO_REG_BYTES_DONE_STATUS, 0x01); + + if (dma && (status & 0x11)) { + /* + * Count the DMA as complete only when the FIFO completion + * status or abort bits are set. + * + * (status & 0x01) should always be the case except sometimes + * when using polled DMA. + * + * (status & 0x10) {transfer aborted}: This case needs more + * testing. + */ + bcom_retrieve_buffer(lpbfifo.bcom_cur_task, &status, NULL); + } + req->last_byte = ((u8 *)req->data)[req->size - 1]; + + /* When the do_callback flag is set; it means the transfer is finished + * so set the FIFO as idle */ + if (do_callback) + lpbfifo.req = NULL; + + if (irq != 0) /* don't increment on polled case */ + req->irq_count++; + + req->irq_ticks += get_tbl() - ts; + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + /* Spinlock is released; it is now safe to call the callback */ + if (do_callback && req->callback) + req->callback(req); + + return IRQ_HANDLED; +} + +/** + * mpc52xx_lpbfifo_bcom_irq - IRQ handler for LPB FIFO Bestcomm task + * + * Only used when receiving data. 
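+ * On transmit, the Bestcomm completion is deliberately not handled here;
+ * mpc52xx_lpbfifo_irq() finishes the transfer when the LPB FIFO completion
+ * irq fires (see the "Fat Lady" note above).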
+ */ +static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id) +{ + struct mpc52xx_lpbfifo_request *req; + unsigned long flags; + u32 status; + u32 ts; + + spin_lock_irqsave(&lpbfifo.lock, flags); + ts = get_tbl(); + + req = lpbfifo.req; + if (!req || (req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA)) { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return IRQ_HANDLED; + } + + if (irq != 0) /* don't increment on polled case */ + req->irq_count++; + + if (!bcom_buffer_done(lpbfifo.bcom_cur_task)) { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + req->buffer_not_done_cnt++; + if ((req->buffer_not_done_cnt % 1000) == 0) + pr_err("transfer stalled\n"); + + return IRQ_HANDLED; + } + + bcom_retrieve_buffer(lpbfifo.bcom_cur_task, &status, NULL); + + req->last_byte = ((u8 *)req->data)[req->size - 1]; + + req->pos = status & 0x00ffffff; + + /* Mark the FIFO as idle */ + lpbfifo.req = NULL; + + /* Release the lock before calling out to the callback. */ + req->irq_ticks += get_tbl() - ts; + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + if (req->callback) + req->callback(req); + + return IRQ_HANDLED; +} + +/** + * mpc52xx_lpbfifo_bcom_poll - Poll for DMA completion + */ +void mpc52xx_lpbfifo_poll(void) +{ + struct mpc52xx_lpbfifo_request *req = lpbfifo.req; + int dma = !(req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA); + int write = req->flags & MPC52XX_LPBFIFO_FLAG_WRITE; + + /* + * For more information, see comments on the "Fat Lady" + */ + if (dma && write) + mpc52xx_lpbfifo_irq(0, NULL); + else + mpc52xx_lpbfifo_bcom_irq(0, NULL); +} +EXPORT_SYMBOL(mpc52xx_lpbfifo_poll); + +/** + * mpc52xx_lpbfifo_submit - Submit an LPB FIFO transfer request. + * @req: Pointer to request structure + */ +int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req) +{ + unsigned long flags; + + if (!lpbfifo.regs) + return -ENODEV; + + spin_lock_irqsave(&lpbfifo.lock, flags); + + /* If the req pointer is already set, then a transfer is in progress */ + if (lpbfifo.req) { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return -EBUSY; + } + + /* Setup the transfer */ + lpbfifo.req = req; + req->irq_count = 0; + req->irq_ticks = 0; + req->buffer_not_done_cnt = 0; + req->pos = 0; + + mpc52xx_lpbfifo_kick(req); + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return 0; +} +EXPORT_SYMBOL(mpc52xx_lpbfifo_submit); + +int mpc52xx_lpbfifo_start_xfer(struct mpc52xx_lpbfifo_request *req) +{ + unsigned long flags; + + if (!lpbfifo.regs) + return -ENODEV; + + spin_lock_irqsave(&lpbfifo.lock, flags); + + /* + * If the req pointer is already set and a transfer was + * started on submit, then this transfer is in progress + */ + if (lpbfifo.req && !lpbfifo.req->defer_xfer_start) { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return -EBUSY; + } + + /* + * If the req was previously submitted but not + * started, start it now + */ + if (lpbfifo.req && lpbfifo.req == req && + lpbfifo.req->defer_xfer_start) { + out_8(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, 0x01); + } + + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return 0; +} +EXPORT_SYMBOL(mpc52xx_lpbfifo_start_xfer); + +void mpc52xx_lpbfifo_abort(struct mpc52xx_lpbfifo_request *req) +{ + unsigned long flags; + + spin_lock_irqsave(&lpbfifo.lock, flags); + if (lpbfifo.req == req) { + /* Put it into reset and clear the state */ + bcom_gen_bd_rx_reset(lpbfifo.bcom_rx_task); + bcom_gen_bd_tx_reset(lpbfifo.bcom_tx_task); + out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000); + lpbfifo.req = NULL; + } + spin_unlock_irqrestore(&lpbfifo.lock, flags); +} 
+EXPORT_SYMBOL(mpc52xx_lpbfifo_abort); + +static int mpc52xx_lpbfifo_probe(struct platform_device *op) +{ + struct resource res; + int rc = -ENOMEM; + + if (lpbfifo.dev != NULL) + return -ENOSPC; + + lpbfifo.irq = irq_of_parse_and_map(op->dev.of_node, 0); + if (!lpbfifo.irq) + return -ENODEV; + + if (of_address_to_resource(op->dev.of_node, 0, &res)) + return -ENODEV; + lpbfifo.regs_phys = res.start; + lpbfifo.regs = of_iomap(op->dev.of_node, 0); + if (!lpbfifo.regs) + return -ENOMEM; + + spin_lock_init(&lpbfifo.lock); + + /* Put FIFO into reset */ + out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000); + + /* Register the interrupt handler */ + rc = request_irq(lpbfifo.irq, mpc52xx_lpbfifo_irq, 0, + "mpc52xx-lpbfifo", &lpbfifo); + if (rc) + goto err_irq; + + /* Request the Bestcomm receive (fifo --> memory) task and IRQ */ + lpbfifo.bcom_rx_task = + bcom_gen_bd_rx_init(2, res.start + LPBFIFO_REG_FIFO_DATA, + BCOM_INITIATOR_SCLPC, BCOM_IPR_SCLPC, + 16*1024*1024); + if (!lpbfifo.bcom_rx_task) + goto err_bcom_rx; + + rc = request_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task), + mpc52xx_lpbfifo_bcom_irq, 0, + "mpc52xx-lpbfifo-rx", &lpbfifo); + if (rc) + goto err_bcom_rx_irq; + + lpbfifo.dma_irqs_enabled = 1; + + /* Request the Bestcomm transmit (memory --> fifo) task and IRQ */ + lpbfifo.bcom_tx_task = + bcom_gen_bd_tx_init(2, res.start + LPBFIFO_REG_FIFO_DATA, + BCOM_INITIATOR_SCLPC, BCOM_IPR_SCLPC); + if (!lpbfifo.bcom_tx_task) + goto err_bcom_tx; + + lpbfifo.dev = &op->dev; + return 0; + + err_bcom_tx: + free_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task), &lpbfifo); + err_bcom_rx_irq: + bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task); + err_bcom_rx: + err_irq: + iounmap(lpbfifo.regs); + lpbfifo.regs = NULL; + + dev_err(&op->dev, "mpc52xx_lpbfifo_probe() failed\n"); + return -ENODEV; +} + + +static int mpc52xx_lpbfifo_remove(struct platform_device *op) +{ + if (lpbfifo.dev != &op->dev) + return 0; + + /* Put FIFO in reset */ + out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000); + + /* Release the bestcomm transmit task */ + free_irq(bcom_get_task_irq(lpbfifo.bcom_tx_task), &lpbfifo); + bcom_gen_bd_tx_release(lpbfifo.bcom_tx_task); + + /* Release the bestcomm receive task */ + free_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task), &lpbfifo); + bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task); + + free_irq(lpbfifo.irq, &lpbfifo); + iounmap(lpbfifo.regs); + lpbfifo.regs = NULL; + lpbfifo.dev = NULL; + + return 0; +} + +static const struct of_device_id mpc52xx_lpbfifo_match[] = { + { .compatible = "fsl,mpc5200-lpbfifo", }, + {}, +}; + +static struct platform_driver mpc52xx_lpbfifo_driver = { + .driver = { + .name = "mpc52xx-lpbfifo", + .of_match_table = mpc52xx_lpbfifo_match, + }, + .probe = mpc52xx_lpbfifo_probe, + .remove = mpc52xx_lpbfifo_remove, +}; +module_platform_driver(mpc52xx_lpbfifo_driver); diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c new file mode 100644 index 000000000..e2d401ad8 --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c @@ -0,0 +1,428 @@ +/* + * PCI code for the Freescale MPC52xx embedded CPU. + * + * Copyright (C) 2006 Secret Lab Technologies Ltd. + * Grant Likely <grant.likely@secretlab.ca> + * Copyright (C) 2004 Sylvain Munaut <tnt@246tNt.com> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +#undef DEBUG + +#include <asm/pci.h> +#include <asm/mpc52xx.h> +#include <asm/delay.h> +#include <asm/machdep.h> +#include <linux/kernel.h> + + +/* ======================================================================== */ +/* Structures mapping & Defines for PCI Unit */ +/* ======================================================================== */ + +#define MPC52xx_PCI_GSCR_BM 0x40000000 +#define MPC52xx_PCI_GSCR_PE 0x20000000 +#define MPC52xx_PCI_GSCR_SE 0x10000000 +#define MPC52xx_PCI_GSCR_XLB2PCI_MASK 0x07000000 +#define MPC52xx_PCI_GSCR_XLB2PCI_SHIFT 24 +#define MPC52xx_PCI_GSCR_IPG2PCI_MASK 0x00070000 +#define MPC52xx_PCI_GSCR_IPG2PCI_SHIFT 16 +#define MPC52xx_PCI_GSCR_BME 0x00004000 +#define MPC52xx_PCI_GSCR_PEE 0x00002000 +#define MPC52xx_PCI_GSCR_SEE 0x00001000 +#define MPC52xx_PCI_GSCR_PR 0x00000001 + + +#define MPC52xx_PCI_IWBTAR_TRANSLATION(proc_ad,pci_ad,size) \ + ( ( (proc_ad) & 0xff000000 ) | \ + ( (((size) - 1) >> 8) & 0x00ff0000 ) | \ + ( ((pci_ad) >> 16) & 0x0000ff00 ) ) + +#define MPC52xx_PCI_IWCR_PACK(win0,win1,win2) (((win0) << 24) | \ + ((win1) << 16) | \ + ((win2) << 8)) + +#define MPC52xx_PCI_IWCR_DISABLE 0x0 +#define MPC52xx_PCI_IWCR_ENABLE 0x1 +#define MPC52xx_PCI_IWCR_READ 0x0 +#define MPC52xx_PCI_IWCR_READ_LINE 0x2 +#define MPC52xx_PCI_IWCR_READ_MULTI 0x4 +#define MPC52xx_PCI_IWCR_MEM 0x0 +#define MPC52xx_PCI_IWCR_IO 0x8 + +#define MPC52xx_PCI_TCR_P 0x01000000 +#define MPC52xx_PCI_TCR_LD 0x00010000 +#define MPC52xx_PCI_TCR_WCT8 0x00000008 + +#define MPC52xx_PCI_TBATR_DISABLE 0x0 +#define MPC52xx_PCI_TBATR_ENABLE 0x1 + +struct mpc52xx_pci { + u32 idr; /* PCI + 0x00 */ + u32 scr; /* PCI + 0x04 */ + u32 ccrir; /* PCI + 0x08 */ + u32 cr1; /* PCI + 0x0C */ + u32 bar0; /* PCI + 0x10 */ + u32 bar1; /* PCI + 0x14 */ + u8 reserved1[16]; /* PCI + 0x18 */ + u32 ccpr; /* PCI + 0x28 */ + u32 sid; /* PCI + 0x2C */ + u32 erbar; /* PCI + 0x30 */ + u32 cpr; /* PCI + 0x34 */ + u8 reserved2[4]; /* PCI + 0x38 */ + u32 cr2; /* PCI + 0x3C */ + u8 reserved3[32]; /* PCI + 0x40 */ + u32 gscr; /* PCI + 0x60 */ + u32 tbatr0; /* PCI + 0x64 */ + u32 tbatr1; /* PCI + 0x68 */ + u32 tcr; /* PCI + 0x6C */ + u32 iw0btar; /* PCI + 0x70 */ + u32 iw1btar; /* PCI + 0x74 */ + u32 iw2btar; /* PCI + 0x78 */ + u8 reserved4[4]; /* PCI + 0x7C */ + u32 iwcr; /* PCI + 0x80 */ + u32 icr; /* PCI + 0x84 */ + u32 isr; /* PCI + 0x88 */ + u32 arb; /* PCI + 0x8C */ + u8 reserved5[104]; /* PCI + 0x90 */ + u32 car; /* PCI + 0xF8 */ + u8 reserved6[4]; /* PCI + 0xFC */ +}; + +/* MPC5200 device tree match tables */ +const struct of_device_id mpc52xx_pci_ids[] __initconst = { + { .type = "pci", .compatible = "fsl,mpc5200-pci", }, + { .type = "pci", .compatible = "mpc5200-pci", }, + {} +}; + +/* ======================================================================== */ +/* PCI configuration access */ +/* ======================================================================== */ + +static int +mpc52xx_pci_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + u32 value; + + if (ppc_md.pci_exclude_device) + if (ppc_md.pci_exclude_device(hose, bus->number, devfn)) + return PCIBIOS_DEVICE_NOT_FOUND; + + out_be32(hose->cfg_addr, + (1 << 31) | + (bus->number << 16) | + (devfn << 8) | + (offset & 0xfc)); + mb(); + +#if defined(CONFIG_PPC_MPC5200_BUGFIX) + if (bus->number) { + /* workaround for the bug 435 of the MPC5200 (L25R); + * Don't do 32 bits config access during type-1 cycles */ + switch (len) { + case 1: + value = 
in_8(((u8 __iomem *)hose->cfg_data) + + (offset & 3)); + break; + case 2: + value = in_le16(((u16 __iomem *)hose->cfg_data) + + ((offset>>1) & 1)); + break; + + default: + value = in_le16((u16 __iomem *)hose->cfg_data) | + (in_le16(((u16 __iomem *)hose->cfg_data) + 1) << 16); + break; + } + } + else +#endif + { + value = in_le32(hose->cfg_data); + + if (len != 4) { + value >>= ((offset & 0x3) << 3); + value &= 0xffffffff >> (32 - (len << 3)); + } + } + + *val = value; + + out_be32(hose->cfg_addr, 0); + mb(); + + return PCIBIOS_SUCCESSFUL; +} + +static int +mpc52xx_pci_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + u32 value, mask; + + if (ppc_md.pci_exclude_device) + if (ppc_md.pci_exclude_device(hose, bus->number, devfn)) + return PCIBIOS_DEVICE_NOT_FOUND; + + out_be32(hose->cfg_addr, + (1 << 31) | + (bus->number << 16) | + (devfn << 8) | + (offset & 0xfc)); + mb(); + +#if defined(CONFIG_PPC_MPC5200_BUGFIX) + if (bus->number) { + /* workaround for the bug 435 of the MPC5200 (L25R); + * Don't do 32 bits config access during type-1 cycles */ + switch (len) { + case 1: + out_8(((u8 __iomem *)hose->cfg_data) + + (offset & 3), val); + break; + case 2: + out_le16(((u16 __iomem *)hose->cfg_data) + + ((offset>>1) & 1), val); + break; + + default: + out_le16((u16 __iomem *)hose->cfg_data, + (u16)val); + out_le16(((u16 __iomem *)hose->cfg_data) + 1, + (u16)(val>>16)); + break; + } + } + else +#endif + { + if (len != 4) { + value = in_le32(hose->cfg_data); + + offset = (offset & 0x3) << 3; + mask = (0xffffffff >> (32 - (len << 3))); + mask <<= offset; + + value &= ~mask; + val = value | ((val << offset) & mask); + } + + out_le32(hose->cfg_data, val); + } + mb(); + + out_be32(hose->cfg_addr, 0); + mb(); + + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops mpc52xx_pci_ops = { + .read = mpc52xx_pci_read_config, + .write = mpc52xx_pci_write_config +}; + + +/* ======================================================================== */ +/* PCI setup */ +/* ======================================================================== */ + +static void __init +mpc52xx_pci_setup(struct pci_controller *hose, + struct mpc52xx_pci __iomem *pci_regs, phys_addr_t pci_phys) +{ + struct resource *res; + u32 tmp; + int iwcr0 = 0, iwcr1 = 0, iwcr2 = 0; + + pr_debug("mpc52xx_pci_setup(hose=%p, pci_regs=%p)\n", hose, pci_regs); + + /* pci_process_bridge_OF_ranges() found all our addresses for us; + * now store them in the right places */ + hose->cfg_addr = &pci_regs->car; + hose->cfg_data = hose->io_base_virt; + + /* Control regs */ + tmp = in_be32(&pci_regs->scr); + tmp |= PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY; + out_be32(&pci_regs->scr, tmp); + + /* Memory windows */ + res = &hose->mem_resources[0]; + if (res->flags) { + pr_debug("mem_resource[0] = " + "{.start=%llx, .end=%llx, .flags=%llx}\n", + (unsigned long long)res->start, + (unsigned long long)res->end, + (unsigned long long)res->flags); + out_be32(&pci_regs->iw0btar, + MPC52xx_PCI_IWBTAR_TRANSLATION(res->start, res->start, + resource_size(res))); + iwcr0 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_MEM; + if (res->flags & IORESOURCE_PREFETCH) + iwcr0 |= MPC52xx_PCI_IWCR_READ_MULTI; + else + iwcr0 |= MPC52xx_PCI_IWCR_READ; + } + + res = &hose->mem_resources[1]; + if (res->flags) { + pr_debug("mem_resource[1] = {.start=%x, .end=%x, .flags=%lx}\n", + res->start, res->end, res->flags); + out_be32(&pci_regs->iw1btar, + MPC52xx_PCI_IWBTAR_TRANSLATION(res->start, 
res->start,
+						       resource_size(res)));
+		iwcr1 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_MEM;
+		if (res->flags & IORESOURCE_PREFETCH)
+			iwcr1 |= MPC52xx_PCI_IWCR_READ_MULTI;
+		else
+			iwcr1 |= MPC52xx_PCI_IWCR_READ;
+	}
+
+	/* IO resources */
+	res = &hose->io_resource;
+	if (!res) {
+		printk(KERN_ERR "%s: Didn't find IO resources\n", __FILE__);
+		return;
+	}
+	pr_debug(".io_resource={.start=%llx,.end=%llx,.flags=%llx} "
+		 ".io_base_phys=0x%p\n",
+		 (unsigned long long)res->start,
+		 (unsigned long long)res->end,
+		 (unsigned long long)res->flags, (void *)hose->io_base_phys);
+	out_be32(&pci_regs->iw2btar,
+		 MPC52xx_PCI_IWBTAR_TRANSLATION(hose->io_base_phys,
+						res->start,
+						resource_size(res)));
+	iwcr2 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_IO;
+
+	/* Set all the IWCR fields at once; they're in the same reg */
+	out_be32(&pci_regs->iwcr, MPC52xx_PCI_IWCR_PACK(iwcr0, iwcr1, iwcr2));
+
+	/* Map IMMR onto PCI bus */
+	pci_phys &= 0xfffc0000;	/* bar0 has only 14 significant bits */
+	out_be32(&pci_regs->tbatr0, MPC52xx_PCI_TBATR_ENABLE | pci_phys);
+	out_be32(&pci_regs->bar0, PCI_BASE_ADDRESS_MEM_PREFETCH | pci_phys);
+
+	/* Map memory onto PCI bus */
+	out_be32(&pci_regs->tbatr1, MPC52xx_PCI_TBATR_ENABLE);
+	out_be32(&pci_regs->bar1, PCI_BASE_ADDRESS_MEM_PREFETCH);
+
+	out_be32(&pci_regs->tcr, MPC52xx_PCI_TCR_LD | MPC52xx_PCI_TCR_WCT8);
+
+	tmp = in_be32(&pci_regs->gscr);
+#if 0
+	/* Reset the external bus ( the internal PCI controller is NOT reset ) */
+	/* Not necessary, and can be a bad thing if for example the bootloader
+	   is displaying a splash screen or ... Just left here for
+	   documentation purposes if anyone needs it */
+	out_be32(&pci_regs->gscr, tmp | MPC52xx_PCI_GSCR_PR);
+	udelay(50);
+#endif
+
+	/* Make sure the PCI bridge is out of reset */
+	out_be32(&pci_regs->gscr, tmp & ~MPC52xx_PCI_GSCR_PR);
+}
+
+static void
+mpc52xx_pci_fixup_resources(struct pci_dev *dev)
+{
+	int i;
+
+	pr_debug("mpc52xx_pci_fixup_resources() %.4x:%.4x\n",
+		 dev->vendor, dev->device);
+
+	/* We don't rely on the boot loader to set up PCI, so reset all
+	   devices' resources */
+	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+		struct resource *res = &dev->resource[i];
+		if (res->end > res->start) {	/* Only valid resources */
+			res->end -= res->start;
+			res->start = 0;
+			res->flags |= IORESOURCE_UNSET;
+		}
+	}
+
+	/* The PCI Host bridge of the MPC52xx has a prefetch memory resource
+	   fixed to 1Gb. It doesn't fit in the resource system, so we remove it */
+	if ((dev->vendor == PCI_VENDOR_ID_MOTOROLA) &&
+	    (dev->device == PCI_DEVICE_ID_MOTOROLA_MPC5200
+	     || dev->device == PCI_DEVICE_ID_MOTOROLA_MPC5200B)) {
+		struct resource *res = &dev->resource[1];
+		res->start = res->end = res->flags = 0;
+	}
+}
+
+int __init
+mpc52xx_add_bridge(struct device_node *node)
+{
+	int len;
+	struct mpc52xx_pci __iomem *pci_regs;
+	struct pci_controller *hose;
+	const int *bus_range;
+	struct resource rsrc;
+
+	pr_debug("Adding MPC52xx PCI host bridge %s\n", node->full_name);
+
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
+	if (of_address_to_resource(node, 0, &rsrc) != 0) {
+		printk(KERN_ERR "Can't get %s resources\n", node->full_name);
+		return -EINVAL;
+	}
+
+	bus_range = of_get_property(node, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get %s bus-range, assume bus 0\n",
+		       node->full_name);
+		bus_range = NULL;
+	}
+
+	/* There are some PCI quirks on the 52xx; register the hook to
+	 * fix them. */
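+	/* (The hook assignment below is global, but the 52xx has a single
+	 * PCI host bridge, registered once from mpc52xx_setup_pci(), so it
+	 * is installed at most once.) */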
+	ppc_md.pcibios_fixup_resources = mpc52xx_pci_fixup_resources;
+
+	/* Alloc and initialize the pci controller. Values in the device
+	 * tree are needed to configure the 52xx PCI controller. Rather
+	 * than parse the tree here, let pci_process_bridge_OF_ranges()
+	 * do it for us and extract the values after the fact */
+	hose = pcibios_alloc_controller(node);
+	if (!hose)
+		return -ENOMEM;
+
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	hose->ops = &mpc52xx_pci_ops;
+
+	pci_regs = ioremap(rsrc.start, resource_size(&rsrc));
+	if (!pci_regs)
+		return -ENOMEM;
+
+	pci_process_bridge_OF_ranges(hose, node, 1);
+
+	/* Finish setting up PCI using values obtained by
+	 * pci_process_bridge_OF_ranges() */
+	mpc52xx_pci_setup(hose, pci_regs, rsrc.start);
+
+	return 0;
+}
+
+void __init mpc52xx_setup_pci(void)
+{
+	struct device_node *pci;
+
+	pci = of_find_matching_node(NULL, mpc52xx_pci_ids);
+	if (!pci)
+		return;
+
+	mpc52xx_add_bridge(pci);
+	of_node_put(pci);
+}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
new file mode 100644
index 000000000..2944bc84b
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -0,0 +1,518 @@
+/*
+ *
+ * Programmable Interrupt Controller functions for the Freescale MPC52xx.
+ *
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ * Copyright (C) 2006 bplan GmbH
+ * Copyright (C) 2004 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2003 Montavista Software, Inc
+ *
+ * Based on the code from the 2.4 kernel by
+ * Dale Farnsworth <dfarnsworth@mvista.com> and Kent Borg.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ */
+
+/*
+ * This is the device driver for the MPC5200 interrupt controller.
+ *
+ * hardware overview
+ * -----------------
+ * The MPC5200 interrupt controller groups all of the interrupt sources into
+ * three groups called 'critical', 'main', and 'peripheral'. The critical
+ * group has 3 irqs: external IRQ0, the slice timer 0 irq, and wake from deep
+ * sleep. The main group includes the other 3 external IRQs, slice timer 1,
+ * the RTC, gpios, and the general purpose timers. The peripheral group
+ * contains the remaining irq sources from all of the on-chip peripherals
+ * (PSCs, Ethernet, USB, DMA, etc).
+ *
+ * virqs
+ * -----
+ * The Linux IRQ subsystem requires that each irq source be assigned a
+ * system wide unique IRQ number starting at 1 (0 means no irq). Since
+ * systems can have multiple interrupt controllers, the virtual IRQ (virq)
+ * infrastructure lets each interrupt controller define a local set of IRQ
+ * numbers and maps those numbers into a unique range of the global IRQ#
+ * space.
+ *
+ * To define a range of virq numbers for this controller, this driver first
+ * assigns a number to each of the irq groups (called the level 1 or L1
+ * value). Within each group individual irq sources are also assigned a
+ * number, as defined by the MPC5200 user guide; this is referred to as the
+ * level 2 or L2 value. The virq number is determined by shifting up the
+ * L1 value by MPC52xx_IRQ_L1_OFFSET and ORing it with the L2 value.
+ *
+ * For example, the TMR0 interrupt is irq 9 in the main group. The
+ * virq for TMR0 is calculated by ((1 << MPC52xx_IRQ_L1_OFFSET) | 9).
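+ * With MPC52xx_IRQ_L1_OFFSET = 6 (see below) that works out to
+ * (1 << 6) | 9 = 0x49, i.e. hardware irq number 73.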
+ *
+ * The observant reader will also notice that this driver defines a 4th
+ * interrupt group called 'bestcomm'. The bestcomm group isn't physically
+ * part of the MPC5200 interrupt controller, but it is used here to assign
+ * a separate virq number for each bestcomm task (since any of the 16
+ * bestcomm tasks can cause the bestcomm interrupt to be raised). When a
+ * bestcomm interrupt occurs (peripheral group, irq 0), this driver determines
+ * which task needs servicing and returns the irq number for that task. This
+ * allows drivers which use bestcomm to define their own interrupt handlers.
+ *
+ * irq_chip structures
+ * -------------------
+ * For actually manipulating IRQs (masking, enabling, clearing, etc) this
+ * driver defines four separate 'irq_chip' structures, one for the main
+ * group, one for the peripherals group, one for the bestcomm group and one
+ * for external interrupts. The irq_chip structures provide the hooks needed
+ * to manipulate each IRQ source, and since each group has a separate set
+ * of registers for controlling the irq, it makes sense to divide up the
+ * hooks along those lines.
+ *
+ * You'll notice that there is not an irq_chip for the critical group and
+ * you'll also notice that there is an irq_chip defined for external
+ * interrupts even though there is no external interrupt group. The reason
+ * for this is that the four external interrupts are all managed with the same
+ * register even though one of the external IRQs is in the critical group and
+ * the other three are in the main group. For this reason it makes sense for
+ * the 4 external irqs to be managed using a separate set of hooks. The
+ * reason there is no crit irq_chip is that, of the 3 irqs in the critical
+ * group, only the external interrupt is actually supported at this time by
+ * this driver, and since the external interrupt is the only one used, it can
+ * just be directed to make use of the external irq irq_chip.
+ *
+ * device tree bindings
+ * --------------------
+ * The device tree bindings for this controller reflect the two-level
+ * organization of irqs in the device. #interrupt-cells = <3>, where the
+ * first cell is the group number [0..3], the second cell is the irq
+ * number in the group, and the third cell is the sense type (level/edge).
+ * For reference, the following is a list of the interrupt property values
+ * associated with external interrupt sources on the MPC5200 (because it is
+ * non-obvious from the mpc5200 manual what the interrupts property should
+ * be, and it is a frequently asked question).
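+ *
+ * For instance, a hypothetical device node wired to main-group irq 9
+ * (TMR0) would use (sketch only; for non-external sources the sense
+ * cell is not consulted, see mpc52xx_irqhost_xlate() below):
+ *
+ *   interrupts = <1 9 0>;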
+ * + * External interrupts: + * <0 0 n> external irq0, n is sense (n=0: level high, + * <1 1 n> external irq1, n is sense n=1: edge rising, + * <1 2 n> external irq2, n is sense n=2: edge falling, + * <1 3 n> external irq3, n is sense n=3: level low) + */ +#undef DEBUG + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/of.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/mpc52xx.h> + +/* HW IRQ mapping */ +#define MPC52xx_IRQ_L1_CRIT (0) +#define MPC52xx_IRQ_L1_MAIN (1) +#define MPC52xx_IRQ_L1_PERP (2) +#define MPC52xx_IRQ_L1_SDMA (3) + +#define MPC52xx_IRQ_L1_OFFSET (6) +#define MPC52xx_IRQ_L1_MASK (0x00c0) +#define MPC52xx_IRQ_L2_MASK (0x003f) + +#define MPC52xx_IRQ_HIGHTESTHWIRQ (0xd0) + + +/* MPC5200 device tree match tables */ +static const struct of_device_id mpc52xx_pic_ids[] __initconst = { + { .compatible = "fsl,mpc5200-pic", }, + { .compatible = "mpc5200-pic", }, + {} +}; +static const struct of_device_id mpc52xx_sdma_ids[] __initconst = { + { .compatible = "fsl,mpc5200-bestcomm", }, + { .compatible = "mpc5200-bestcomm", }, + {} +}; + +static struct mpc52xx_intr __iomem *intr; +static struct mpc52xx_sdma __iomem *sdma; +static struct irq_domain *mpc52xx_irqhost = NULL; + +static unsigned char mpc52xx_map_senses[4] = { + IRQ_TYPE_LEVEL_HIGH, + IRQ_TYPE_EDGE_RISING, + IRQ_TYPE_EDGE_FALLING, + IRQ_TYPE_LEVEL_LOW, +}; + +/* Utility functions */ +static inline void io_be_setbit(u32 __iomem *addr, int bitno) +{ + out_be32(addr, in_be32(addr) | (1 << bitno)); +} + +static inline void io_be_clrbit(u32 __iomem *addr, int bitno) +{ + out_be32(addr, in_be32(addr) & ~(1 << bitno)); +} + +/* + * IRQ[0-3] interrupt irq_chip + */ +static void mpc52xx_extirq_mask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_clrbit(&intr->ctrl, 11 - l2irq); +} + +static void mpc52xx_extirq_unmask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_setbit(&intr->ctrl, 11 - l2irq); +} + +static void mpc52xx_extirq_ack(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_setbit(&intr->ctrl, 27-l2irq); +} + +static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type) +{ + u32 ctrl_reg, type; + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + void *handler = handle_level_irq; + + pr_debug("%s: irq=%x. 
l2=%d flow_type=%d\n", __func__, + (int) irqd_to_hwirq(d), l2irq, flow_type); + + switch (flow_type) { + case IRQF_TRIGGER_HIGH: type = 0; break; + case IRQF_TRIGGER_RISING: type = 1; handler = handle_edge_irq; break; + case IRQF_TRIGGER_FALLING: type = 2; handler = handle_edge_irq; break; + case IRQF_TRIGGER_LOW: type = 3; break; + default: + type = 0; + } + + ctrl_reg = in_be32(&intr->ctrl); + ctrl_reg &= ~(0x3 << (22 - (l2irq * 2))); + ctrl_reg |= (type << (22 - (l2irq * 2))); + out_be32(&intr->ctrl, ctrl_reg); + + __irq_set_handler_locked(d->irq, handler); + + return 0; +} + +static struct irq_chip mpc52xx_extirq_irqchip = { + .name = "MPC52xx External", + .irq_mask = mpc52xx_extirq_mask, + .irq_unmask = mpc52xx_extirq_unmask, + .irq_ack = mpc52xx_extirq_ack, + .irq_set_type = mpc52xx_extirq_set_type, +}; + +/* + * Main interrupt irq_chip + */ +static int mpc52xx_null_set_type(struct irq_data *d, unsigned int flow_type) +{ + return 0; /* Do nothing so that the sense mask will get updated */ +} + +static void mpc52xx_main_mask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_setbit(&intr->main_mask, 16 - l2irq); +} + +static void mpc52xx_main_unmask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_clrbit(&intr->main_mask, 16 - l2irq); +} + +static struct irq_chip mpc52xx_main_irqchip = { + .name = "MPC52xx Main", + .irq_mask = mpc52xx_main_mask, + .irq_mask_ack = mpc52xx_main_mask, + .irq_unmask = mpc52xx_main_unmask, + .irq_set_type = mpc52xx_null_set_type, +}; + +/* + * Peripherals interrupt irq_chip + */ +static void mpc52xx_periph_mask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_setbit(&intr->per_mask, 31 - l2irq); +} + +static void mpc52xx_periph_unmask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_clrbit(&intr->per_mask, 31 - l2irq); +} + +static struct irq_chip mpc52xx_periph_irqchip = { + .name = "MPC52xx Peripherals", + .irq_mask = mpc52xx_periph_mask, + .irq_mask_ack = mpc52xx_periph_mask, + .irq_unmask = mpc52xx_periph_unmask, + .irq_set_type = mpc52xx_null_set_type, +}; + +/* + * SDMA interrupt irq_chip + */ +static void mpc52xx_sdma_mask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_setbit(&sdma->IntMask, l2irq); +} + +static void mpc52xx_sdma_unmask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_clrbit(&sdma->IntMask, l2irq); +} + +static void mpc52xx_sdma_ack(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + out_be32(&sdma->IntPend, 1 << l2irq); +} + +static struct irq_chip mpc52xx_sdma_irqchip = { + .name = "MPC52xx SDMA", + .irq_mask = mpc52xx_sdma_mask, + .irq_unmask = mpc52xx_sdma_unmask, + .irq_ack = mpc52xx_sdma_ack, + .irq_set_type = mpc52xx_null_set_type, +}; + +/** + * mpc52xx_is_extirq - Returns true if hwirq number is for an external IRQ + */ +static int mpc52xx_is_extirq(int l1, int l2) +{ + return ((l1 == 0) && (l2 == 0)) || + ((l1 == 1) && (l2 >= 1) && (l2 <= 3)); +} + +/** + * mpc52xx_irqhost_xlate - translate virq# from device tree interrupts property + */ +static int mpc52xx_irqhost_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, + unsigned int *out_flags) +{ + int intrvect_l1; + int intrvect_l2; + int intrvect_type; + int intrvect_linux; + + if (intsize != 3) + return -1; + + intrvect_l1 = (int)intspec[0]; + intrvect_l2 = 
(int)intspec[1]; + intrvect_type = (int)intspec[2] & 0x3; + + intrvect_linux = (intrvect_l1 << MPC52xx_IRQ_L1_OFFSET) & + MPC52xx_IRQ_L1_MASK; + intrvect_linux |= intrvect_l2 & MPC52xx_IRQ_L2_MASK; + + *out_hwirq = intrvect_linux; + *out_flags = IRQ_TYPE_LEVEL_LOW; + if (mpc52xx_is_extirq(intrvect_l1, intrvect_l2)) + *out_flags = mpc52xx_map_senses[intrvect_type]; + + pr_debug("return %x, l1=%d, l2=%d\n", intrvect_linux, intrvect_l1, + intrvect_l2); + return 0; +} + +/** + * mpc52xx_irqhost_map - Hook to map from virq to an irq_chip structure + */ +static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t irq) +{ + int l1irq; + int l2irq; + struct irq_chip *uninitialized_var(irqchip); + void *hndlr; + int type; + u32 reg; + + l1irq = (irq & MPC52xx_IRQ_L1_MASK) >> MPC52xx_IRQ_L1_OFFSET; + l2irq = irq & MPC52xx_IRQ_L2_MASK; + + /* + * External IRQs are handled differently by the hardware so they are + * handled by a dedicated irq_chip structure. + */ + if (mpc52xx_is_extirq(l1irq, l2irq)) { + reg = in_be32(&intr->ctrl); + type = mpc52xx_map_senses[(reg >> (22 - l2irq * 2)) & 0x3]; + if ((type == IRQ_TYPE_EDGE_FALLING) || + (type == IRQ_TYPE_EDGE_RISING)) + hndlr = handle_edge_irq; + else + hndlr = handle_level_irq; + + irq_set_chip_and_handler(virq, &mpc52xx_extirq_irqchip, hndlr); + pr_debug("%s: External IRQ%i virq=%x, hw=%x. type=%x\n", + __func__, l2irq, virq, (int)irq, type); + return 0; + } + + /* It is an internal SOC irq. Choose the correct irq_chip */ + switch (l1irq) { + case MPC52xx_IRQ_L1_MAIN: irqchip = &mpc52xx_main_irqchip; break; + case MPC52xx_IRQ_L1_PERP: irqchip = &mpc52xx_periph_irqchip; break; + case MPC52xx_IRQ_L1_SDMA: irqchip = &mpc52xx_sdma_irqchip; break; + case MPC52xx_IRQ_L1_CRIT: + pr_warn("%s: Critical IRQ #%d is unsupported! Nopping it.\n", + __func__, l2irq); + irq_set_chip(virq, &no_irq_chip); + return 0; + } + + irq_set_chip_and_handler(virq, irqchip, handle_level_irq); + pr_debug("%s: virq=%x, l1=%i, l2=%i\n", __func__, virq, l1irq, l2irq); + + return 0; +} + +static const struct irq_domain_ops mpc52xx_irqhost_ops = { + .xlate = mpc52xx_irqhost_xlate, + .map = mpc52xx_irqhost_map, +}; + +/** + * mpc52xx_init_irq - Initialize and register with the virq subsystem + * + * Hook for setting up IRQs on an mpc5200 system. A pointer to this function + * is to be put into the machine definition structure. + * + * This function searches the device tree for an MPC5200 interrupt controller, + * initializes it, and registers it with the virq subsystem. + */ +void __init mpc52xx_init_irq(void) +{ + u32 intr_ctrl; + struct device_node *picnode; + struct device_node *np; + + /* Remap the necessary zones */ + picnode = of_find_matching_node(NULL, mpc52xx_pic_ids); + intr = of_iomap(picnode, 0); + if (!intr) + panic(__FILE__ ": find_and_map failed on 'mpc5200-pic'. " + "Check node !"); + + np = of_find_matching_node(NULL, mpc52xx_sdma_ids); + sdma = of_iomap(np, 0); + of_node_put(np); + if (!sdma) + panic(__FILE__ ": find_and_map failed on 'mpc5200-bestcomm'. " + "Check node !"); + + pr_debug("MPC5200 IRQ controller mapped to 0x%p\n", intr); + + /* Disable all interrupt sources. 
*/
+ out_be32(&sdma->IntPend, 0xffffffff); /* 1 means clear pending */
+ out_be32(&sdma->IntMask, 0xffffffff); /* 1 means disabled */
+ out_be32(&intr->per_mask, 0x7ffffc00); /* 1 means disabled */
+ out_be32(&intr->main_mask, 0x00010fff); /* 1 means disabled */
+ intr_ctrl = in_be32(&intr->ctrl);
+ intr_ctrl &= 0x00ff0000; /* Keeps IRQ[0-3] config */
+ intr_ctrl |= 0x0f000000 | /* clear IRQ 0-3 */
+ 0x00001000 | /* MEE master external enable */
+ 0x00000000 | /* 0 means disable IRQ 0-3 */
+ 0x00000001; /* CEb route critical normally */
+ out_be32(&intr->ctrl, intr_ctrl);
+
+ /* Zero a bunch of the priority settings. */
+ out_be32(&intr->per_pri1, 0);
+ out_be32(&intr->per_pri2, 0);
+ out_be32(&intr->per_pri3, 0);
+ out_be32(&intr->main_pri1, 0);
+ out_be32(&intr->main_pri2, 0);
+
+ /*
+ * As a last step, add an irq host to translate the real
+ * hw irq information provided by the ofw to linux virq
+ */
+ mpc52xx_irqhost = irq_domain_add_linear(picnode,
+ MPC52xx_IRQ_HIGHTESTHWIRQ,
+ &mpc52xx_irqhost_ops, NULL);
+
+ if (!mpc52xx_irqhost)
+ panic(__FILE__ ": Cannot allocate the IRQ host\n");
+
+ irq_set_default_host(mpc52xx_irqhost);
+
+ pr_info("MPC52xx PIC is up and running!\n");
+}
+
+/**
+ * mpc52xx_get_irq - Get pending interrupt number hook function
+ *
+ * Called by the interrupt handler to determine what IRQ handler needs to be
+ * executed.
+ *
+ * Status of pending interrupts is determined by reading the encoded status
+ * register. The encoded status register has three fields; one for each of the
+ * types of interrupts defined by the controller - 'critical', 'main' and
+ * 'peripheral'. This function reads the status register and returns the IRQ
+ * number associated with the highest priority pending interrupt. 'Critical'
+ * interrupts have the highest priority, followed by 'main' interrupts, and
+ * then 'peripheral'.
+ *
+ * The mpc5200 interrupt controller can be configured to boost the priority
+ * of individual 'peripheral' interrupts. If this is the case then a special
+ * value will appear in either the crit or main fields indicating a high
+ * or medium priority peripheral irq has occurred.
+ *
+ * This function checks each of the 3 irq request fields and returns the
+ * first pending interrupt that it finds.
+ *
+ * This function also identifies a 4th type of interrupt: 'bestcomm'. Each
+ * bestcomm DMA task can raise the bestcomm peripheral interrupt. When this
+ * occurs, a task-specific IRQ# is decoded so that each task can have its
+ * own IRQ handler.
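+ *
+ * As a worked example (the value is illustrative, not from the manual):
+ * if enc_status read 0x00290000, the 'main' pending bit (0x00200000)
+ * would be set and bits [20:16] decode to irq 9 (TMR0), so the hwirq is
+ * (MPC52xx_IRQ_L1_MAIN << MPC52xx_IRQ_L1_OFFSET) | 9 == 0x49 and the
+ * matching virq is returned via irq_linear_revmap().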
+ */ +unsigned int mpc52xx_get_irq(void) +{ + u32 status; + int irq; + + status = in_be32(&intr->enc_status); + if (status & 0x00000400) { /* critical */ + irq = (status >> 8) & 0x3; + if (irq == 2) /* high priority peripheral */ + goto peripheral; + irq |= (MPC52xx_IRQ_L1_CRIT << MPC52xx_IRQ_L1_OFFSET); + } else if (status & 0x00200000) { /* main */ + irq = (status >> 16) & 0x1f; + if (irq == 4) /* low priority peripheral */ + goto peripheral; + irq |= (MPC52xx_IRQ_L1_MAIN << MPC52xx_IRQ_L1_OFFSET); + } else if (status & 0x20000000) { /* peripheral */ + peripheral: + irq = (status >> 24) & 0x1f; + if (irq == 0) { /* bestcomm */ + status = in_be32(&sdma->IntPend); + irq = ffs(status) - 1; + irq |= (MPC52xx_IRQ_L1_SDMA << MPC52xx_IRQ_L1_OFFSET); + } else { + irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET); + } + } else { + return NO_IRQ; + } + + return irq_linear_revmap(mpc52xx_irqhost, irq); +} diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c new file mode 100644 index 000000000..8310e8b5b --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c @@ -0,0 +1,200 @@ +#include <linux/init.h> +#include <linux/suspend.h> +#include <linux/io.h> +#include <asm/time.h> +#include <asm/cacheflush.h> +#include <asm/mpc52xx.h> + +/* these are defined in mpc52xx_sleep.S, and only used here */ +extern void mpc52xx_deep_sleep(void __iomem *sram, void __iomem *sdram_regs, + struct mpc52xx_cdm __iomem *, struct mpc52xx_intr __iomem*); +extern void mpc52xx_ds_sram(void); +extern const long mpc52xx_ds_sram_size; +extern void mpc52xx_ds_cached(void); +extern const long mpc52xx_ds_cached_size; + +static void __iomem *mbar; +static void __iomem *sdram; +static struct mpc52xx_cdm __iomem *cdm; +static struct mpc52xx_intr __iomem *intr; +static struct mpc52xx_gpio_wkup __iomem *gpiow; +static void __iomem *sram; +static int sram_size; + +struct mpc52xx_suspend mpc52xx_suspend; + +static int mpc52xx_pm_valid(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_STANDBY: + return 1; + default: + return 0; + } +} + +int mpc52xx_set_wakeup_gpio(u8 pin, u8 level) +{ + u16 tmp; + + /* enable gpio */ + out_8(&gpiow->wkup_gpioe, in_8(&gpiow->wkup_gpioe) | (1 << pin)); + /* set as input */ + out_8(&gpiow->wkup_ddr, in_8(&gpiow->wkup_ddr) & ~(1 << pin)); + /* enable deep sleep interrupt */ + out_8(&gpiow->wkup_inten, in_8(&gpiow->wkup_inten) | (1 << pin)); + /* low/high level creates wakeup interrupt */ + tmp = in_be16(&gpiow->wkup_itype); + tmp &= ~(0x3 << (pin * 2)); + tmp |= (!level + 1) << (pin * 2); + out_be16(&gpiow->wkup_itype, tmp); + /* master enable */ + out_8(&gpiow->wkup_maste, 1); + + return 0; +} + +int mpc52xx_pm_prepare(void) +{ + struct device_node *np; + const struct of_device_id immr_ids[] = { + { .compatible = "fsl,mpc5200-immr", }, + { .compatible = "fsl,mpc5200b-immr", }, + { .type = "soc", .compatible = "mpc5200", }, /* lite5200 */ + { .type = "builtin", .compatible = "mpc5200", }, /* efika */ + {} + }; + struct resource res; + + /* map the whole register space */ + np = of_find_matching_node(NULL, immr_ids); + + if (of_address_to_resource(np, 0, &res)) { + pr_err("mpc52xx_pm_prepare(): could not get IMMR address\n"); + of_node_put(np); + return -ENOSYS; + } + + mbar = ioremap(res.start, 0xc000); /* we should map whole region including SRAM */ + + of_node_put(np); + if (!mbar) { + pr_err("mpc52xx_pm_prepare(): could not map registers\n"); + return -ENOSYS; + } + /* these offsets are from mpc5200 users manual */ + sdram = mbar + 0x100; 
+ cdm = mbar + 0x200; + intr = mbar + 0x500; + gpiow = mbar + 0xc00; + sram = mbar + 0x8000; /* Those will be handled by the */ + sram_size = 0x4000; /* bestcomm driver soon */ + + /* call board suspend code, if applicable */ + if (mpc52xx_suspend.board_suspend_prepare) + mpc52xx_suspend.board_suspend_prepare(mbar); + else { + printk(KERN_ALERT "%s: %i don't know how to wake up the board\n", + __func__, __LINE__); + goto out_unmap; + } + + return 0; + + out_unmap: + iounmap(mbar); + return -ENOSYS; +} + + +char saved_sram[0x4000]; + +int mpc52xx_pm_enter(suspend_state_t state) +{ + u32 clk_enables; + u32 msr, hid0; + u32 intr_main_mask; + void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500; + unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size; + char saved_0x500[mpc52xx_ds_cached_size]; + + /* disable all interrupts in PIC */ + intr_main_mask = in_be32(&intr->main_mask); + out_be32(&intr->main_mask, intr_main_mask | 0x1ffff); + + /* don't let DEC expire any time soon */ + mtspr(SPRN_DEC, 0x7fffffff); + + /* save SRAM */ + memcpy(saved_sram, sram, sram_size); + + /* copy low level suspend code to sram */ + memcpy(sram, mpc52xx_ds_sram, mpc52xx_ds_sram_size); + + out_8(&cdm->ccs_sleep_enable, 1); + out_8(&cdm->osc_sleep_enable, 1); + out_8(&cdm->ccs_qreq_test, 1); + + /* disable all but SDRAM and bestcomm (SRAM) clocks */ + clk_enables = in_be32(&cdm->clk_enables); + out_be32(&cdm->clk_enables, clk_enables & 0x00088000); + + /* disable power management */ + msr = mfmsr(); + mtmsr(msr & ~MSR_POW); + + /* enable sleep mode, disable others */ + hid0 = mfspr(SPRN_HID0); + mtspr(SPRN_HID0, (hid0 & ~(HID0_DOZE | HID0_NAP | HID0_DPM)) | HID0_SLEEP); + + /* save original, copy our irq handler, flush from dcache and invalidate icache */ + memcpy(saved_0x500, irq_0x500, mpc52xx_ds_cached_size); + memcpy(irq_0x500, mpc52xx_ds_cached, mpc52xx_ds_cached_size); + flush_icache_range((unsigned long)irq_0x500, irq_0x500_stop); + + /* call low-level sleep code */ + mpc52xx_deep_sleep(sram, sdram, cdm, intr); + + /* restore original irq handler */ + memcpy(irq_0x500, saved_0x500, mpc52xx_ds_cached_size); + flush_icache_range((unsigned long)irq_0x500, irq_0x500_stop); + + /* restore old power mode */ + mtmsr(msr & ~MSR_POW); + mtspr(SPRN_HID0, hid0); + mtmsr(msr); + + out_be32(&cdm->clk_enables, clk_enables); + out_8(&cdm->ccs_sleep_enable, 0); + out_8(&cdm->osc_sleep_enable, 0); + + /* restore SRAM */ + memcpy(sram, saved_sram, sram_size); + + /* reenable interrupts in PIC */ + out_be32(&intr->main_mask, intr_main_mask); + + return 0; +} + +void mpc52xx_pm_finish(void) +{ + /* call board resume code */ + if (mpc52xx_suspend.board_resume_finish) + mpc52xx_suspend.board_resume_finish(mbar); + + iounmap(mbar); +} + +static const struct platform_suspend_ops mpc52xx_pm_ops = { + .valid = mpc52xx_pm_valid, + .prepare = mpc52xx_pm_prepare, + .enter = mpc52xx_pm_enter, + .finish = mpc52xx_pm_finish, +}; + +int __init mpc52xx_pm_init(void) +{ + suspend_set_ops(&mpc52xx_pm_ops); + return 0; +} diff --git a/arch/powerpc/platforms/52xx/mpc52xx_sleep.S b/arch/powerpc/platforms/52xx/mpc52xx_sleep.S new file mode 100644 index 000000000..4dc170b0a --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc52xx_sleep.S @@ -0,0 +1,154 @@ +#include <asm/reg.h> +#include <asm/ppc_asm.h> +#include <asm/processor.h> + + +.text + +_GLOBAL(mpc52xx_deep_sleep) +mpc52xx_deep_sleep: /* args r3-r6: SRAM, SDRAM regs, CDM regs, INTR regs */ + + /* enable interrupts */ + mfmsr r7 + ori r7, r7, 0x8000 
/* EE */
+ mtmsr r7
+ sync; isync;
+
+ li r10, 0 /* flag that irq handler sets */
+
+ /* enable tmr7 (or any other) interrupt */
+ lwz r8, 0x14(r6) /* intr->main_mask */
+ ori r8, r8, 0x1
+ xori r8, r8, 0x1
+ stw r8, 0x14(r6)
+ sync
+
+ /* emulate tmr7 interrupt */
+ li r8, 0x1
+ stw r8, 0x40(r6) /* intr->main_emulate */
+ sync
+
+ /* wait for it to happen */
+1:
+ cmpi cr0, r10, 1
+ bne cr0, 1b
+
+ /* lock icache */
+ mfspr r10, SPRN_HID0
+ ori r10, r10, 0x2000
+ sync; isync;
+ mtspr SPRN_HID0, r10
+ sync; isync;
+
+
+ mflr r9 /* save LR */
+
+ /* jump to sram */
+ mtlr r3
+ blrl
+
+ mtlr r9 /* restore LR */
+
+ /* unlock icache */
+ mfspr r10, SPRN_HID0
+ ori r10, r10, 0x2000
+ xori r10, r10, 0x2000
+ sync; isync;
+ mtspr SPRN_HID0, r10
+ sync; isync;
+
+
+ /* return to C code */
+ blr
+
+
+_GLOBAL(mpc52xx_ds_sram)
+mpc52xx_ds_sram:
+ /* put SDRAM into self-refresh */
+ lwz r8, 0x4(r4) /* sdram->ctrl */
+
+ oris r8, r8, 0x8000 /* mode_en */
+ stw r8, 0x4(r4)
+ sync
+
+ ori r8, r8, 0x0002 /* soft_pre */
+ stw r8, 0x4(r4)
+ sync
+ xori r8, r8, 0x0002
+
+ xoris r8, r8, 0x8000 /* !mode_en */
+ stw r8, 0x4(r4)
+ sync
+
+ oris r8, r8, 0x5000
+ xoris r8, r8, 0x4000 /* ref_en !cke */
+ stw r8, 0x4(r4)
+ sync
+
+ /* disable SDRAM clock */
+ lwz r8, 0x14(r5) /* cdm->clkenable */
+ ori r8, r8, 0x0008
+ xori r8, r8, 0x0008
+ stw r8, 0x14(r5)
+ sync
+
+
+ /* put mpc5200 to sleep */
+ mfmsr r10
+ oris r10, r10, 0x0004 /* POW = 1 */
+ sync; isync;
+ mtmsr r10
+ sync; isync;
+
+
+ /* enable clock */
+ lwz r8, 0x14(r5)
+ ori r8, r8, 0x0008
+ stw r8, 0x14(r5)
+ sync
+
+ /* get ram out of self-refresh */
+ lwz r8, 0x4(r4)
+ oris r8, r8, 0x5000 /* cke ref_en */
+ stw r8, 0x4(r4)
+ sync
+
+ blr
+_GLOBAL(mpc52xx_ds_sram_size)
+mpc52xx_ds_sram_size:
+ .long $-mpc52xx_ds_sram
+
+
+/* ### interrupt handler for wakeup from deep-sleep ### */
+_GLOBAL(mpc52xx_ds_cached)
+mpc52xx_ds_cached:
+ mtspr SPRN_SPRG0, r7
+ mtspr SPRN_SPRG1, r8
+
+ /* disable emulated interrupt */
+ mfspr r7, 311 /* MBAR */
+ addi r7, r7, 0x540 /* intr->main_emul */
+ li r8, 0
+ stw r8, 0(r7)
+ sync
+ dcbf 0, r7
+
+ /* acknowledge wakeup, so CCS releases power down */
+ mfspr r7, 311 /* MBAR */
+ addi r7, r7, 0x524 /* intr->enc_status */
+ lwz r8, 0(r7)
+ ori r8, r8, 0x0400
+ stw r8, 0(r7)
+ sync
+ dcbf 0, r7
+
+ /* flag - we handled the interrupt */
+ li r10, 1
+
+ mfspr r8, SPRN_SPRG1
+ mfspr r7, SPRN_SPRG0
+
+ rfi
+_GLOBAL(mpc52xx_ds_cached_size)
+mpc52xx_ds_cached_size:
+ .long $-mpc52xx_ds_cached diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig new file mode 100644 index 000000000..7c7df4003 --- /dev/null +++ b/arch/powerpc/platforms/82xx/Kconfig @@ -0,0 +1,72 @@
+menuconfig PPC_82xx
+ bool "82xx-based boards (PQ II)"
+ depends on 6xx
+
+if PPC_82xx
+
+config MPC8272_ADS
+ bool "Freescale MPC8272 ADS"
+ select DEFAULT_UIMAGE
+ select PQ2ADS
+ select 8272
+ select 8260
+ select FSL_SOC
+ select PQ2_ADS_PCI_PIC if PCI
+ help
+ This option enables support for the MPC8272 ADS board.
+
+config PQ2FADS
+ bool "Freescale PQ2FADS"
+ select DEFAULT_UIMAGE
+ select PQ2ADS
+ select 8260
+ select FSL_SOC
+ select PQ2_ADS_PCI_PIC if PCI
+ help
+ This option enables support for the PQ2FADS board.
+
+config EP8248E
+ bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)"
+ select 8272
+ select 8260
+ select FSL_SOC
+ select PHYLIB
+ select MDIO_BITBANG
+ help
+ This enables support for the Embedded Planet EP8248E board.
+ + This board is also resold by Freescale as the QUICCStart + MPC8248 Evaluation System and/or the CWH-PPC-8248N-VE. + +config MGCOGE + bool "Keymile MGCOGE" + select 8272 + select 8260 + select FSL_SOC + help + This enables support for the Keymile MGCOGE board. + +endif + +config PQ2ADS + bool + default n + +config 8260 + bool + depends on 6xx + select CPM2 + help + The MPC8260 is a typical embedded CPU made by Freescale. Selecting + this option means that you wish to build a kernel for a machine with + an 8260 class CPU. + +config 8272 + bool + select 8260 + help + The MPC8272 CPM has a different internal dpram setup than other CPM2 + devices + +config PQ2_ADS_PCI_PIC + bool diff --git a/arch/powerpc/platforms/82xx/Makefile b/arch/powerpc/platforms/82xx/Makefile new file mode 100644 index 000000000..455fe21e3 --- /dev/null +++ b/arch/powerpc/platforms/82xx/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for the PowerPC 82xx linux kernel. +# +obj-$(CONFIG_MPC8272_ADS) += mpc8272_ads.o +obj-$(CONFIG_CPM2) += pq2.o +obj-$(CONFIG_PQ2_ADS_PCI_PIC) += pq2ads-pci-pic.o +obj-$(CONFIG_PQ2FADS) += pq2fads.o +obj-$(CONFIG_EP8248E) += ep8248e.o +obj-$(CONFIG_MGCOGE) += km82xx.o diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c new file mode 100644 index 000000000..a0cb8bd41 --- /dev/null +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -0,0 +1,334 @@ +/* + * Embedded Planet EP8248E support + * + * Copyright 2007 Freescale Semiconductor, Inc. + * Author: Scott Wood <scottwood@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/fsl_devices.h> +#include <linux/mdio-bitbang.h> +#include <linux/of_mdio.h> +#include <linux/slab.h> +#include <linux/of_platform.h> + +#include <asm/io.h> +#include <asm/cpm2.h> +#include <asm/udbg.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/mpc8260.h> +#include <asm/prom.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/cpm2_pic.h> + +#include "pq2.h" + +static u8 __iomem *ep8248e_bcsr; +static struct device_node *ep8248e_bcsr_node; + +#define BCSR7_SCC2_ENABLE 0x10 + +#define BCSR8_PHY1_ENABLE 0x80 +#define BCSR8_PHY1_POWER 0x40 +#define BCSR8_PHY2_ENABLE 0x20 +#define BCSR8_PHY2_POWER 0x10 +#define BCSR8_MDIO_READ 0x04 +#define BCSR8_MDIO_CLOCK 0x02 +#define BCSR8_MDIO_DATA 0x01 + +#define BCSR9_USB_ENABLE 0x80 +#define BCSR9_USB_POWER 0x40 +#define BCSR9_USB_HOST 0x20 +#define BCSR9_USB_FULL_SPEED_TARGET 0x10 + +static void __init ep8248e_pic_init(void) +{ + struct device_node *np = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic"); + if (!np) { + printk(KERN_ERR "PIC init: can not find cpm-pic node\n"); + return; + } + + cpm2_pic_init(np); + of_node_put(np); +} + +static void ep8248e_set_mdc(struct mdiobb_ctrl *ctrl, int level) +{ + if (level) + setbits8(&ep8248e_bcsr[8], BCSR8_MDIO_CLOCK); + else + clrbits8(&ep8248e_bcsr[8], BCSR8_MDIO_CLOCK); + + /* Read back to flush the write. */ + in_8(&ep8248e_bcsr[8]); +} + +static void ep8248e_set_mdio_dir(struct mdiobb_ctrl *ctrl, int output) +{ + if (output) + clrbits8(&ep8248e_bcsr[8], BCSR8_MDIO_READ); + else + setbits8(&ep8248e_bcsr[8], BCSR8_MDIO_READ); + + /* Read back to flush the write. 
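+ * Without the read back the write could still be posted, so the MDIO
+ * clock/data edges would not be guaranteed to have reached the BCSR
+ * before the bit-bang sequence moves on.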
*/ + in_8(&ep8248e_bcsr[8]); +} + +static void ep8248e_set_mdio_data(struct mdiobb_ctrl *ctrl, int data) +{ + if (data) + setbits8(&ep8248e_bcsr[8], BCSR8_MDIO_DATA); + else + clrbits8(&ep8248e_bcsr[8], BCSR8_MDIO_DATA); + + /* Read back to flush the write. */ + in_8(&ep8248e_bcsr[8]); +} + +static int ep8248e_get_mdio_data(struct mdiobb_ctrl *ctrl) +{ + return in_8(&ep8248e_bcsr[8]) & BCSR8_MDIO_DATA; +} + +static const struct mdiobb_ops ep8248e_mdio_ops = { + .set_mdc = ep8248e_set_mdc, + .set_mdio_dir = ep8248e_set_mdio_dir, + .set_mdio_data = ep8248e_set_mdio_data, + .get_mdio_data = ep8248e_get_mdio_data, + .owner = THIS_MODULE, +}; + +static struct mdiobb_ctrl ep8248e_mdio_ctrl = { + .ops = &ep8248e_mdio_ops, +}; + +static int ep8248e_mdio_probe(struct platform_device *ofdev) +{ + struct mii_bus *bus; + struct resource res; + struct device_node *node; + int ret; + + node = of_get_parent(ofdev->dev.of_node); + of_node_put(node); + if (node != ep8248e_bcsr_node) + return -ENODEV; + + ret = of_address_to_resource(ofdev->dev.of_node, 0, &res); + if (ret) + return ret; + + bus = alloc_mdio_bitbang(&ep8248e_mdio_ctrl); + if (!bus) + return -ENOMEM; + + bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); + if (bus->irq == NULL) { + ret = -ENOMEM; + goto err_free_bus; + } + + bus->name = "ep8248e-mdio-bitbang"; + bus->parent = &ofdev->dev; + snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); + + ret = of_mdiobus_register(bus, ofdev->dev.of_node); + if (ret) + goto err_free_irq; + + return 0; +err_free_irq: + kfree(bus->irq); +err_free_bus: + free_mdio_bitbang(bus); + return ret; +} + +static int ep8248e_mdio_remove(struct platform_device *ofdev) +{ + BUG(); + return 0; +} + +static const struct of_device_id ep8248e_mdio_match[] = { + { + .compatible = "fsl,ep8248e-mdio-bitbang", + }, + {}, +}; + +static struct platform_driver ep8248e_mdio_driver = { + .driver = { + .name = "ep8248e-mdio-bitbang", + .of_match_table = ep8248e_mdio_match, + }, + .probe = ep8248e_mdio_probe, + .remove = ep8248e_mdio_remove, +}; + +struct cpm_pin { + int port, pin, flags; +}; + +static __initdata struct cpm_pin ep8248e_pins[] = { + /* SMC1 */ + {2, 4, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + + /* SCC1 */ + {2, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 29, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC1 */ + {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {2, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC2 */ + {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 24, CPM_PIN_OUTPUT | 
CPM_PIN_PRIMARY}, + {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* I2C */ + {4, 14, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {4, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + + /* USB */ + {2, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {2, 24, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ep8248e_pins); i++) { + const struct cpm_pin *pin = &ep8248e_pins[i]; + cpm2_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm2_smc_clk_setup(CPM_CLK_SMC1, CPM_BRG7); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK8, CPM_CLK_TX); /* USB */ + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK11, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX); +} + +static void __init ep8248e_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("ep8248e_setup_arch()", 0); + + cpm2_reset(); + + /* When this is set, snooping CPM DMA from RAM causes + * machine checks. See erratum SIU18. + */ + clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP); + + ep8248e_bcsr_node = + of_find_compatible_node(NULL, NULL, "fsl,ep8248e-bcsr"); + if (!ep8248e_bcsr_node) { + printk(KERN_ERR "No bcsr in device tree\n"); + return; + } + + ep8248e_bcsr = of_iomap(ep8248e_bcsr_node, 0); + if (!ep8248e_bcsr) { + printk(KERN_ERR "Cannot map BCSR registers\n"); + of_node_put(ep8248e_bcsr_node); + ep8248e_bcsr_node = NULL; + return; + } + + setbits8(&ep8248e_bcsr[7], BCSR7_SCC2_ENABLE); + setbits8(&ep8248e_bcsr[8], BCSR8_PHY1_ENABLE | BCSR8_PHY1_POWER | + BCSR8_PHY2_ENABLE | BCSR8_PHY2_POWER); + + init_ioports(); + + if (ppc_md.progress) + ppc_md.progress("ep8248e_setup_arch(), finish", 0); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + { .compatible = "fsl,ep8248e-bcsr", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + platform_driver_register(&ep8248e_mdio_driver); + + return 0; +} +machine_device_initcall(ep8248e, declare_of_platform_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init ep8248e_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + return of_flat_dt_is_compatible(root, "fsl,ep8248e"); +} + +define_machine(ep8248e) +{ + .name = "Embedded Planet EP8248E", + .probe = ep8248e_probe, + .setup_arch = ep8248e_setup_arch, + .init_IRQ = ep8248e_pic_init, + .get_irq = cpm2_get_irq, + .calibrate_decr = generic_calibrate_decr, + .restart = pq2_restart, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c new file mode 100644 index 000000000..387b446f4 --- /dev/null +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -0,0 +1,215 @@ 
+/* + * Keymile km82xx support + * Copyright 2008-2011 DENX Software Engineering GmbH + * Author: Heiko Schocher <hs@denx.de> + * + * based on code from: + * Copyright 2007 Freescale Semiconductor, Inc. + * Author: Scott Wood <scottwood@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/fsl_devices.h> +#include <linux/of_platform.h> + +#include <linux/io.h> +#include <asm/cpm2.h> +#include <asm/udbg.h> +#include <asm/machdep.h> +#include <linux/time.h> +#include <asm/mpc8260.h> +#include <asm/prom.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/cpm2_pic.h> + +#include "pq2.h" + +static void __init km82xx_pic_init(void) +{ + struct device_node *np = of_find_compatible_node(NULL, NULL, + "fsl,pq2-pic"); + if (!np) { + pr_err("PIC init: can not find cpm-pic node\n"); + return; + } + + cpm2_pic_init(np); + of_node_put(np); +} + +struct cpm_pin { + int port, pin, flags; +}; + +static __initdata struct cpm_pin km82xx_pins[] = { + /* SMC1 */ + {2, 4, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + + /* SMC2 */ + {0, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 9, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + + /* SCC1 */ + {2, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + + /* SCC4 */ + {2, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 24, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 9, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + + /* FCC1 */ + {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + + {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 23, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC2 */ + {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + + {2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* MDC */ + {0, 13, CPM_PIN_OUTPUT | CPM_PIN_GPIO}, + +#if defined(CONFIG_I2C_CPM) + /* I2C */ + {3, 14, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN}, + 
{3, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN}, +#endif + + /* USB */ + {0, 10, CPM_PIN_OUTPUT | CPM_PIN_GPIO}, /* FULL_SPEED */ + {0, 11, CPM_PIN_OUTPUT | CPM_PIN_GPIO}, /*/SLAVE */ + {2, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXN */ + {2, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXP */ + {2, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* /OE */ + {2, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXCLK */ + {3, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* TXP */ + {3, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* TXN */ + {3, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXD */ + + /* SPI */ + {3, 16, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_MISO PD16 */ + {3, 17, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_MOSI PD17 */ + {3, 18, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_CLK PD18 */ +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(km82xx_pins); i++) { + const struct cpm_pin *pin = &km82xx_pins[i]; + cpm2_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm2_smc_clk_setup(CPM_CLK_SMC2, CPM_BRG8); + cpm2_smc_clk_setup(CPM_CLK_SMC1, CPM_BRG7); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_CLK11, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_CLK11, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK5, CPM_CLK_RTX); + cpm2_clk_setup(CPM_CLK_SCC4, CPM_CLK7, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC4, CPM_CLK8, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK9, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX); + + /* Force USB FULL SPEED bit to '1' */ + setbits32(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 10)); + /* clear USB_SLAVE */ + clrbits32(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 11)); +} + +static void __init km82xx_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("km82xx_setup_arch()", 0); + + cpm2_reset(); + + /* When this is set, snooping CPM DMA from RAM causes + * machine checks. See erratum SIU18. + */ + clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP); + + init_ioports(); + + if (ppc_md.progress) + ppc_md.progress("km82xx_setup_arch(), finish", 0); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(km82xx, declare_of_platform_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init km82xx_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + return of_flat_dt_is_compatible(root, "keymile,km82xx"); +} + +define_machine(km82xx) +{ + .name = "Keymile km82xx", + .probe = km82xx_probe, + .setup_arch = km82xx_setup_arch, + .init_IRQ = km82xx_pic_init, + .get_irq = cpm2_get_irq, + .calibrate_decr = generic_calibrate_decr, + .restart = pq2_restart, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/82xx/m82xx_pci.h b/arch/powerpc/platforms/82xx/m82xx_pci.h new file mode 100644 index 000000000..65e38a7ff --- /dev/null +++ b/arch/powerpc/platforms/82xx/m82xx_pci.h @@ -0,0 +1,17 @@ +#ifndef _PPC_KERNEL_M82XX_PCI_H +#define _PPC_KERNEL_M82XX_PCI_H + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#define SIU_INT_IRQ1 ((uint)0x13 + CPM_IRQ_OFFSET) + +#ifndef _IO_BASE +#define _IO_BASE isa_io_base +#endif + +#endif /* _PPC_KERNEL_M8260_PCI_H */ diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c new file mode 100644 index 000000000..d24deacf0 --- /dev/null +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c @@ -0,0 +1,218 @@ +/* + * MPC8272 ADS board support + * + * Copyright 2007 Freescale Semiconductor, Inc. + * Author: Scott Wood <scottwood@freescale.com> + * + * Based on code by Vitaly Bordug <vbordug@ru.mvista.com> + * Copyright (c) 2006 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/fsl_devices.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> +#include <linux/io.h> + +#include <asm/cpm2.h> +#include <asm/udbg.h> +#include <asm/machdep.h> +#include <asm/time.h> + +#include <platforms/82xx/pq2.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/cpm2_pic.h> + +#include "pq2.h" + +static void __init mpc8272_ads_pic_init(void) +{ + struct device_node *np = of_find_compatible_node(NULL, NULL, + "fsl,cpm2-pic"); + if (!np) { + printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n"); + return; + } + + cpm2_pic_init(np); + of_node_put(np); + + /* Initialize stuff for the 82xx CPLD IC and install demux */ + pq2ads_pci_init_irq(); +} + +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin mpc8272_ads_pins[] = { + /* SCC1 */ + {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* SCC4 */ + {3, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC1 */ + {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {2, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC2 */ + {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {2, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* I2C */ + {3, 14, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN}, + {3, 15, CPM_PIN_INPUT | 
CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN}, + + /* USB */ + {2, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {2, 24, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mpc8272_ads_pins); i++) { + struct cpm_pin *pin = &mpc8272_ads_pins[i]; + cpm2_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK8, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK8, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_SCC4, CPM_BRG4, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC4, CPM_BRG4, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK11, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK15, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK16, CPM_CLK_TX); +} + +static void __init mpc8272_ads_setup_arch(void) +{ + struct device_node *np; + __be32 __iomem *bcsr; + + if (ppc_md.progress) + ppc_md.progress("mpc8272_ads_setup_arch()", 0); + + cpm2_reset(); + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc8272ads-bcsr"); + if (!np) { + printk(KERN_ERR "No bcsr in device tree\n"); + return; + } + + bcsr = of_iomap(np, 0); + of_node_put(np); + if (!bcsr) { + printk(KERN_ERR "Cannot map BCSR registers\n"); + return; + } + +#define BCSR1_FETHIEN 0x08000000 +#define BCSR1_FETH_RST 0x04000000 +#define BCSR1_RS232_EN1 0x02000000 +#define BCSR1_RS232_EN2 0x01000000 +#define BCSR3_USB_nEN 0x80000000 +#define BCSR3_FETHIEN2 0x10000000 +#define BCSR3_FETH2_RST 0x08000000 + + clrbits32(&bcsr[1], BCSR1_RS232_EN1 | BCSR1_RS232_EN2 | BCSR1_FETHIEN); + setbits32(&bcsr[1], BCSR1_FETH_RST); + + clrbits32(&bcsr[3], BCSR3_FETHIEN2); + setbits32(&bcsr[3], BCSR3_FETH2_RST); + + clrbits32(&bcsr[3], BCSR3_USB_nEN); + + iounmap(bcsr); + + init_ioports(); + pq2_init_pci(); + + if (ppc_md.progress) + ppc_md.progress("mpc8272_ads_setup_arch(), finish", 0); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .name = "cpm", }, + { .name = "localbus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + /* Publish the QE devices */ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + return 0; +} +machine_device_initcall(mpc8272_ads, declare_of_platform_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mpc8272_ads_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + return of_flat_dt_is_compatible(root, "fsl,mpc8272ads"); +} + +define_machine(mpc8272_ads) +{ + .name = "Freescale MPC8272 ADS", + .probe = mpc8272_ads_probe, + .setup_arch = mpc8272_ads_setup_arch, + .init_IRQ = mpc8272_ads_pic_init, + .get_irq = cpm2_get_irq, + .calibrate_decr = generic_calibrate_decr, + .restart = pq2_restart, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c new file mode 100644 index 000000000..fc8b2d6a7 --- /dev/null +++ b/arch/powerpc/platforms/82xx/pq2.c @@ -0,0 +1,81 @@ +/* + * Common PowerQUICC II code. 
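+ * (Shared helpers for the 82xx boards: pq2_restart() forces a checkstop
+ * via the reset module, and pq2_init_pci() registers the PCI bridges
+ * when CONFIG_PCI is enabled.)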
+ * + * Author: Scott Wood <scottwood@freescale.com> + * Copyright (c) 2007 Freescale Semiconductor + * + * Based on code by Vitaly Bordug <vbordug@ru.mvista.com> + * pq2_restart fix by Wade Farnsworth <wfarnsworth@mvista.com> + * Copyright (c) 2006 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <asm/cpm2.h> +#include <asm/io.h> +#include <asm/pci-bridge.h> + +#include <platforms/82xx/pq2.h> + +#define RMR_CSRE 0x00000001 + +void pq2_restart(char *cmd) +{ + local_irq_disable(); + setbits32(&cpm2_immr->im_clkrst.car_rmr, RMR_CSRE); + + /* Clear the ME,EE,IR & DR bits in MSR to cause checkstop */ + mtmsr(mfmsr() & ~(MSR_ME | MSR_EE | MSR_IR | MSR_DR)); + in_8(&cpm2_immr->im_clkrst.res[0]); + + panic("Restart failed\n"); +} + +#ifdef CONFIG_PCI +static int pq2_pci_exclude_device(struct pci_controller *hose, + u_char bus, u8 devfn) +{ + if (bus == 0 && PCI_SLOT(devfn) == 0) + return PCIBIOS_DEVICE_NOT_FOUND; + else + return PCIBIOS_SUCCESSFUL; +} + +static void __init pq2_pci_add_bridge(struct device_node *np) +{ + struct pci_controller *hose; + struct resource r; + + if (of_address_to_resource(np, 0, &r) || r.end - r.start < 0x10b) + goto err; + + pci_add_flags(PCI_REASSIGN_ALL_BUS); + + hose = pcibios_alloc_controller(np); + if (!hose) + return; + + hose->dn = np; + + setup_indirect_pci(hose, r.start + 0x100, r.start + 0x104, 0); + pci_process_bridge_OF_ranges(hose, np, 1); + + return; + +err: + printk(KERN_ERR "No valid PCI reg property in device tree\n"); +} + +void __init pq2_init_pci(void) +{ + struct device_node *np; + + ppc_md.pci_exclude_device = pq2_pci_exclude_device; + + for_each_compatible_node(np, NULL, "fsl,pq2-pci") + pq2_pci_add_bridge(np); +} +#endif diff --git a/arch/powerpc/platforms/82xx/pq2.h b/arch/powerpc/platforms/82xx/pq2.h new file mode 100644 index 000000000..a41f84ae2 --- /dev/null +++ b/arch/powerpc/platforms/82xx/pq2.h @@ -0,0 +1,20 @@ +#ifndef _PQ2_H +#define _PQ2_H + +void pq2_restart(char *cmd); + +#ifdef CONFIG_PCI +int pq2ads_pci_init_irq(void); +void pq2_init_pci(void); +#else +static inline int pq2ads_pci_init_irq(void) +{ + return 0; +} + +static inline void pq2_init_pci(void) +{ +} +#endif + +#endif diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c new file mode 100644 index 000000000..74861a7fb --- /dev/null +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -0,0 +1,180 @@ +/* + * PQ2 ADS-style PCI interrupt controller + * + * Copyright 2007 Freescale Semiconductor, Inc. + * Author: Scott Wood <scottwood@freescale.com> + * + * Loosely based on mpc82xx ADS support by Vitaly Bordug <vbordug@ru.mvista.com> + * Copyright (c) 2006 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ */ + +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/irq.h> +#include <linux/types.h> +#include <linux/slab.h> + +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/cpm2.h> + +#include "pq2.h" + +static DEFINE_RAW_SPINLOCK(pci_pic_lock); + +struct pq2ads_pci_pic { + struct device_node *node; + struct irq_domain *host; + + struct { + u32 stat; + u32 mask; + } __iomem *regs; +}; + +#define NUM_IRQS 32 + +static void pq2ads_pci_mask_irq(struct irq_data *d) +{ + struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d); + int irq = NUM_IRQS - irqd_to_hwirq(d) - 1; + + if (irq != -1) { + unsigned long flags; + raw_spin_lock_irqsave(&pci_pic_lock, flags); + + setbits32(&priv->regs->mask, 1 << irq); + mb(); + + raw_spin_unlock_irqrestore(&pci_pic_lock, flags); + } +} + +static void pq2ads_pci_unmask_irq(struct irq_data *d) +{ + struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d); + int irq = NUM_IRQS - irqd_to_hwirq(d) - 1; + + if (irq != -1) { + unsigned long flags; + + raw_spin_lock_irqsave(&pci_pic_lock, flags); + clrbits32(&priv->regs->mask, 1 << irq); + raw_spin_unlock_irqrestore(&pci_pic_lock, flags); + } +} + +static struct irq_chip pq2ads_pci_ic = { + .name = "PQ2 ADS PCI", + .irq_mask = pq2ads_pci_mask_irq, + .irq_mask_ack = pq2ads_pci_mask_irq, + .irq_ack = pq2ads_pci_mask_irq, + .irq_unmask = pq2ads_pci_unmask_irq, + .irq_enable = pq2ads_pci_unmask_irq, + .irq_disable = pq2ads_pci_mask_irq +}; + +static void pq2ads_pci_irq_demux(unsigned int irq, struct irq_desc *desc) +{ + struct pq2ads_pci_pic *priv = irq_desc_get_handler_data(desc); + u32 stat, mask, pend; + int bit; + + for (;;) { + stat = in_be32(&priv->regs->stat); + mask = in_be32(&priv->regs->mask); + + pend = stat & ~mask; + + if (!pend) + break; + + for (bit = 0; pend != 0; ++bit, pend <<= 1) { + if (pend & 0x80000000) { + int virq = irq_linear_revmap(priv->host, bit); + generic_handle_irq(virq); + } + } + } +} + +static int pci_pic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &pq2ads_pci_ic, handle_level_irq); + return 0; +} + +static const struct irq_domain_ops pci_pic_host_ops = { + .map = pci_pic_host_map, +}; + +int __init pq2ads_pci_init_irq(void) +{ + struct pq2ads_pci_pic *priv; + struct irq_domain *host; + struct device_node *np; + int ret = -ENODEV; + int irq; + + np = of_find_compatible_node(NULL, NULL, "fsl,pq2ads-pci-pic"); + if (!np) { + printk(KERN_ERR "No pci pic node in device tree.\n"); + of_node_put(np); + goto out; + } + + irq = irq_of_parse_and_map(np, 0); + if (irq == NO_IRQ) { + printk(KERN_ERR "No interrupt in pci pic node.\n"); + of_node_put(np); + goto out; + } + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + of_node_put(np); + ret = -ENOMEM; + goto out_unmap_irq; + } + + /* PCI interrupt controller registers: status and mask */ + priv->regs = of_iomap(np, 0); + if (!priv->regs) { + printk(KERN_ERR "Cannot map PCI PIC registers.\n"); + goto out_free_kmalloc; + } + + /* mask all PCI interrupts */ + out_be32(&priv->regs->mask, ~0); + mb(); + + host = irq_domain_add_linear(np, NUM_IRQS, &pci_pic_host_ops, priv); + if (!host) { + ret = -ENOMEM; + goto out_unmap_regs; + } + + priv->host = host; + irq_set_handler_data(irq, priv); + irq_set_chained_handler(irq, pq2ads_pci_irq_demux); + + of_node_put(np); + return 0; + +out_unmap_regs: + iounmap(priv->regs); +out_free_kmalloc: + kfree(priv); + of_node_put(np); 
+out_unmap_irq: + irq_dispose_mapping(irq); +out: + return ret; +} diff --git a/arch/powerpc/platforms/82xx/pq2ads.h b/arch/powerpc/platforms/82xx/pq2ads.h new file mode 100644 index 000000000..6cf0f9748 --- /dev/null +++ b/arch/powerpc/platforms/82xx/pq2ads.h @@ -0,0 +1,44 @@ +/* + * PQ2/mpc8260 board-specific stuff + * + * A collection of structures, addresses, and values associated with + * the Freescale MPC8260ADS/MPC8266ADS-PCI boards. + * Copied from the RPX-Classic and SBS8260 stuff. + * + * Author: Vitaly Bordug <vbordug@ru.mvista.com> + * + * Originally written by Dan Malek for Motorola MPC8260 family + * + * Copyright (c) 2001 Dan Malek <dan@embeddedalley.com> + * Copyright (c) 2006 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifdef __KERNEL__ +#ifndef __MACH_ADS8260_DEFS +#define __MACH_ADS8260_DEFS + +#include <linux/seq_file.h> + +/* The ADS8260 has 16, 32-bit wide control/status registers, accessed + * only on word boundaries. + * Not all are used (yet), or are interesting to us (yet). + */ + +/* Things of interest in the CSR. + */ +#define BCSR0_LED0 ((uint)0x02000000) /* 0 == on */ +#define BCSR0_LED1 ((uint)0x01000000) /* 0 == on */ +#define BCSR1_FETHIEN ((uint)0x08000000) /* 0 == enable*/ +#define BCSR1_FETH_RST ((uint)0x04000000) /* 0 == reset */ +#define BCSR1_RS232_EN1 ((uint)0x02000000) /* 0 ==enable */ +#define BCSR1_RS232_EN2 ((uint)0x01000000) /* 0 ==enable */ +#define BCSR3_FETHIEN2 ((uint)0x10000000) /* 0 == enable*/ +#define BCSR3_FETH2_RST ((uint)0x80000000) /* 0 == reset */ + +#endif /* __MACH_ADS8260_DEFS */ +#endif /* __KERNEL__ */ diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c new file mode 100644 index 000000000..3a5164ad1 --- /dev/null +++ b/arch/powerpc/platforms/82xx/pq2fads.c @@ -0,0 +1,196 @@ +/* + * PQ2FADS board support + * + * Copyright 2007 Freescale Semiconductor, Inc. + * Author: Scott Wood <scottwood@freescale.com> + * + * Loosely based on mp82xx ADS support by Vitaly Bordug <vbordug@ru.mvista.com> + * Copyright (c) 2006 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ */ + +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/fsl_devices.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> + +#include <asm/io.h> +#include <asm/cpm2.h> +#include <asm/udbg.h> +#include <asm/machdep.h> +#include <asm/time.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/cpm2_pic.h> + +#include "pq2ads.h" +#include "pq2.h" + +static void __init pq2fads_pic_init(void) +{ + struct device_node *np = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic"); + if (!np) { + printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n"); + return; + } + + cpm2_pic_init(np); + of_node_put(np); + + /* Initialize stuff for the 82xx CPLD IC and install demux */ + pq2ads_pci_init_irq(); +} + +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin pq2fads_pins[] = { + /* SCC1 */ + {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* SCC2 */ + {3, 27, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC2 */ + {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC3 */ + {1, 4, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 6, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 7, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 9, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 12, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 13, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 14, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 15, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pq2fads_pins); i++) { + struct cpm_pin *pin = &pq2fads_pins[i]; + cpm2_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC3, CPM_CLK15, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC3, CPM_CLK16, CPM_CLK_TX); +} + +static void __init pq2fads_setup_arch(void) +{ + struct device_node *np; + __be32 __iomem *bcsr; + + if (ppc_md.progress) + ppc_md.progress("pq2fads_setup_arch()", 0); + + cpm2_reset(); + + np = of_find_compatible_node(NULL, NULL, "fsl,pq2fads-bcsr"); + if (!np) { + printk(KERN_ERR "No fsl,pq2fads-bcsr in device tree\n"); + return; + } + + bcsr = of_iomap(np, 0); + of_node_put(np); + if (!bcsr) { 
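+ /*
+ * Editor's note (added comment): of_iomap() returns NULL when the
+ * node carries no usable reg property or the mapping fails; without
+ * the BCSR the serial and Ethernet ports below cannot be enabled,
+ * so give up early.
+ */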
+ printk(KERN_ERR "Cannot map BCSR registers\n"); + return; + } + + /* Enable the serial and ethernet ports */ + + clrbits32(&bcsr[1], BCSR1_RS232_EN1 | BCSR1_RS232_EN2 | BCSR1_FETHIEN); + setbits32(&bcsr[1], BCSR1_FETH_RST); + + clrbits32(&bcsr[3], BCSR3_FETHIEN2); + setbits32(&bcsr[3], BCSR3_FETH2_RST); + + iounmap(bcsr); + + init_ioports(); + + /* Enable external IRQs */ + clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_siumcr, 0x0c000000); + + pq2_init_pci(); + + if (ppc_md.progress) + ppc_md.progress("pq2fads_setup_arch(), finish", 0); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init pq2fads_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + return of_flat_dt_is_compatible(root, "fsl,pq2fads"); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .name = "cpm", }, + { .name = "localbus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + /* Publish the QE devices */ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + return 0; +} +machine_device_initcall(pq2fads, declare_of_platform_devices); + +define_machine(pq2fads) +{ + .name = "Freescale PQ2FADS", + .probe = pq2fads_probe, + .setup_arch = pq2fads_setup_arch, + .init_IRQ = pq2fads_pic_init, + .get_irq = cpm2_get_irq, + .calibrate_decr = generic_calibrate_decr, + .restart = pq2_restart, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig new file mode 100644 index 000000000..2bdc8c862 --- /dev/null +++ b/arch/powerpc/platforms/83xx/Kconfig @@ -0,0 +1,133 @@ +menuconfig PPC_83xx + bool "83xx-based boards" + depends on 6xx + select PPC_UDBG_16550 + select PPC_PCI_CHOICE + select FSL_PCI if PCI + select FSL_SOC + select IPIC + +if PPC_83xx + +config MPC830x_RDB + bool "Freescale MPC830x RDB and derivatives" + select DEFAULT_UIMAGE + select PPC_MPC831x + select FSL_GTM + help + This option enables support for the MPC8308 RDB and MPC8308 P1M boards. + +config MPC831x_RDB + bool "Freescale MPC831x RDB" + select DEFAULT_UIMAGE + select PPC_MPC831x + help + This option enables support for the MPC8313 RDB and MPC8315 RDB boards. + +config MPC832x_MDS + bool "Freescale MPC832x MDS" + select DEFAULT_UIMAGE + select PPC_MPC832x + help + This option enables support for the MPC832x MDS evaluation board. + +config MPC832x_RDB + bool "Freescale MPC832x RDB" + select DEFAULT_UIMAGE + select PPC_MPC832x + help + This option enables support for the MPC8323 RDB board. + +config MPC834x_MDS + bool "Freescale MPC834x MDS" + select DEFAULT_UIMAGE + select PPC_MPC834x + help + This option enables support for the MPC 834x MDS evaluation board. + + Be aware that PCI buses can only function when MDS board is plugged + into the PIB (Platform IO Board) board from Freescale which provide + 3 PCI slots. The PIBs PCI initialization is the bootloader's + responsibility. + +config MPC834x_ITX + bool "Freescale MPC834x ITX" + select DEFAULT_UIMAGE + select PPC_MPC834x + help + This option enables support for the MPC 834x ITX evaluation board. + + Be aware that PCI initialization is the bootloader's + responsibility. + +config MPC836x_MDS + bool "Freescale MPC836x MDS" + select DEFAULT_UIMAGE + help + This option enables support for the MPC836x MDS Processor Board. + +config MPC836x_RDK + bool "Freescale/Logic MPC836x RDK" + select DEFAULT_UIMAGE + select FSL_GTM + select FSL_LBC + help + This option enables support for the MPC836x RDK Processor Board, + also known as ZOOM PowerQUICC Kit. 
+ +config MPC837x_MDS + bool "Freescale MPC837x MDS" + select DEFAULT_UIMAGE + select PPC_MPC837x + help + This option enables support for the MPC837x MDS Processor Board. + +config MPC837x_RDB + bool "Freescale MPC837x RDB/WLAN" + select DEFAULT_UIMAGE + select PPC_MPC837x + help + This option enables support for the MPC837x RDB and WLAN Boards. + +config SBC834x + bool "Wind River SBC834x" + select DEFAULT_UIMAGE + select PPC_MPC834x + help + This option enables support for the Wind River SBC834x board. + +config ASP834x + bool "Analogue & Micro ASP 834x" + select PPC_MPC834x + help + This enables support for the Analogue & Micro ASP 83xx + board. + +config KMETER1 + bool "Keymile KMETER1" + select DEFAULT_UIMAGE + select QUICC_ENGINE + help + This enables support for the Keymile KMETER1 board. + + +endif + +# used for usb & gpio +config PPC_MPC831x + bool + select ARCH_WANT_OPTIONAL_GPIOLIB + +# used for math-emu +config PPC_MPC832x + bool + +# used for usb & gpio +config PPC_MPC834x + bool + select ARCH_WANT_OPTIONAL_GPIOLIB + +# used for usb & gpio +config PPC_MPC837x + bool + select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile new file mode 100644 index 000000000..ed95bfcbc --- /dev/null +++ b/arch/powerpc/platforms/83xx/Makefile @@ -0,0 +1,19 @@ +# +# Makefile for the PowerPC 83xx linux kernel. +# +obj-y := misc.o usb.o +obj-$(CONFIG_SUSPEND) += suspend.o suspend-asm.o +obj-$(CONFIG_MCU_MPC8349EMITX) += mcu_mpc8349emitx.o +obj-$(CONFIG_MPC830x_RDB) += mpc830x_rdb.o +obj-$(CONFIG_MPC831x_RDB) += mpc831x_rdb.o +obj-$(CONFIG_MPC832x_RDB) += mpc832x_rdb.o +obj-$(CONFIG_MPC834x_MDS) += mpc834x_mds.o +obj-$(CONFIG_MPC834x_ITX) += mpc834x_itx.o +obj-$(CONFIG_MPC836x_MDS) += mpc836x_mds.o +obj-$(CONFIG_MPC836x_RDK) += mpc836x_rdk.o +obj-$(CONFIG_MPC832x_MDS) += mpc832x_mds.o +obj-$(CONFIG_MPC837x_MDS) += mpc837x_mds.o +obj-$(CONFIG_SBC834x) += sbc834x.o +obj-$(CONFIG_MPC837x_RDB) += mpc837x_rdb.o +obj-$(CONFIG_ASP834x) += asp834x.o +obj-$(CONFIG_KMETER1) += km83xx.o diff --git a/arch/powerpc/platforms/83xx/asp834x.c b/arch/powerpc/platforms/83xx/asp834x.c new file mode 100644 index 000000000..464ea8e02 --- /dev/null +++ b/arch/powerpc/platforms/83xx/asp834x.c @@ -0,0 +1,60 @@ +/* + * arch/powerpc/platforms/83xx/asp834x.c + * + * Analogue & Micro ASP8347 board specific routines + * clone of mpc834x_itx + * + * Copyright 2008 Codehermit + * + * Maintainer: Bryan O'Donoghue <bodonoghue@codhermit.ie> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */
+
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+
+#include "mpc83xx.h"
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+static void __init asp834x_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("asp834x_setup_arch()", 0);
+
+ mpc834x_usb_cfg();
+}
+
+machine_device_initcall(asp834x, mpc83xx_declare_of_platform_devices);
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init asp834x_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+ return of_flat_dt_is_compatible(root, "analogue-and-micro,asp8347e");
+}
+
+define_machine(asp834x) {
+ .name = "ASP8347E",
+ .probe = asp834x_probe,
+ .setup_arch = asp834x_setup_arch,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc83xx_restart,
+ .time_init = mpc83xx_time_init,
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
new file mode 100644
index 000000000..bf4c4473a
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2008-2011 DENX Software Engineering GmbH
+ * Author: Heiko Schocher <hs@denx.de>
+ *
+ * Description:
+ * Keymile 83xx platform specific routines.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/initrd.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+
+#include <linux/atomic.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <asm/qe.h>
+#include <asm/qe_ic.h>
+
+#include "mpc83xx.h"
+
+#define SVR_REV(svr) (((svr) >> 0) & 0xFFFF) /* Revision field */
+
+static void quirk_mpc8360e_qe_enet10(void)
+{
+ /*
+ * handle mpc8360E Erratum QE_ENET10:
+ * RGMII AC values do not meet the specification
+ */
+ uint svid = mfspr(SPRN_SVR);
+ struct device_node *np_par;
+ struct resource res;
+ void __iomem *base;
+ int ret;
+
+ np_par = of_find_node_by_name(NULL, "par_io");
+ if (np_par == NULL) {
+ pr_warn("%s couldn't find par_io node\n", __func__);
+ return;
+ }
+ /* Map Parallel I/O ports registers */
+ ret = of_address_to_resource(np_par, 0, &res);
+ if (ret) {
+ pr_warn("%s couldn't map par_io registers\n", __func__);
+ of_node_put(np_par);
+ return;
+ }
+
+ base = ioremap(res.start, res.end - res.start + 1);
+ if (!base) {
+ of_node_put(np_par);
+ return;
+ }
+
+ /*
+ * set output delay adjustments to default values according
+ * table 5 in Errata Rev.
5, 9/2011: + * + * write 0b01 to UCC1 bits 18:19 + * write 0b01 to UCC2 option 1 bits 4:5 + * write 0b01 to UCC2 option 2 bits 16:17 + */ + clrsetbits_be32((base + 0xa8), 0x0c00f000, 0x04005000); + + /* + * set output delay adjustments to default values according + * table 3-13 in Reference Manual Rev.3 05/2010: + * + * write 0b01 to UCC2 option 2 bits 16:17 + * write 0b0101 to UCC1 bits 20:23 + * write 0b0101 to UCC2 option 1 bits 24:27 + */ + clrsetbits_be32((base + 0xac), 0x0000cff0, 0x00004550); + + if (SVR_REV(svid) == 0x0021) { + /* + * UCC2 option 1: write 0b1010 to bits 24:27 + * at address IMMRBAR+0x14AC + */ + clrsetbits_be32((base + 0xac), 0x000000f0, 0x000000a0); + } else if (SVR_REV(svid) == 0x0020) { + /* + * UCC1: write 0b11 to bits 18:19 + * at address IMMRBAR+0x14A8 + */ + setbits32((base + 0xa8), 0x00003000); + + /* + * UCC2 option 1: write 0b11 to bits 4:5 + * at address IMMRBAR+0x14A8 + */ + setbits32((base + 0xa8), 0x0c000000); + + /* + * UCC2 option 2: write 0b11 to bits 16:17 + * at address IMMRBAR+0x14AC + */ + setbits32((base + 0xac), 0x0000c000); + } + iounmap(base); + of_node_put(np_par); +} + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc83xx_km_setup_arch(void) +{ +#ifdef CONFIG_QUICC_ENGINE + struct device_node *np; +#endif + + if (ppc_md.progress) + ppc_md.progress("kmpbec83xx_setup_arch()", 0); + + mpc83xx_setup_pci(); + +#ifdef CONFIG_QUICC_ENGINE + qe_reset(); + + np = of_find_node_by_name(NULL, "par_io"); + if (np != NULL) { + par_io_init(np); + of_node_put(np); + + for_each_node_by_name(np, "spi") + par_io_of_config(np); + + for_each_node_by_name(np, "ucc") + par_io_of_config(np); + + /* Only apply this quirk when par_io is available */ + np = of_find_compatible_node(NULL, "network", "ucc_geth"); + if (np != NULL) { + quirk_mpc8360e_qe_enet10(); + of_node_put(np); + } + } +#endif /* CONFIG_QUICC_ENGINE */ +} + +machine_device_initcall(mpc83xx_km, mpc83xx_declare_of_platform_devices); + +/* list of the supported boards */ +static char *board[] __initdata = { + "Keymile,KMETER1", + "Keymile,kmpbec8321", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc83xx_km_probe(void) +{ + unsigned long node = of_get_flat_dt_root(); + int i = 0; + + while (board[i]) { + if (of_flat_dt_is_compatible(node, board[i])) + break; + i++; + } + return (board[i] != NULL); +} + +define_machine(mpc83xx_km) { + .name = "mpc83xx-km-platform", + .probe = mpc83xx_km_probe, + .setup_arch = mpc83xx_km_setup_arch, + .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c new file mode 100644 index 000000000..15e8021dd --- /dev/null +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -0,0 +1,238 @@ +/* + * Power Management and GPIO expander driver for MPC8349E-mITX-compatible MCU + * + * Copyright (c) 2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov <avorontsov@ru.mvista.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/i2c.h> +#include <linux/gpio.h> +#include <linux/of.h> +#include <linux/of_gpio.h> +#include <linux/slab.h> +#include <linux/kthread.h> +#include <linux/reboot.h> +#include <asm/prom.h> +#include <asm/machdep.h> + +/* + * I don't have specifications for the MCU firmware, I found this register + * and bits positions by the trial&error method. + */ +#define MCU_REG_CTRL 0x20 +#define MCU_CTRL_POFF 0x40 +#define MCU_CTRL_BTN 0x80 + +#define MCU_NUM_GPIO 2 + +struct mcu { + struct mutex lock; + struct i2c_client *client; + struct gpio_chip gc; + u8 reg_ctrl; +}; + +static struct mcu *glob_mcu; + +struct task_struct *shutdown_thread; +static int shutdown_thread_fn(void *data) +{ + int ret; + struct mcu *mcu = glob_mcu; + + while (!kthread_should_stop()) { + ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL); + if (ret < 0) + pr_err("MCU status reg read failed.\n"); + mcu->reg_ctrl = ret; + + + if (mcu->reg_ctrl & MCU_CTRL_BTN) { + i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, + mcu->reg_ctrl & ~MCU_CTRL_BTN); + + ctrl_alt_del(); + } + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + } + + return 0; +} + +static ssize_t show_status(struct device *d, + struct device_attribute *attr, char *buf) +{ + int ret; + struct mcu *mcu = glob_mcu; + + ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL); + if (ret < 0) + return -ENODEV; + mcu->reg_ctrl = ret; + + return sprintf(buf, "%02x\n", ret); +} +static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); + +static void mcu_power_off(void) +{ + struct mcu *mcu = glob_mcu; + + pr_info("Sending power-off request to the MCU...\n"); + mutex_lock(&mcu->lock); + i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, + mcu->reg_ctrl | MCU_CTRL_POFF); + mutex_unlock(&mcu->lock); +} + +static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct mcu *mcu = container_of(gc, struct mcu, gc); + u8 bit = 1 << (4 + gpio); + + mutex_lock(&mcu->lock); + if (val) + mcu->reg_ctrl &= ~bit; + else + mcu->reg_ctrl |= bit; + + i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, mcu->reg_ctrl); + mutex_unlock(&mcu->lock); +} + +static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + mcu_gpio_set(gc, gpio, val); + return 0; +} + +static int mcu_gpiochip_add(struct mcu *mcu) +{ + struct device_node *np; + struct gpio_chip *gc = &mcu->gc; + + np = of_find_compatible_node(NULL, NULL, "fsl,mcu-mpc8349emitx"); + if (!np) + return -ENODEV; + + gc->owner = THIS_MODULE; + gc->label = np->full_name; + gc->can_sleep = 1; + gc->ngpio = MCU_NUM_GPIO; + gc->base = -1; + gc->set = mcu_gpio_set; + gc->direction_output = mcu_gpio_dir_out; + gc->of_node = np; + + return gpiochip_add(gc); +} + +static int mcu_gpiochip_remove(struct mcu *mcu) +{ + gpiochip_remove(&mcu->gc); + return 0; +} + +static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id) +{ + struct mcu *mcu; + int ret; + + mcu = kzalloc(sizeof(*mcu), GFP_KERNEL); + if (!mcu) + return -ENOMEM; + + mutex_init(&mcu->lock); + mcu->client = client; + i2c_set_clientdata(client, mcu); + + ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL); + if (ret < 0) + goto err; + mcu->reg_ctrl = ret; + + ret = mcu_gpiochip_add(mcu); + if (ret) + goto err; + + /* XXX: this is potentially racy, but there is no lock for pm_power_off */ + if (!pm_power_off) { + glob_mcu = mcu; + pm_power_off = mcu_power_off; + 
dev_info(&client->dev, "will provide power-off service\n"); + } + + if (device_create_file(&client->dev, &dev_attr_status)) + dev_err(&client->dev, + "couldn't create device file for status\n"); + + shutdown_thread = kthread_run(shutdown_thread_fn, NULL, + "mcu-i2c-shdn"); + + return 0; +err: + kfree(mcu); + return ret; +} + +static int mcu_remove(struct i2c_client *client) +{ + struct mcu *mcu = i2c_get_clientdata(client); + int ret; + + kthread_stop(shutdown_thread); + + device_remove_file(&client->dev, &dev_attr_status); + + if (glob_mcu == mcu) { + pm_power_off = NULL; + glob_mcu = NULL; + } + + ret = mcu_gpiochip_remove(mcu); + if (ret) + return ret; + kfree(mcu); + return 0; +} + +static const struct i2c_device_id mcu_ids[] = { + { "mcu-mpc8349emitx", }, + {}, +}; +MODULE_DEVICE_TABLE(i2c, mcu_ids); + +static const struct of_device_id mcu_of_match_table[] = { + { .compatible = "fsl,mcu-mpc8349emitx", }, + { }, +}; + +static struct i2c_driver mcu_driver = { + .driver = { + .name = "mcu-mpc8349emitx", + .owner = THIS_MODULE, + .of_match_table = mcu_of_match_table, + }, + .probe = mcu_probe, + .remove = mcu_remove, + .id_table = mcu_ids, +}; + +module_i2c_driver(mcu_driver); + +MODULE_DESCRIPTION("Power Management and GPIO expander driver for " + "MPC8349E-mITX-compatible MCU"); +MODULE_AUTHOR("Anton Vorontsov <avorontsov@ru.mvista.com>"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c new file mode 100644 index 000000000..ef9d01a04 --- /dev/null +++ b/arch/powerpc/platforms/83xx/misc.c @@ -0,0 +1,144 @@ +/* + * misc setup functions for MPC83xx + * + * Maintainer: Kumar Gala <galak@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+
+#include <asm/io.h>
+#include <asm/hw_irq.h>
+#include <asm/ipic.h>
+#include <asm/qe_ic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+static __be32 __iomem *restart_reg_base;
+
+static int __init mpc83xx_restart_init(void)
+{
+ /* map the reset register space */
+ restart_reg_base = ioremap(get_immrbase() + 0x900, 0xff);
+
+ return 0;
+}
+
+arch_initcall(mpc83xx_restart_init);
+
+void mpc83xx_restart(char *cmd)
+{
+#define RST_OFFSET 0x00000900
+#define RST_PROT_REG 0x00000018
+#define RST_CTRL_REG 0x0000001c
+
+ local_irq_disable();
+
+ if (restart_reg_base) {
+ /* enable software reset: write the "RSTE" magic to the protect reg */
+ out_be32(restart_reg_base + (RST_PROT_REG >> 2), 0x52535445);
+
+ /* set software hard reset */
+ out_be32(restart_reg_base + (RST_CTRL_REG >> 2), 0x2);
+ } else {
+ printk(KERN_EMERG "Error: Restart registers not mapped, spinning!\n");
+ }
+
+ for (;;) ;
+}
+
+long __init mpc83xx_time_init(void)
+{
+#define SPCR_OFFSET 0x00000110
+#define SPCR_TBEN 0x00400000
+ __be32 __iomem *spcr = ioremap(get_immrbase() + SPCR_OFFSET, 4);
+ __be32 tmp;
+
+ tmp = in_be32(spcr);
+ out_be32(spcr, tmp | SPCR_TBEN);
+
+ iounmap(spcr);
+
+ return 0;
+}
+
+void __init mpc83xx_ipic_init_IRQ(void)
+{
+ struct device_node *np;
+
+ /* looking for fsl,pq2pro-pic, which is also compatible with fsl,ipic */
+ np = of_find_compatible_node(NULL, NULL, "fsl,ipic");
+ if (!np)
+ np = of_find_node_by_type(NULL, "ipic");
+ if (!np)
+ return;
+
+ ipic_init(np, 0);
+
+ of_node_put(np);
+
+ /* Initialize the default interrupt mapping priorities,
+ * in case the boot rom changed something on us.
+ */
+ ipic_set_default_priority();
}
+
+#ifdef CONFIG_QUICC_ENGINE
+void __init mpc83xx_qe_init_IRQ(void)
+{
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
+ if (!np) {
+ np = of_find_node_by_type(NULL, "qeic");
+ if (!np)
+ return;
+ }
+ qe_ic_init(np, 0, qe_ic_cascade_low_ipic, qe_ic_cascade_high_ipic);
+ of_node_put(np);
+}
+
+void __init mpc83xx_ipic_and_qe_init_IRQ(void)
+{
+ mpc83xx_ipic_init_IRQ();
+ mpc83xx_qe_init_IRQ();
+}
+#endif /* CONFIG_QUICC_ENGINE */
+
+static const struct of_device_id of_bus_ids[] __initconst = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .compatible = "simple-bus" },
+ { .compatible = "gianfar" },
+ { .compatible = "gpio-leds", },
+ { .type = "qe", },
+ { .compatible = "fsl,qe", },
+ {},
+};
+
+int __init mpc83xx_declare_of_platform_devices(void)
+{
+ of_platform_bus_probe(NULL, of_bus_ids, NULL);
+ return 0;
+}
+
+#ifdef CONFIG_PCI
+void __init mpc83xx_setup_pci(void)
+{
+ struct device_node *np;
+
+ for_each_compatible_node(np, "pci", "fsl,mpc8349-pci")
+ mpc83xx_add_bridge(np);
+ for_each_compatible_node(np, "pci", "fsl,mpc8314-pcie")
+ mpc83xx_add_bridge(np);
+}
+#endif
diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c
new file mode 100644
index 000000000..4f2d9fea7
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c
@@ -0,0 +1,64 @@
+/*
+ * arch/powerpc/platforms/83xx/mpc830x_rdb.c
+ *
+ * Description: MPC830x RDB board specific routines.
+ * This file is based on mpc831x_rdb.c
+ *
+ * Copyright (C) Freescale Semiconductor, Inc. 2009. All rights reserved.
+ * Copyright (C) 2010.
Ilya Yanok, Emcraft Systems, yanok@emcraft.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/pci.h> +#include <linux/of_platform.h> +#include <asm/time.h> +#include <asm/ipic.h> +#include <asm/udbg.h> +#include <sysdev/fsl_pci.h> +#include <sysdev/fsl_soc.h> +#include "mpc83xx.h" + +/* + * Setup the architecture + */ +static void __init mpc830x_rdb_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc830x_rdb_setup_arch()", 0); + + mpc83xx_setup_pci(); + mpc831x_usb_cfg(); +} + +static const char *board[] __initdata = { + "MPC8308RDB", + "fsl,mpc8308rdb", + "denx,mpc8308_p1m", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc830x_rdb_probe(void) +{ + return of_flat_dt_match(of_get_flat_dt_root(), board); +} + +machine_device_initcall(mpc830x_rdb, mpc83xx_declare_of_platform_devices); + +define_machine(mpc830x_rdb) { + .name = "MPC830x RDB", + .probe = mpc830x_rdb_probe, + .setup_arch = mpc830x_rdb_setup_arch, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c new file mode 100644 index 000000000..fa25977c5 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c @@ -0,0 +1,64 @@ +/* + * arch/powerpc/platforms/83xx/mpc831x_rdb.c + * + * Description: MPC831x RDB board specific routines. + * This file is based on mpc834x_sys.c + * Author: Lo Wlison <r43300@freescale.com> + * + * Copyright (C) Freescale Semiconductor, Inc. 2006. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/pci.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/ipic.h> +#include <asm/udbg.h> +#include <sysdev/fsl_pci.h> + +#include "mpc83xx.h" + +/* + * Setup the architecture + */ +static void __init mpc831x_rdb_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc831x_rdb_setup_arch()", 0); + + mpc83xx_setup_pci(); + mpc831x_usb_cfg(); +} + +static const char *board[] __initdata = { + "MPC8313ERDB", + "fsl,mpc8315erdb", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc831x_rdb_probe(void) +{ + return of_flat_dt_match(of_get_flat_dt_root(), board); +} + +machine_device_initcall(mpc831x_rdb, mpc83xx_declare_of_platform_devices); + +define_machine(mpc831x_rdb) { + .name = "MPC831x RDB", + .probe = mpc831x_rdb_probe, + .setup_arch = mpc831x_rdb_setup_arch, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c new file mode 100644 index 000000000..8d762203e --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c @@ -0,0 +1,122 @@ +/* + * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved. + * + * Description: + * MPC832xE MDS board specific routines. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/reboot.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/initrd.h> +#include <linux/of_platform.h> +#include <linux/of_device.h> + +#include <linux/atomic.h> +#include <asm/time.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> +#include <asm/qe.h> +#include <asm/qe_ic.h> + +#include "mpc83xx.h" + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) 
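+/*
+ * Editor's note (added comment): with DEBUG undefined, the variadic
+ * DBG() macro above expands to nothing, so a call such as
+ *
+ *     DBG("bcsr_regs at %p\n", bcsr_regs);
+ *
+ * compiles away entirely; with DEBUG defined it goes through
+ * udbg_printf(), which works before the regular console is up.
+ */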
+#endif + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc832x_sys_setup_arch(void) +{ + struct device_node *np; + u8 __iomem *bcsr_regs = NULL; + + if (ppc_md.progress) + ppc_md.progress("mpc832x_sys_setup_arch()", 0); + + /* Map BCSR area */ + np = of_find_node_by_name(NULL, "bcsr"); + if (np) { + struct resource res; + + of_address_to_resource(np, 0, &res); + bcsr_regs = ioremap(res.start, resource_size(&res)); + of_node_put(np); + } + + mpc83xx_setup_pci(); + +#ifdef CONFIG_QUICC_ENGINE + qe_reset(); + + if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) { + par_io_init(np); + of_node_put(np); + + for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;) + par_io_of_config(np); + } + + if ((np = of_find_compatible_node(NULL, "network", "ucc_geth")) + != NULL){ + /* Reset the Ethernet PHYs */ +#define BCSR8_FETH_RST 0x50 + clrbits8(&bcsr_regs[8], BCSR8_FETH_RST); + udelay(1000); + setbits8(&bcsr_regs[8], BCSR8_FETH_RST); + iounmap(bcsr_regs); + of_node_put(np); + } +#endif /* CONFIG_QUICC_ENGINE */ +} + +machine_device_initcall(mpc832x_mds, mpc83xx_declare_of_platform_devices); + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc832x_sys_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "MPC832xMDS"); +} + +define_machine(mpc832x_mds) { + .name = "MPC832x MDS", + .probe = mpc832x_sys_probe, + .setup_arch = mpc832x_sys_setup_arch, + .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c new file mode 100644 index 000000000..eff5baabc --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -0,0 +1,240 @@ +/* + * arch/powerpc/platforms/83xx/mpc832x_rdb.c + * + * Copyright (C) Freescale Semiconductor, Inc. 2007. All rights reserved. + * + * Description: + * MPC832x RDB board specific routines. + * This file is based on mpc832x_mds.c and mpc8313_rdb.c + * Author: Michael Barkowski <michael.barkowski@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/spi/spi.h> +#include <linux/spi/mmc_spi.h> +#include <linux/mmc/host.h> +#include <linux/of_platform.h> +#include <linux/fsl_devices.h> + +#include <asm/time.h> +#include <asm/ipic.h> +#include <asm/udbg.h> +#include <asm/qe.h> +#include <asm/qe_ic.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc83xx.h" + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) 
+#endif + +#ifdef CONFIG_QUICC_ENGINE +static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, + struct spi_board_info *board_infos, + unsigned int num_board_infos, + void (*cs_control)(struct spi_device *dev, + bool on)) +{ + struct device_node *np; + unsigned int i = 0; + + for_each_compatible_node(np, type, compatible) { + int ret; + unsigned int j; + const void *prop; + struct resource res[2]; + struct platform_device *pdev; + struct fsl_spi_platform_data pdata = { + .cs_control = cs_control, + }; + + memset(res, 0, sizeof(res)); + + pdata.sysclk = sysclk; + + prop = of_get_property(np, "reg", NULL); + if (!prop) + goto err; + pdata.bus_num = *(u32 *)prop; + + prop = of_get_property(np, "cell-index", NULL); + if (prop) + i = *(u32 *)prop; + + prop = of_get_property(np, "mode", NULL); + if (prop && !strcmp(prop, "cpu-qe")) + pdata.flags = SPI_QE_CPU_MODE; + + for (j = 0; j < num_board_infos; j++) { + if (board_infos[j].bus_num == pdata.bus_num) + pdata.max_chipselect++; + } + + if (!pdata.max_chipselect) + continue; + + ret = of_address_to_resource(np, 0, &res[0]); + if (ret) + goto err; + + ret = of_irq_to_resource(np, 0, &res[1]); + if (ret == NO_IRQ) + goto err; + + pdev = platform_device_alloc("mpc83xx_spi", i); + if (!pdev) + goto err; + + ret = platform_device_add_data(pdev, &pdata, sizeof(pdata)); + if (ret) + goto unreg; + + ret = platform_device_add_resources(pdev, res, + ARRAY_SIZE(res)); + if (ret) + goto unreg; + + ret = platform_device_add(pdev); + if (ret) + goto unreg; + + goto next; +unreg: + platform_device_del(pdev); +err: + pr_err("%s: registration failed\n", np->full_name); +next: + i++; + } + + return i; +} + +static int __init fsl_spi_init(struct spi_board_info *board_infos, + unsigned int num_board_infos, + void (*cs_control)(struct spi_device *spi, + bool on)) +{ + u32 sysclk = -1; + int ret; + + /* SPI controller is either clocked from QE or SoC clock */ + sysclk = get_brgfreq(); + if (sysclk == -1) { + sysclk = fsl_get_sys_freq(); + if (sysclk == -1) + return -ENODEV; + } + + ret = of_fsl_spi_probe(NULL, "fsl,spi", sysclk, board_infos, + num_board_infos, cs_control); + if (!ret) + of_fsl_spi_probe("spi", "fsl_spi", sysclk, board_infos, + num_board_infos, cs_control); + + return spi_register_board_info(board_infos, num_board_infos); +} + +static void mpc83xx_spi_cs_control(struct spi_device *spi, bool on) +{ + pr_debug("%s %d %d\n", __func__, spi->chip_select, on); + par_io_data_set(3, 13, on); +} + +static struct mmc_spi_platform_data mpc832x_mmc_pdata = { + .ocr_mask = MMC_VDD_33_34, +}; + +static struct spi_board_info mpc832x_spi_boardinfo = { + .bus_num = 0x4c0, + .chip_select = 0, + .max_speed_hz = 50000000, + .modalias = "mmc_spi", + .platform_data = &mpc832x_mmc_pdata, +}; + +static int __init mpc832x_spi_init(void) +{ + par_io_config_pin(3, 0, 3, 0, 1, 0); /* SPI1 MOSI, I/O */ + par_io_config_pin(3, 1, 3, 0, 1, 0); /* SPI1 MISO, I/O */ + par_io_config_pin(3, 2, 3, 0, 1, 0); /* SPI1 CLK, I/O */ + par_io_config_pin(3, 3, 2, 0, 1, 0); /* SPI1 SEL, I */ + + par_io_config_pin(3, 13, 1, 0, 0, 0); /* !SD_CS, O */ + par_io_config_pin(3, 14, 2, 0, 0, 0); /* SD_INSERT, I */ + par_io_config_pin(3, 15, 2, 0, 0, 0); /* SD_PROTECT,I */ + + /* + * Don't bother with legacy stuff when device tree contains + * mmc-spi-slot node. 
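+ *
+ * Editor's note (assumption, added to the import): when such a node is
+ * present, the SPI core creates the mmc_spi device directly from the
+ * device tree, so registering the hand-rolled mpc832x_spi_boardinfo
+ * here would only duplicate it.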
+ */ + if (of_find_compatible_node(NULL, NULL, "mmc-spi-slot")) + return 0; + return fsl_spi_init(&mpc832x_spi_boardinfo, 1, mpc83xx_spi_cs_control); +} +machine_device_initcall(mpc832x_rdb, mpc832x_spi_init); +#endif /* CONFIG_QUICC_ENGINE */ + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc832x_rdb_setup_arch(void) +{ +#if defined(CONFIG_QUICC_ENGINE) + struct device_node *np; +#endif + + if (ppc_md.progress) + ppc_md.progress("mpc832x_rdb_setup_arch()", 0); + + mpc83xx_setup_pci(); + +#ifdef CONFIG_QUICC_ENGINE + qe_reset(); + + if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) { + par_io_init(np); + of_node_put(np); + + for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;) + par_io_of_config(np); + } +#endif /* CONFIG_QUICC_ENGINE */ +} + +machine_device_initcall(mpc832x_rdb, mpc83xx_declare_of_platform_devices); + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc832x_rdb_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "MPC832xRDB"); +} + +define_machine(mpc832x_rdb) { + .name = "MPC832x RDB", + .probe = mpc832x_rdb_probe, + .setup_arch = mpc832x_rdb_setup_arch, + .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c new file mode 100644 index 000000000..80aea8c4b --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -0,0 +1,88 @@ +/* + * arch/powerpc/platforms/83xx/mpc834x_itx.c + * + * MPC834x ITX board specific routines + * + * Maintainer: Kumar Gala <galak@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/reboot.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/of_platform.h> + +#include <linux/atomic.h> +#include <asm/time.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc83xx.h" + +static const struct of_device_id mpc834x_itx_ids[] __initconst = { + { .compatible = "fsl,pq2pro-localbus", }, + {}, +}; + +static int __init mpc834x_itx_declare_of_platform_devices(void) +{ + mpc83xx_declare_of_platform_devices(); + return of_platform_bus_probe(NULL, mpc834x_itx_ids, NULL); +} +machine_device_initcall(mpc834x_itx, mpc834x_itx_declare_of_platform_devices); + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc834x_itx_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc834x_itx_setup_arch()", 0); + + mpc83xx_setup_pci(); + + mpc834x_usb_cfg(); +} + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc834x_itx_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "MPC834xMITX"); +} + +define_machine(mpc834x_itx) { + .name = "MPC834x ITX", + .probe = mpc834x_itx_probe, + .setup_arch = mpc834x_itx_setup_arch, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c new file mode 100644 index 000000000..553e793a4 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c @@ -0,0 +1,109 @@ +/* + * arch/powerpc/platforms/83xx/mpc834x_mds.c + * + * MPC834x MDS board specific routines + * + * Maintainer: Kumar Gala <galak@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/reboot.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/of_platform.h> + +#include <linux/atomic.h> +#include <asm/time.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc83xx.h" + +#define BCSR5_INT_USB 0x02 +static int mpc834xemds_usb_cfg(void) +{ + struct device_node *np; + void __iomem *bcsr_regs = NULL; + u8 bcsr5; + + mpc834x_usb_cfg(); + /* Map BCSR area */ + np = of_find_node_by_name(NULL, "bcsr"); + if (np) { + struct resource res; + + of_address_to_resource(np, 0, &res); + bcsr_regs = ioremap(res.start, resource_size(&res)); + of_node_put(np); + } + if (!bcsr_regs) + return -1; + + /* + * if Processor Board is plugged into PIB board, + * force to use the PHY on Processor Board + */ + bcsr5 = in_8(bcsr_regs + 5); + if (!(bcsr5 & BCSR5_INT_USB)) + out_8(bcsr_regs + 5, (bcsr5 | BCSR5_INT_USB)); + iounmap(bcsr_regs); + return 0; +} + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc834x_mds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc834x_mds_setup_arch()", 0); + + mpc83xx_setup_pci(); + + mpc834xemds_usb_cfg(); +} + +machine_device_initcall(mpc834x_mds, mpc83xx_declare_of_platform_devices); + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc834x_mds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "MPC834xMDS"); +} + +define_machine(mpc834x_mds) { + .name = "MPC834x MDS", + .probe = mpc834x_mds_probe, + .setup_arch = mpc834x_mds_setup_arch, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c new file mode 100644 index 000000000..1a26d2f83 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -0,0 +1,229 @@ +/* + * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Li Yang <LeoLi@freescale.com> + * Yin Olivia <Hong-hua.Yin@freescale.com> + * + * Description: + * MPC8360E MDS board specific routines. + * + * Changelog: + * Jun 21, 2006 Initial version + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/compiler.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/reboot.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/initrd.h> +#include <linux/of_platform.h> +#include <linux/of_device.h> + +#include <linux/atomic.h> +#include <asm/time.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> +#include <sysdev/simple_gpio.h> +#include <asm/qe.h> +#include <asm/qe_ic.h> + +#include "mpc83xx.h" + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc836x_mds_setup_arch(void) +{ + struct device_node *np; + u8 __iomem *bcsr_regs = NULL; + + if (ppc_md.progress) + ppc_md.progress("mpc836x_mds_setup_arch()", 0); + + /* Map BCSR area */ + np = of_find_node_by_name(NULL, "bcsr"); + if (np) { + struct resource res; + + of_address_to_resource(np, 0, &res); + bcsr_regs = ioremap(res.start, resource_size(&res)); + of_node_put(np); + } + + mpc83xx_setup_pci(); + +#ifdef CONFIG_QUICC_ENGINE + qe_reset(); + + if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) { + par_io_init(np); + of_node_put(np); + + for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;) + par_io_of_config(np); +#ifdef CONFIG_QE_USB + /* Must fixup Par IO before QE GPIO chips are registered. */ + par_io_config_pin(1, 2, 1, 0, 3, 0); /* USBOE */ + par_io_config_pin(1, 3, 1, 0, 3, 0); /* USBTP */ + par_io_config_pin(1, 8, 1, 0, 1, 0); /* USBTN */ + par_io_config_pin(1, 10, 2, 0, 3, 0); /* USBRXD */ + par_io_config_pin(1, 9, 2, 1, 3, 0); /* USBRP */ + par_io_config_pin(1, 11, 2, 1, 3, 0); /* USBRN */ + par_io_config_pin(2, 20, 2, 0, 1, 0); /* CLK21 */ +#endif /* CONFIG_QE_USB */ + } + + if ((np = of_find_compatible_node(NULL, "network", "ucc_geth")) + != NULL){ + uint svid; + + /* Reset the Ethernet PHY */ +#define BCSR9_GETHRST 0x20 + clrbits8(&bcsr_regs[9], BCSR9_GETHRST); + udelay(1000); + setbits8(&bcsr_regs[9], BCSR9_GETHRST); + + /* handle mpc8360ea rev.2.1 erratum 2: RGMII Timing */ + svid = mfspr(SPRN_SVR); + if (svid == 0x80480021) { + void __iomem *immap; + + immap = ioremap(get_immrbase() + 0x14a8, 8); + + /* + * IMMR + 0x14A8[4:5] = 11 (clk delay for UCC 2) + * IMMR + 0x14A8[18:19] = 11 (clk delay for UCC 1) + */ + setbits32(immap, 0x0c003000); + + /* + * IMMR + 0x14AC[20:27] = 10101010 + * (data delay for both UCC's) + */ + clrsetbits_be32(immap + 4, 0xff0, 0xaa0); + + iounmap(immap); + } + + iounmap(bcsr_regs); + of_node_put(np); + } +#endif /* CONFIG_QUICC_ENGINE */ +} + +machine_device_initcall(mpc836x_mds, mpc83xx_declare_of_platform_devices); + +#ifdef CONFIG_QE_USB +static int __init mpc836x_usb_cfg(void) +{ + u8 __iomem *bcsr; + struct device_node *np; + const char *mode; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc8360mds-bcsr"); + if (!np) + return -ENODEV; + + bcsr = of_iomap(np, 0); + of_node_put(np); + if (!bcsr) + return -ENOMEM; + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc8323-qe-usb"); + if (!np) { + ret = -ENODEV; + goto err; + } + +#define BCSR8_TSEC1M_MASK (0x3 << 6) +#define 
BCSR8_TSEC1M_RGMII (0x0 << 6) +#define BCSR8_TSEC2M_MASK (0x3 << 4) +#define BCSR8_TSEC2M_RGMII (0x0 << 4) + /* + * Default is GMII (2), but we should set it to RGMII (0) if we use + * USB (Eth PHY is in RGMII mode anyway). + */ + clrsetbits_8(&bcsr[8], BCSR8_TSEC1M_MASK | BCSR8_TSEC2M_MASK, + BCSR8_TSEC1M_RGMII | BCSR8_TSEC2M_RGMII); + +#define BCSR13_USBMASK 0x0f +#define BCSR13_nUSBEN 0x08 /* 1 - Disable, 0 - Enable */ +#define BCSR13_USBSPEED 0x04 /* 1 - Full, 0 - Low */ +#define BCSR13_USBMODE 0x02 /* 1 - Host, 0 - Function */ +#define BCSR13_nUSBVCC 0x01 /* 1 - gets VBUS, 0 - supplies VBUS */ + + clrsetbits_8(&bcsr[13], BCSR13_USBMASK, BCSR13_USBSPEED); + + mode = of_get_property(np, "mode", NULL); + if (mode && !strcmp(mode, "peripheral")) { + setbits8(&bcsr[13], BCSR13_nUSBVCC); + qe_usb_clock_set(QE_CLK21, 48000000); + } else { + setbits8(&bcsr[13], BCSR13_USBMODE); + /* + * The BCSR GPIOs are used to control power and + * speed of the USB transceiver. This is needed for + * the USB Host only. + */ + simple_gpiochip_init("fsl,mpc8360mds-bcsr-gpio"); + } + + of_node_put(np); +err: + iounmap(bcsr); + return ret; +} +machine_arch_initcall(mpc836x_mds, mpc836x_usb_cfg); +#endif /* CONFIG_QE_USB */ + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc836x_mds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "MPC836xMDS"); +} + +define_machine(mpc836x_mds) { + .name = "MPC836x MDS", + .probe = mpc836x_mds_probe, + .setup_arch = mpc836x_mds_setup_arch, + .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c new file mode 100644 index 000000000..b63b42d11 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c @@ -0,0 +1,63 @@ +/* + * MPC8360E-RDK board file. + * + * Copyright (c) 2006 Freescale Semiconductor, Inc. + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov <avorontsov@ru.mvista.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/of_platform.h> +#include <linux/io.h> +#include <asm/prom.h> +#include <asm/time.h> +#include <asm/ipic.h> +#include <asm/udbg.h> +#include <asm/qe.h> +#include <asm/qe_ic.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc83xx.h" + +machine_device_initcall(mpc836x_rdk, mpc83xx_declare_of_platform_devices); + +static void __init mpc836x_rdk_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc836x_rdk_setup_arch()", 0); + + mpc83xx_setup_pci(); +#ifdef CONFIG_QUICC_ENGINE + qe_reset(); +#endif +} + +/* + * Called very early, MMU is off, device-tree isn't unflattened. 
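+ *
+ * Editor's note (added comment): only the flattened device-tree blob
+ * exists at this point, hence the of_get_flat_dt_root() and
+ * of_flat_dt_is_compatible() helpers; the unflattened struct
+ * device_node API used from setup_arch() onwards is not available yet.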
+ */ +static int __init mpc836x_rdk_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,mpc8360rdk"); +} + +define_machine(mpc836x_rdk) { + .name = "MPC836x RDK", + .probe = mpc836x_rdk_probe, + .setup_arch = mpc836x_rdk_setup_arch, + .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c new file mode 100644 index 000000000..e53a60b6c --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c @@ -0,0 +1,111 @@ +/* + * arch/powerpc/platforms/83xx/mpc837x_mds.c + * + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * MPC837x MDS board specific routines + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/pci.h> +#include <linux/of.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/ipic.h> +#include <asm/udbg.h> +#include <asm/prom.h> +#include <sysdev/fsl_pci.h> + +#include "mpc83xx.h" + +#define BCSR12_USB_SER_MASK 0x8a +#define BCSR12_USB_SER_PIN 0x80 +#define BCSR12_USB_SER_DEVICE 0x02 + +static int mpc837xmds_usb_cfg(void) +{ + struct device_node *np; + const void *phy_type, *mode; + void __iomem *bcsr_regs = NULL; + u8 bcsr12; + int ret; + + ret = mpc837x_usb_cfg(); + if (ret) + return ret; + /* Map BCSR area */ + np = of_find_compatible_node(NULL, NULL, "fsl,mpc837xmds-bcsr"); + if (np) { + bcsr_regs = of_iomap(np, 0); + of_node_put(np); + } + if (!bcsr_regs) + return -1; + + np = of_find_node_by_name(NULL, "usb"); + if (!np) { + ret = -ENODEV; + goto out; + } + phy_type = of_get_property(np, "phy_type", NULL); + if (phy_type && !strcmp(phy_type, "ulpi")) { + clrbits8(bcsr_regs + 12, BCSR12_USB_SER_PIN); + } else if (phy_type && !strcmp(phy_type, "serial")) { + mode = of_get_property(np, "dr_mode", NULL); + bcsr12 = in_8(bcsr_regs + 12) & ~BCSR12_USB_SER_MASK; + bcsr12 |= BCSR12_USB_SER_PIN; + if (mode && !strcmp(mode, "peripheral")) + bcsr12 |= BCSR12_USB_SER_DEVICE; + out_8(bcsr_regs + 12, bcsr12); + } else { + printk(KERN_ERR "USB DR: unsupported PHY\n"); + } + + of_node_put(np); +out: + iounmap(bcsr_regs); + return ret; +} + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc837x_mds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc837x_mds_setup_arch()", 0); + + mpc83xx_setup_pci(); + mpc837xmds_usb_cfg(); +} + +machine_device_initcall(mpc837x_mds, mpc83xx_declare_of_platform_devices); + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc837x_mds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,mpc837xmds"); +} + +define_machine(mpc837x_mds) { + .name = "MPC837x MDS", + .probe = mpc837x_mds_probe, + .setup_arch = mpc837x_mds_setup_arch, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git 
a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c new file mode 100644 index 000000000..9813c81e8 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c @@ -0,0 +1,89 @@ +/* + * arch/powerpc/platforms/83xx/mpc837x_rdb.c + * + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * MPC837x RDB board specific routines + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/pci.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/ipic.h> +#include <asm/udbg.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc83xx.h" + +static void mpc837x_rdb_sd_cfg(void) +{ + void __iomem *im; + + im = ioremap(get_immrbase(), 0x1000); + if (!im) { + WARN_ON(1); + return; + } + + /* + * On RDB boards (in contrast to MDS) USBB pins are used for SD only, + * so we can safely mux them away from the USB block. + */ + clrsetbits_be32(im + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USBB_MASK, + MPC837X_SICRL_SD); + clrsetbits_be32(im + MPC83XX_SICRH_OFFS, MPC837X_SICRH_SPI_MASK, + MPC837X_SICRH_SD); + iounmap(im); +} + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc837x_rdb_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc837x_rdb_setup_arch()", 0); + + mpc83xx_setup_pci(); + mpc837x_usb_cfg(); + mpc837x_rdb_sd_cfg(); +} + +machine_device_initcall(mpc837x_rdb, mpc83xx_declare_of_platform_devices); + +static const char * const board[] __initconst = { + "fsl,mpc8377rdb", + "fsl,mpc8378rdb", + "fsl,mpc8379rdb", + "fsl,mpc8377wlan", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc837x_rdb_probe(void) +{ + return of_flat_dt_match(of_get_flat_dt_root(), board); +} + +define_machine(mpc837x_rdb) { + .name = "MPC837x RDB/WLAN", + .probe = mpc837x_rdb_probe, + .setup_arch = mpc837x_rdb_setup_arch, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h new file mode 100644 index 000000000..0cf74d7ea --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -0,0 +1,90 @@ +#ifndef __MPC83XX_H__ +#define __MPC83XX_H__ + +#include <linux/init.h> +#include <linux/device.h> +#include <asm/pci-bridge.h> + +/* System Clock Control Register */ +#define MPC83XX_SCCR_OFFS 0xA08 +#define MPC83XX_SCCR_USB_MASK 0x00f00000 +#define MPC83XX_SCCR_USB_MPHCM_11 0x00c00000 +#define MPC83XX_SCCR_USB_MPHCM_01 0x00400000 +#define MPC83XX_SCCR_USB_MPHCM_10 0x00800000 +#define MPC83XX_SCCR_USB_DRCM_11 0x00300000 +#define MPC83XX_SCCR_USB_DRCM_01 0x00100000 +#define MPC83XX_SCCR_USB_DRCM_10 0x00200000 +#define MPC8315_SCCR_USB_MASK 0x00c00000 +#define MPC8315_SCCR_USB_DRCM_11 0x00c00000 +#define MPC8315_SCCR_USB_DRCM_01 0x00400000 +#define MPC837X_SCCR_USB_DRCM_11 0x00c00000 + +/* system i/o configuration register low */ +#define MPC83XX_SICRL_OFFS 0x114 +#define MPC834X_SICRL_USB_MASK 0x60000000 +#define MPC834X_SICRL_USB0 0x20000000 +#define MPC834X_SICRL_USB1 0x40000000 +#define MPC831X_SICRL_USB_MASK 0x00000c00 
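+
+/*
+ * Editorial sketch, not part of the original header: these SICRL/SICRH
+ * constants are mask/value pairs intended for read-modify-write updates
+ * of the big-endian config registers, i.e. new = (old & ~mask) | value.
+ * For example, mpc837x_rdb.c muxes the USBB pins over to SD (constants
+ * a few lines below) with:
+ *
+ *     clrsetbits_be32(immr + MPC83XX_SICRL_OFFS,
+ *                     MPC837X_SICRL_USBB_MASK, MPC837X_SICRL_SD);
+ */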
+#define MPC831X_SICRL_USB_ULPI 0x00000800 +#define MPC8315_SICRL_USB_MASK 0x000000fc +#define MPC8315_SICRL_USB_ULPI 0x00000054 +#define MPC837X_SICRL_USB_MASK 0xf0000000 +#define MPC837X_SICRL_USB_ULPI 0x50000000 +#define MPC837X_SICRL_USBB_MASK 0x30000000 +#define MPC837X_SICRL_SD 0x20000000 + +/* system i/o configuration register high */ +#define MPC83XX_SICRH_OFFS 0x118 +#define MPC8308_SICRH_USB_MASK 0x000c0000 +#define MPC8308_SICRH_USB_ULPI 0x00040000 +#define MPC834X_SICRH_USB_UTMI 0x00020000 +#define MPC831X_SICRH_USB_MASK 0x000000e0 +#define MPC831X_SICRH_USB_ULPI 0x000000a0 +#define MPC8315_SICRH_USB_MASK 0x0000ff00 +#define MPC8315_SICRH_USB_ULPI 0x00000000 +#define MPC837X_SICRH_SPI_MASK 0x00000003 +#define MPC837X_SICRH_SD 0x00000001 + +/* USB Control Register */ +#define FSL_USB2_CONTROL_OFFS 0x500 +#define CONTROL_UTMI_PHY_EN 0x00000200 +#define CONTROL_REFSEL_24MHZ 0x00000040 +#define CONTROL_REFSEL_48MHZ 0x00000080 +#define CONTROL_PHY_CLK_SEL_ULPI 0x00000400 +#define CONTROL_OTG_PORT 0x00000020 + +/* USB PORTSC Registers */ +#define FSL_USB2_PORTSC1_OFFS 0x184 +#define FSL_USB2_PORTSC2_OFFS 0x188 +#define PORTSCX_PTW_16BIT 0x10000000 +#define PORTSCX_PTS_UTMI 0x00000000 +#define PORTSCX_PTS_ULPI 0x80000000 + +/* + * Declaration for the various functions exported by the + * mpc83xx_* files. Mostly for use by mpc83xx_setup + */ + +extern void mpc83xx_restart(char *cmd); +extern long mpc83xx_time_init(void); +extern int mpc837x_usb_cfg(void); +extern int mpc834x_usb_cfg(void); +extern int mpc831x_usb_cfg(void); +extern void mpc83xx_ipic_init_IRQ(void); +#ifdef CONFIG_QUICC_ENGINE +extern void mpc83xx_qe_init_IRQ(void); +extern void mpc83xx_ipic_and_qe_init_IRQ(void); +#else +static inline void __init mpc83xx_qe_init_IRQ(void) {} +#define mpc83xx_ipic_and_qe_init_IRQ mpc83xx_ipic_init_IRQ +#endif /* CONFIG_QUICC_ENGINE */ + +#ifdef CONFIG_PCI +extern void mpc83xx_setup_pci(void); +#else +#define mpc83xx_setup_pci() do {} while (0) +#endif + +extern int mpc83xx_declare_of_platform_devices(void); + +#endif /* __MPC83XX_H__ */ diff --git a/arch/powerpc/platforms/83xx/sbc834x.c b/arch/powerpc/platforms/83xx/sbc834x.c new file mode 100644 index 000000000..26cb3e934 --- /dev/null +++ b/arch/powerpc/platforms/83xx/sbc834x.c @@ -0,0 +1,78 @@ +/* + * arch/powerpc/platforms/83xx/sbc834x.c + * + * Wind River SBC834x board specific routines + * + * By Paul Gortmaker (see MAINTAINERS for contact information) + * + * Based largely on the mpc834x_mds.c support by Kumar Gala. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/reboot.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/of_platform.h> + +#include <linux/atomic.h> +#include <asm/time.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc83xx.h" + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init sbc834x_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("sbc834x_setup_arch()", 0); + + mpc83xx_setup_pci(); +} + +machine_device_initcall(sbc834x, mpc83xx_declare_of_platform_devices); + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init sbc834x_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "SBC834xE"); +} + +define_machine(sbc834x) { + .name = "SBC834xE", + .probe = sbc834x_probe, + .setup_arch = sbc834x_setup_arch, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S new file mode 100644 index 000000000..3d1ecd211 --- /dev/null +++ b/arch/powerpc/platforms/83xx/suspend-asm.S @@ -0,0 +1,533 @@ +/* + * Enter and leave deep sleep state on MPC83xx + * + * Copyright (c) 2006-2008 Freescale Semiconductor, Inc. + * Author: Scott Wood <scottwood@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <asm/page.h> +#include <asm/ppc_asm.h> +#include <asm/reg.h> +#include <asm/asm-offsets.h> + +#define SS_MEMSAVE 0x00 /* First 8 bytes of RAM */ +#define SS_HID 0x08 /* 3 HIDs */ +#define SS_IABR 0x14 /* 2 IABRs */ +#define SS_IBCR 0x1c +#define SS_DABR 0x20 /* 2 DABRs */ +#define SS_DBCR 0x28 +#define SS_SP 0x2c +#define SS_SR 0x30 /* 16 segment registers */ +#define SS_R2 0x70 +#define SS_MSR 0x74 +#define SS_SDR1 0x78 +#define SS_LR 0x7c +#define SS_SPRG 0x80 /* 4 SPRGs */ +#define SS_DBAT 0x90 /* 8 DBATs */ +#define SS_IBAT 0xd0 /* 8 IBATs */ +#define SS_TB 0x110 +#define SS_CR 0x118 +#define SS_GPREG 0x11c /* r12-r31 */ +#define STATE_SAVE_SIZE 0x16c + + .section .data + .align 5 + +mpc83xx_sleep_save_area: + .space STATE_SAVE_SIZE +immrbase: + .long 0 + + .section .text + .align 5 + + /* r3 = physical address of IMMR */ +_GLOBAL(mpc83xx_enter_deep_sleep) + lis r4, immrbase@ha + stw r3, immrbase@l(r4) + + /* The first 2 words of memory are used to communicate with the + * bootloader, to tell it how to resume. + * + * The first word is the magic number 0xf5153ae5, and the second + * is the pointer to mpc83xx_deep_resume. + * + * The original content of these two words is saved in SS_MEMSAVE. 
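 * As an illustration of this handshake (not code from this file;
 * RAM_BASE and resume_fn are hypothetical names), a resume-aware
 * bootloader could do roughly the following early in its boot path:
 *
 *	u32 *p = (u32 *)RAM_BASE;
 *	if (p[0] == 0xf5153ae5) {
 *		void (*resume_fn)(void) = (void (*)(void))p[1];
 *		resume_fn();
 *	}
 *
 * The pointer in the second word is a physical address (note the
 * tophys() applied to mpc83xx_deep_resume below), so the jump is
 * taken with the MMU still off.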
+ */ + + lis r3, mpc83xx_sleep_save_area@h + ori r3, r3, mpc83xx_sleep_save_area@l + + lis r4, KERNELBASE@h + lwz r5, 0(r4) + lwz r6, 4(r4) + + stw r5, SS_MEMSAVE+0(r3) + stw r6, SS_MEMSAVE+4(r3) + + mfspr r5, SPRN_HID0 + mfspr r6, SPRN_HID1 + mfspr r7, SPRN_HID2 + + stw r5, SS_HID+0(r3) + stw r6, SS_HID+4(r3) + stw r7, SS_HID+8(r3) + + mfspr r4, SPRN_IABR + mfspr r5, SPRN_IABR2 + mfspr r6, SPRN_IBCR + mfspr r7, SPRN_DABR + mfspr r8, SPRN_DABR2 + mfspr r9, SPRN_DBCR + + stw r4, SS_IABR+0(r3) + stw r5, SS_IABR+4(r3) + stw r6, SS_IBCR(r3) + stw r7, SS_DABR+0(r3) + stw r8, SS_DABR+4(r3) + stw r9, SS_DBCR(r3) + + mfspr r4, SPRN_SPRG0 + mfspr r5, SPRN_SPRG1 + mfspr r6, SPRN_SPRG2 + mfspr r7, SPRN_SPRG3 + mfsdr1 r8 + + stw r4, SS_SPRG+0(r3) + stw r5, SS_SPRG+4(r3) + stw r6, SS_SPRG+8(r3) + stw r7, SS_SPRG+12(r3) + stw r8, SS_SDR1(r3) + + mfspr r4, SPRN_DBAT0U + mfspr r5, SPRN_DBAT0L + mfspr r6, SPRN_DBAT1U + mfspr r7, SPRN_DBAT1L + + stw r4, SS_DBAT+0x00(r3) + stw r5, SS_DBAT+0x04(r3) + stw r6, SS_DBAT+0x08(r3) + stw r7, SS_DBAT+0x0c(r3) + + mfspr r4, SPRN_DBAT2U + mfspr r5, SPRN_DBAT2L + mfspr r6, SPRN_DBAT3U + mfspr r7, SPRN_DBAT3L + + stw r4, SS_DBAT+0x10(r3) + stw r5, SS_DBAT+0x14(r3) + stw r6, SS_DBAT+0x18(r3) + stw r7, SS_DBAT+0x1c(r3) + + mfspr r4, SPRN_DBAT4U + mfspr r5, SPRN_DBAT4L + mfspr r6, SPRN_DBAT5U + mfspr r7, SPRN_DBAT5L + + stw r4, SS_DBAT+0x20(r3) + stw r5, SS_DBAT+0x24(r3) + stw r6, SS_DBAT+0x28(r3) + stw r7, SS_DBAT+0x2c(r3) + + mfspr r4, SPRN_DBAT6U + mfspr r5, SPRN_DBAT6L + mfspr r6, SPRN_DBAT7U + mfspr r7, SPRN_DBAT7L + + stw r4, SS_DBAT+0x30(r3) + stw r5, SS_DBAT+0x34(r3) + stw r6, SS_DBAT+0x38(r3) + stw r7, SS_DBAT+0x3c(r3) + + mfspr r4, SPRN_IBAT0U + mfspr r5, SPRN_IBAT0L + mfspr r6, SPRN_IBAT1U + mfspr r7, SPRN_IBAT1L + + stw r4, SS_IBAT+0x00(r3) + stw r5, SS_IBAT+0x04(r3) + stw r6, SS_IBAT+0x08(r3) + stw r7, SS_IBAT+0x0c(r3) + + mfspr r4, SPRN_IBAT2U + mfspr r5, SPRN_IBAT2L + mfspr r6, SPRN_IBAT3U + mfspr r7, SPRN_IBAT3L + + stw r4, SS_IBAT+0x10(r3) + stw r5, SS_IBAT+0x14(r3) + stw r6, SS_IBAT+0x18(r3) + stw r7, SS_IBAT+0x1c(r3) + + mfspr r4, SPRN_IBAT4U + mfspr r5, SPRN_IBAT4L + mfspr r6, SPRN_IBAT5U + mfspr r7, SPRN_IBAT5L + + stw r4, SS_IBAT+0x20(r3) + stw r5, SS_IBAT+0x24(r3) + stw r6, SS_IBAT+0x28(r3) + stw r7, SS_IBAT+0x2c(r3) + + mfspr r4, SPRN_IBAT6U + mfspr r5, SPRN_IBAT6L + mfspr r6, SPRN_IBAT7U + mfspr r7, SPRN_IBAT7L + + stw r4, SS_IBAT+0x30(r3) + stw r5, SS_IBAT+0x34(r3) + stw r6, SS_IBAT+0x38(r3) + stw r7, SS_IBAT+0x3c(r3) + + mfmsr r4 + mflr r5 + mfcr r6 + + stw r4, SS_MSR(r3) + stw r5, SS_LR(r3) + stw r6, SS_CR(r3) + stw r1, SS_SP(r3) + stw r2, SS_R2(r3) + +1: mftbu r4 + mftb r5 + mftbu r6 + cmpw r4, r6 + bne 1b + + stw r4, SS_TB+0(r3) + stw r5, SS_TB+4(r3) + + stmw r12, SS_GPREG(r3) + + li r4, 0 + addi r6, r3, SS_SR-4 +1: mfsrin r5, r4 + stwu r5, 4(r6) + addis r4, r4, 0x1000 + cmpwi r4, 0 + bne 1b + + /* Disable machine checks and critical exceptions */ + mfmsr r4 + rlwinm r4, r4, 0, ~MSR_CE + rlwinm r4, r4, 0, ~MSR_ME + mtmsr r4 + isync + +#define TMP_VIRT_IMMR 0xf0000000 +#define DEFAULT_IMMR_VALUE 0xff400000 +#define IMMRBAR_BASE 0x0000 + + lis r4, immrbase@ha + lwz r4, immrbase@l(r4) + + /* Use DBAT0 to address the current IMMR space */ + + ori r4, r4, 0x002a + mtspr SPRN_DBAT0L, r4 + lis r8, TMP_VIRT_IMMR@h + ori r4, r8, 0x001e /* 1 MByte accessible from Kernel Space only */ + mtspr SPRN_DBAT0U, r4 + isync + + /* Use DBAT1 to address the original IMMR space */ + + lis r4, DEFAULT_IMMR_VALUE@h + ori r4, r4, 0x002a + mtspr SPRN_DBAT1L, r4 + lis 
r9, (TMP_VIRT_IMMR + 0x01000000)@h + ori r4, r9, 0x001e /* 1 MByte accessible from Kernel Space only */ + mtspr SPRN_DBAT1U, r4 + isync + + /* Use DBAT2 to address the beginning of RAM. This isn't done + * using the normal virtual mapping, because with page debugging + * enabled it will be read-only. + */ + + li r4, 0x0002 + mtspr SPRN_DBAT2L, r4 + lis r4, KERNELBASE@h + ori r4, r4, 0x001e /* 1 MByte accessible from Kernel Space only */ + mtspr SPRN_DBAT2U, r4 + isync + + /* Flush the cache with our BAT, as there will be TLB misses + * otherwise if page debugging is enabled, and these misses + * will disturb the PLRU algorithm. + */ + + bl __flush_disable_L1 + + /* Keep the i-cache enabled, so the hack below for low-boot + * flash will work. + */ + mfspr r3, SPRN_HID0 + ori r3, r3, HID0_ICE + mtspr SPRN_HID0, r3 + isync + + lis r6, 0xf515 + ori r6, r6, 0x3ae5 + + lis r7, mpc83xx_deep_resume@h + ori r7, r7, mpc83xx_deep_resume@l + tophys(r7, r7) + + lis r5, KERNELBASE@h + stw r6, 0(r5) + stw r7, 4(r5) + + /* Reset BARs */ + + li r4, 0 + stw r4, 0x0024(r8) + stw r4, 0x002c(r8) + stw r4, 0x0034(r8) + stw r4, 0x003c(r8) + stw r4, 0x0064(r8) + stw r4, 0x006c(r8) + + /* Rev 1 of the 8313 has problems with wakeup events that are + * pending during the transition to deep sleep state (such as if + * the PCI host sets the state to D3 and then D0 in rapid + * succession). This check shrinks the race window somewhat. + * + * See erratum PCI23, though the problem is not limited + * to PCI. + */ + + lwz r3, 0x0b04(r8) + andi. r3, r3, 1 + bne- mpc83xx_deep_resume + + /* Move IMMR back to the default location, following the + * procedure specified in the MPC8313 manual. + */ + lwz r4, IMMRBAR_BASE(r8) + isync + lis r4, DEFAULT_IMMR_VALUE@h + stw r4, IMMRBAR_BASE(r8) + lis r4, KERNELBASE@h + lwz r4, 0(r4) + isync + lwz r4, IMMRBAR_BASE(r9) + mr r8, r9 + isync + + /* Check the Reset Configuration Word to see whether flash needs + * to be mapped at a low address or a high address. + */ + + lwz r4, 0x0904(r8) + andis. r4, r4, 0x0400 + li r4, 0 + beq boot_low + lis r4, 0xff80 +boot_low: + stw r4, 0x0020(r8) + lis r7, 0x8000 + ori r7, r7, 0x0016 + + mfspr r5, SPRN_HID0 + rlwinm r5, r5, 0, ~(HID0_DOZE | HID0_NAP) + oris r5, r5, HID0_SLEEP@h + mtspr SPRN_HID0, r5 + isync + + mfmsr r5 + oris r5, r5, MSR_POW@h + + /* Enable the flash mapping at the appropriate address. This + * mapping will override the RAM mapping if booting low, so there's + * no need to disable the latter. This must be done inside the same + * cache line as setting MSR_POW, so that no instruction fetches + * from RAM happen after the flash mapping is turned on. 
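 * (The .align 5 directive below pads to a 32-byte boundary, 2^5
 * bytes, which is the e300 core's cache-line size; the following
 * stw/sync/isync/mtmsr/isync/b sequence is six instructions, 24
 * bytes, so it fits within a single cache line as required.)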
+ */ + + .align 5 + stw r7, 0x0024(r8) + sync + isync + mtmsr r5 + isync +1: b 1b + +mpc83xx_deep_resume: + lis r4, 1f@h + ori r4, r4, 1f@l + tophys(r4, r4) + mtsrr0 r4 + + mfmsr r4 + rlwinm r4, r4, 0, ~(MSR_IR | MSR_DR) + mtsrr1 r4 + + rfi + +1: tlbia + bl __inval_enable_L1 + + lis r3, mpc83xx_sleep_save_area@h + ori r3, r3, mpc83xx_sleep_save_area@l + tophys(r3, r3) + + lwz r5, SS_MEMSAVE+0(r3) + lwz r6, SS_MEMSAVE+4(r3) + + stw r5, 0(0) + stw r6, 4(0) + + lwz r5, SS_HID+0(r3) + lwz r6, SS_HID+4(r3) + lwz r7, SS_HID+8(r3) + + mtspr SPRN_HID0, r5 + mtspr SPRN_HID1, r6 + mtspr SPRN_HID2, r7 + + lwz r4, SS_IABR+0(r3) + lwz r5, SS_IABR+4(r3) + lwz r6, SS_IBCR(r3) + lwz r7, SS_DABR+0(r3) + lwz r8, SS_DABR+4(r3) + lwz r9, SS_DBCR(r3) + + mtspr SPRN_IABR, r4 + mtspr SPRN_IABR2, r5 + mtspr SPRN_IBCR, r6 + mtspr SPRN_DABR, r7 + mtspr SPRN_DABR2, r8 + mtspr SPRN_DBCR, r9 + + li r4, 0 + addi r6, r3, SS_SR-4 +1: lwzu r5, 4(r6) + mtsrin r5, r4 + addis r4, r4, 0x1000 + cmpwi r4, 0 + bne 1b + + lwz r4, SS_DBAT+0x00(r3) + lwz r5, SS_DBAT+0x04(r3) + lwz r6, SS_DBAT+0x08(r3) + lwz r7, SS_DBAT+0x0c(r3) + + mtspr SPRN_DBAT0U, r4 + mtspr SPRN_DBAT0L, r5 + mtspr SPRN_DBAT1U, r6 + mtspr SPRN_DBAT1L, r7 + + lwz r4, SS_DBAT+0x10(r3) + lwz r5, SS_DBAT+0x14(r3) + lwz r6, SS_DBAT+0x18(r3) + lwz r7, SS_DBAT+0x1c(r3) + + mtspr SPRN_DBAT2U, r4 + mtspr SPRN_DBAT2L, r5 + mtspr SPRN_DBAT3U, r6 + mtspr SPRN_DBAT3L, r7 + + lwz r4, SS_DBAT+0x20(r3) + lwz r5, SS_DBAT+0x24(r3) + lwz r6, SS_DBAT+0x28(r3) + lwz r7, SS_DBAT+0x2c(r3) + + mtspr SPRN_DBAT4U, r4 + mtspr SPRN_DBAT4L, r5 + mtspr SPRN_DBAT5U, r6 + mtspr SPRN_DBAT5L, r7 + + lwz r4, SS_DBAT+0x30(r3) + lwz r5, SS_DBAT+0x34(r3) + lwz r6, SS_DBAT+0x38(r3) + lwz r7, SS_DBAT+0x3c(r3) + + mtspr SPRN_DBAT6U, r4 + mtspr SPRN_DBAT6L, r5 + mtspr SPRN_DBAT7U, r6 + mtspr SPRN_DBAT7L, r7 + + lwz r4, SS_IBAT+0x00(r3) + lwz r5, SS_IBAT+0x04(r3) + lwz r6, SS_IBAT+0x08(r3) + lwz r7, SS_IBAT+0x0c(r3) + + mtspr SPRN_IBAT0U, r4 + mtspr SPRN_IBAT0L, r5 + mtspr SPRN_IBAT1U, r6 + mtspr SPRN_IBAT1L, r7 + + lwz r4, SS_IBAT+0x10(r3) + lwz r5, SS_IBAT+0x14(r3) + lwz r6, SS_IBAT+0x18(r3) + lwz r7, SS_IBAT+0x1c(r3) + + mtspr SPRN_IBAT2U, r4 + mtspr SPRN_IBAT2L, r5 + mtspr SPRN_IBAT3U, r6 + mtspr SPRN_IBAT3L, r7 + + lwz r4, SS_IBAT+0x20(r3) + lwz r5, SS_IBAT+0x24(r3) + lwz r6, SS_IBAT+0x28(r3) + lwz r7, SS_IBAT+0x2c(r3) + + mtspr SPRN_IBAT4U, r4 + mtspr SPRN_IBAT4L, r5 + mtspr SPRN_IBAT5U, r6 + mtspr SPRN_IBAT5L, r7 + + lwz r4, SS_IBAT+0x30(r3) + lwz r5, SS_IBAT+0x34(r3) + lwz r6, SS_IBAT+0x38(r3) + lwz r7, SS_IBAT+0x3c(r3) + + mtspr SPRN_IBAT6U, r4 + mtspr SPRN_IBAT6L, r5 + mtspr SPRN_IBAT7U, r6 + mtspr SPRN_IBAT7L, r7 + + lwz r4, SS_SPRG+0(r3) + lwz r5, SS_SPRG+4(r3) + lwz r6, SS_SPRG+8(r3) + lwz r7, SS_SPRG+12(r3) + lwz r8, SS_SDR1(r3) + + mtspr SPRN_SPRG0, r4 + mtspr SPRN_SPRG1, r5 + mtspr SPRN_SPRG2, r6 + mtspr SPRN_SPRG3, r7 + mtsdr1 r8 + + lwz r4, SS_MSR(r3) + lwz r5, SS_LR(r3) + lwz r6, SS_CR(r3) + lwz r1, SS_SP(r3) + lwz r2, SS_R2(r3) + + mtsrr1 r4 + mtsrr0 r5 + mtcr r6 + + li r4, 0 + mtspr SPRN_TBWL, r4 + + lwz r4, SS_TB+0(r3) + lwz r5, SS_TB+4(r3) + + mtspr SPRN_TBWU, r4 + mtspr SPRN_TBWL, r5 + + lmw r12, SS_GPREG(r3) + + /* Kick decrementer */ + li r0, 1 + mtdec r0 + + rfi diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c new file mode 100644 index 000000000..c9adbfb65 --- /dev/null +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -0,0 +1,449 @@ +/* + * MPC83xx suspend support + * + * Author: Scott Wood <scottwood@freescale.com> + * + * Copyright (c) 
2006-2007 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/pm.h> +#include <linux/types.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/wait.h> +#include <linux/kthread.h> +#include <linux/freezer.h> +#include <linux/suspend.h> +#include <linux/fsl_devices.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/export.h> + +#include <asm/reg.h> +#include <asm/io.h> +#include <asm/time.h> +#include <asm/mpc6xx.h> +#include <asm/switch_to.h> + +#include <sysdev/fsl_soc.h> + +#define PMCCR1_NEXT_STATE 0x0C /* Next state for power management */ +#define PMCCR1_NEXT_STATE_SHIFT 2 +#define PMCCR1_CURR_STATE 0x03 /* Current state for power management*/ +#define IMMR_SYSCR_OFFSET 0x100 +#define IMMR_RCW_OFFSET 0x900 +#define RCW_PCI_HOST 0x80000000 + +void mpc83xx_enter_deep_sleep(phys_addr_t immrbase); + +struct mpc83xx_pmc { + u32 config; +#define PMCCR_DLPEN 2 /* DDR SDRAM low power enable */ +#define PMCCR_SLPEN 1 /* System low power enable */ + + u32 event; + u32 mask; +/* All but PMCI are deep-sleep only */ +#define PMCER_GPIO 0x100 +#define PMCER_PCI 0x080 +#define PMCER_USB 0x040 +#define PMCER_ETSEC1 0x020 +#define PMCER_ETSEC2 0x010 +#define PMCER_TIMER 0x008 +#define PMCER_INT1 0x004 +#define PMCER_INT2 0x002 +#define PMCER_PMCI 0x001 +#define PMCER_ALL 0x1FF + + /* deep-sleep only */ + u32 config1; +#define PMCCR1_USE_STATE 0x80000000 +#define PMCCR1_PME_EN 0x00000080 +#define PMCCR1_ASSERT_PME 0x00000040 +#define PMCCR1_POWER_OFF 0x00000020 + + /* deep-sleep only */ + u32 config2; +}; + +struct mpc83xx_rcw { + u32 rcwlr; + u32 rcwhr; +}; + +struct mpc83xx_clock { + u32 spmr; + u32 occr; + u32 sccr; +}; + +struct mpc83xx_syscr { + __be32 sgprl; + __be32 sgprh; + __be32 spridr; + __be32 :32; + __be32 spcr; + __be32 sicrl; + __be32 sicrh; +}; + +struct mpc83xx_saved { + u32 sicrl; + u32 sicrh; + u32 sccr; +}; + +struct pmc_type { + int has_deep_sleep; +}; + +static struct platform_device *pmc_dev; +static int has_deep_sleep, deep_sleeping; +static int pmc_irq; +static struct mpc83xx_pmc __iomem *pmc_regs; +static struct mpc83xx_clock __iomem *clock_regs; +static struct mpc83xx_syscr __iomem *syscr_regs; +static struct mpc83xx_saved saved_regs; +static int is_pci_agent, wake_from_pci; +static phys_addr_t immrbase; +static int pci_pm_state; +static DECLARE_WAIT_QUEUE_HEAD(agent_wq); + +int fsl_deep_sleep(void) +{ + return deep_sleeping; +} +EXPORT_SYMBOL(fsl_deep_sleep); + +static int mpc83xx_change_state(void) +{ + u32 curr_state; + u32 reg_cfg1 = in_be32(&pmc_regs->config1); + + if (is_pci_agent) { + pci_pm_state = (reg_cfg1 & PMCCR1_NEXT_STATE) >> + PMCCR1_NEXT_STATE_SHIFT; + curr_state = reg_cfg1 & PMCCR1_CURR_STATE; + + if (curr_state != pci_pm_state) { + reg_cfg1 &= ~PMCCR1_CURR_STATE; + reg_cfg1 |= pci_pm_state; + out_be32(&pmc_regs->config1, reg_cfg1); + + wake_up(&agent_wq); + return 1; + } + } + + return 0; +} + +static irqreturn_t pmc_irq_handler(int irq, void *dev_id) +{ + u32 event = in_be32(&pmc_regs->event); + int ret = IRQ_NONE; + + if (mpc83xx_change_state()) + ret = IRQ_HANDLED; + + if (event) { + out_be32(&pmc_regs->event, event); + ret = IRQ_HANDLED; + } + + return ret; +} + +static void mpc83xx_suspend_restore_regs(void) +{ + out_be32(&syscr_regs->sicrl, saved_regs.sicrl); + 
out_be32(&syscr_regs->sicrh, saved_regs.sicrh); + out_be32(&clock_regs->sccr, saved_regs.sccr); +} + +static void mpc83xx_suspend_save_regs(void) +{ + saved_regs.sicrl = in_be32(&syscr_regs->sicrl); + saved_regs.sicrh = in_be32(&syscr_regs->sicrh); + saved_regs.sccr = in_be32(&clock_regs->sccr); +} + +static int mpc83xx_suspend_enter(suspend_state_t state) +{ + int ret = -EAGAIN; + + /* Don't go to sleep if there's a race where pci_pm_state changes + * between the agent thread checking it and the PM code disabling + * interrupts. + */ + if (wake_from_pci) { + if (pci_pm_state != (deep_sleeping ? 3 : 2)) + goto out; + + out_be32(&pmc_regs->config1, + in_be32(&pmc_regs->config1) | PMCCR1_PME_EN); + } + + /* Put the system into low-power mode and the RAM + * into self-refresh mode once the core goes to + * sleep. + */ + + out_be32(&pmc_regs->config, PMCCR_SLPEN | PMCCR_DLPEN); + + /* If it has deep sleep (i.e. it's an 831x or compatible), + * disable power to the core upon entering sleep mode. This will + * require going through the boot firmware upon a wakeup event. + */ + + if (deep_sleeping) { + mpc83xx_suspend_save_regs(); + + out_be32(&pmc_regs->mask, PMCER_ALL); + + out_be32(&pmc_regs->config1, + in_be32(&pmc_regs->config1) | PMCCR1_POWER_OFF); + + enable_kernel_fp(); + + mpc83xx_enter_deep_sleep(immrbase); + + out_be32(&pmc_regs->config1, + in_be32(&pmc_regs->config1) & ~PMCCR1_POWER_OFF); + + out_be32(&pmc_regs->mask, PMCER_PMCI); + + mpc83xx_suspend_restore_regs(); + } else { + out_be32(&pmc_regs->mask, PMCER_PMCI); + + mpc6xx_enter_standby(); + } + + ret = 0; + +out: + out_be32(&pmc_regs->config1, + in_be32(&pmc_regs->config1) & ~PMCCR1_PME_EN); + + return ret; +} + +static void mpc83xx_suspend_end(void) +{ + deep_sleeping = 0; +} + +static int mpc83xx_suspend_valid(suspend_state_t state) +{ + return state == PM_SUSPEND_STANDBY || state == PM_SUSPEND_MEM; +} + +static int mpc83xx_suspend_begin(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_STANDBY: + deep_sleeping = 0; + return 0; + + case PM_SUSPEND_MEM: + if (has_deep_sleep) + deep_sleeping = 1; + + return 0; + + default: + return -EINVAL; + } +} + +static int agent_thread_fn(void *data) +{ + while (1) { + wait_event_interruptible(agent_wq, pci_pm_state >= 2); + try_to_freeze(); + + if (signal_pending(current) || pci_pm_state < 2) + continue; + + /* With a preemptible kernel (or SMP), this could race with + * a userspace-driven suspend request. It's probably best + * to avoid mixing the two with such a configuration (or + * else fix it by adding a mutex to state_store that we can + * synchronize with). + */ + + wake_from_pci = 1; + + pm_suspend(pci_pm_state == 3 ? 
PM_SUSPEND_MEM : + PM_SUSPEND_STANDBY); + + wake_from_pci = 0; + } + + return 0; +} + +static void mpc83xx_set_agent(void) +{ + out_be32(&pmc_regs->config1, PMCCR1_USE_STATE); + out_be32(&pmc_regs->mask, PMCER_PMCI); + + kthread_run(agent_thread_fn, NULL, "PCI power mgt"); +} + +static int mpc83xx_is_pci_agent(void) +{ + struct mpc83xx_rcw __iomem *rcw_regs; + int ret; + + rcw_regs = ioremap(get_immrbase() + IMMR_RCW_OFFSET, + sizeof(struct mpc83xx_rcw)); + + if (!rcw_regs) + return -ENOMEM; + + ret = !(in_be32(&rcw_regs->rcwhr) & RCW_PCI_HOST); + + iounmap(rcw_regs); + return ret; +} + +static const struct platform_suspend_ops mpc83xx_suspend_ops = { + .valid = mpc83xx_suspend_valid, + .begin = mpc83xx_suspend_begin, + .enter = mpc83xx_suspend_enter, + .end = mpc83xx_suspend_end, +}; + +static const struct of_device_id pmc_match[]; +static int pmc_probe(struct platform_device *ofdev) +{ + const struct of_device_id *match; + struct device_node *np = ofdev->dev.of_node; + struct resource res; + const struct pmc_type *type; + int ret = 0; + + match = of_match_device(pmc_match, &ofdev->dev); + if (!match) + return -EINVAL; + + type = match->data; + + if (!of_device_is_available(np)) + return -ENODEV; + + has_deep_sleep = type->has_deep_sleep; + immrbase = get_immrbase(); + pmc_dev = ofdev; + + is_pci_agent = mpc83xx_is_pci_agent(); + if (is_pci_agent < 0) + return is_pci_agent; + + ret = of_address_to_resource(np, 0, &res); + if (ret) + return -ENODEV; + + pmc_irq = irq_of_parse_and_map(np, 0); + if (pmc_irq != NO_IRQ) { + ret = request_irq(pmc_irq, pmc_irq_handler, IRQF_SHARED, + "pmc", ofdev); + + if (ret) + return -EBUSY; + } + + pmc_regs = ioremap(res.start, sizeof(struct mpc83xx_pmc)); + + if (!pmc_regs) { + ret = -ENOMEM; + goto out; + } + + ret = of_address_to_resource(np, 1, &res); + if (ret) { + ret = -ENODEV; + goto out_pmc; + } + + clock_regs = ioremap(res.start, sizeof(struct mpc83xx_pmc)); + + if (!clock_regs) { + ret = -ENOMEM; + goto out_pmc; + } + + if (has_deep_sleep) { + syscr_regs = ioremap(immrbase + IMMR_SYSCR_OFFSET, + sizeof(*syscr_regs)); + if (!syscr_regs) { + ret = -ENOMEM; + goto out_syscr; + } + } + + if (is_pci_agent) + mpc83xx_set_agent(); + + suspend_set_ops(&mpc83xx_suspend_ops); + return 0; + +out_syscr: + iounmap(clock_regs); +out_pmc: + iounmap(pmc_regs); +out: + if (pmc_irq != NO_IRQ) + free_irq(pmc_irq, ofdev); + + return ret; +} + +static int pmc_remove(struct platform_device *ofdev) +{ + return -EPERM; +}; + +static struct pmc_type pmc_types[] = { + { + .has_deep_sleep = 1, + }, + { + .has_deep_sleep = 0, + } +}; + +static const struct of_device_id pmc_match[] = { + { + .compatible = "fsl,mpc8313-pmc", + .data = &pmc_types[0], + }, + { + .compatible = "fsl,mpc8349-pmc", + .data = &pmc_types[1], + }, + {} +}; + +static struct platform_driver pmc_driver = { + .driver = { + .name = "mpc83xx-pmc", + .of_match_table = pmc_match, + }, + .probe = pmc_probe, + .remove = pmc_remove +}; + +static int pmc_init(void) +{ + return platform_driver_register(&pmc_driver); +} + +module_init(pmc_init); diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c new file mode 100644 index 000000000..5c31d8292 --- /dev/null +++ b/arch/powerpc/platforms/83xx/usb.c @@ -0,0 +1,253 @@ +/* + * Freescale 83xx USB SOC setup code + * + * Copyright (C) 2007 Freescale Semiconductor, Inc. 
+ * Author: Li Yang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/of.h> + +#include <asm/io.h> +#include <asm/prom.h> +#include <sysdev/fsl_soc.h> + +#include "mpc83xx.h" + + +#ifdef CONFIG_PPC_MPC834x +int mpc834x_usb_cfg(void) +{ + unsigned long sccr, sicrl, sicrh; + void __iomem *immap; + struct device_node *np = NULL; + int port0_is_dr = 0, port1_is_dr = 0; + const void *prop, *dr_mode; + + immap = ioremap(get_immrbase(), 0x1000); + if (!immap) + return -ENOMEM; + + /* Read registers */ + /* Note: DR and MPH must use the same clock setting in SCCR */ + sccr = in_be32(immap + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK; + sicrl = in_be32(immap + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK; + sicrh = in_be32(immap + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI; + + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); + if (np) { + sccr |= MPC83XX_SCCR_USB_DRCM_11; /* 1:3 */ + + prop = of_get_property(np, "phy_type", NULL); + port1_is_dr = 1; + if (prop && (!strcmp(prop, "utmi") || + !strcmp(prop, "utmi_wide"))) { + sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1; + sicrh |= MPC834X_SICRH_USB_UTMI; + port0_is_dr = 1; + } else if (prop && !strcmp(prop, "serial")) { + dr_mode = of_get_property(np, "dr_mode", NULL); + if (dr_mode && !strcmp(dr_mode, "otg")) { + sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1; + port0_is_dr = 1; + } else { + sicrl |= MPC834X_SICRL_USB1; + } + } else if (prop && !strcmp(prop, "ulpi")) { + sicrl |= MPC834X_SICRL_USB1; + } else { + printk(KERN_WARNING "834x USB PHY type not supported\n"); + } + of_node_put(np); + } + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph"); + if (np) { + sccr |= MPC83XX_SCCR_USB_MPHCM_11; /* 1:3 */ + + prop = of_get_property(np, "port0", NULL); + if (prop) { + if (port0_is_dr) + printk(KERN_WARNING + "834x USB port0 can't be used by both DR and MPH!\n"); + sicrl &= ~MPC834X_SICRL_USB0; + } + prop = of_get_property(np, "port1", NULL); + if (prop) { + if (port1_is_dr) + printk(KERN_WARNING + "834x USB port1 can't be used by both DR and MPH!\n"); + sicrl &= ~MPC834X_SICRL_USB1; + } + of_node_put(np); + } + + /* Write back */ + out_be32(immap + MPC83XX_SCCR_OFFS, sccr); + out_be32(immap + MPC83XX_SICRL_OFFS, sicrl); + out_be32(immap + MPC83XX_SICRH_OFFS, sicrh); + + iounmap(immap); + return 0; +} +#endif /* CONFIG_PPC_MPC834x */ + +#ifdef CONFIG_PPC_MPC831x +int mpc831x_usb_cfg(void) +{ + u32 temp; + void __iomem *immap, *usb_regs; + struct device_node *np = NULL; + struct device_node *immr_node = NULL; + const void *prop; + struct resource res; + int ret = 0; +#ifdef CONFIG_USB_OTG + const void *dr_mode; +#endif + + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); + if (!np) + return -ENODEV; + prop = of_get_property(np, "phy_type", NULL); + + /* Map IMMR space for pin and clock settings */ + immap = ioremap(get_immrbase(), 0x1000); + if (!immap) { + of_node_put(np); + return -ENOMEM; + } + + /* Configure clock */ + immr_node = of_get_parent(np); + if (immr_node && (of_device_is_compatible(immr_node, "fsl,mpc8315-immr") || + of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))) + clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, + MPC8315_SCCR_USB_MASK, + MPC8315_SCCR_USB_DRCM_01); + else + 
clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, + MPC83XX_SCCR_USB_MASK, + MPC83XX_SCCR_USB_DRCM_11); + + /* Configure pin mux for ULPI. There is no pin mux for UTMI */ + if (prop && !strcmp(prop, "ulpi")) { + if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) { + clrsetbits_be32(immap + MPC83XX_SICRH_OFFS, + MPC8308_SICRH_USB_MASK, + MPC8308_SICRH_USB_ULPI); + } else if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) { + clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, + MPC8315_SICRL_USB_MASK, + MPC8315_SICRL_USB_ULPI); + clrsetbits_be32(immap + MPC83XX_SICRH_OFFS, + MPC8315_SICRH_USB_MASK, + MPC8315_SICRH_USB_ULPI); + } else { + clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, + MPC831X_SICRL_USB_MASK, + MPC831X_SICRL_USB_ULPI); + clrsetbits_be32(immap + MPC83XX_SICRH_OFFS, + MPC831X_SICRH_USB_MASK, + MPC831X_SICRH_USB_ULPI); + } + } + + iounmap(immap); + + of_node_put(immr_node); + + /* Map USB SOC space */ + ret = of_address_to_resource(np, 0, &res); + if (ret) { + of_node_put(np); + return ret; + } + usb_regs = ioremap(res.start, resource_size(&res)); + + /* Using on-chip PHY */ + if (prop && (!strcmp(prop, "utmi_wide") || + !strcmp(prop, "utmi"))) { + u32 refsel; + + if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) + goto out; + + if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) + refsel = CONTROL_REFSEL_24MHZ; + else + refsel = CONTROL_REFSEL_48MHZ; + /* Set UTMI_PHY_EN and REFSEL */ + out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, + CONTROL_UTMI_PHY_EN | refsel); + /* Using external ULPI PHY */ + } else if (prop && !strcmp(prop, "ulpi")) { + /* Set PHY_CLK_SEL to ULPI */ + temp = CONTROL_PHY_CLK_SEL_ULPI; +#ifdef CONFIG_USB_OTG + /* Set OTG_PORT */ + if (!of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) { + dr_mode = of_get_property(np, "dr_mode", NULL); + if (dr_mode && !strcmp(dr_mode, "otg")) + temp |= CONTROL_OTG_PORT; + } +#endif /* CONFIG_USB_OTG */ + out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp); + } else { + printk(KERN_WARNING "831x USB PHY type not supported\n"); + ret = -EINVAL; + } + +out: + iounmap(usb_regs); + of_node_put(np); + return ret; +} +#endif /* CONFIG_PPC_MPC831x */ + +#ifdef CONFIG_PPC_MPC837x +int mpc837x_usb_cfg(void) +{ + void __iomem *immap; + struct device_node *np = NULL; + const void *prop; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); + if (!np || !of_device_is_available(np)) + return -ENODEV; + prop = of_get_property(np, "phy_type", NULL); + + if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) { + printk(KERN_WARNING "837x USB PHY type not supported\n"); + of_node_put(np); + return -EINVAL; + } + + /* Map IMMR space for pin and clock settings */ + immap = ioremap(get_immrbase(), 0x1000); + if (!immap) { + of_node_put(np); + return -ENOMEM; + } + + /* Configure clock */ + clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11, + MPC837X_SCCR_USB_DRCM_11); + + /* Configure pin mux for ULPI/serial */ + clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK, + MPC837X_SICRL_USB_ULPI); + + iounmap(immap); + of_node_put(np); + return ret; +} +#endif /* CONFIG_PPC_MPC837x */ diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig new file mode 100644 index 000000000..2fb4b2436 --- /dev/null +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -0,0 +1,290 @@ +menuconfig FSL_SOC_BOOKE + bool "Freescale Book-E Machine Type" + depends on PPC_85xx || PPC_BOOK3E + select FSL_SOC + select PPC_UDBG_16550 + select MPIC + select PPC_PCI_CHOICE + 
select FSL_PCI if PCI + select SERIAL_8250_EXTENDED if SERIAL_8250 + select SERIAL_8250_SHARE_IRQ if SERIAL_8250 + default y + +if FSL_SOC_BOOKE + +if PPC32 + +config FSL_85XX_CACHE_SRAM + bool + select PPC_LIB_RHEAP + help + When selected, this option enables cache-sram support + for memory allocation on P1/P2 QorIQ platforms. + cache-sram-size and cache-sram-offset kernel boot + parameters should be passed when this option is enabled. + +config BSC9131_RDB + bool "Freescale BSC9131RDB" + select DEFAULT_UIMAGE + help + This option enables support for the Freescale BSC9131RDB board. + The BSC9131 is a heterogeneous SoC containing an e500v2 powerpc and a + StarCore SC3850 DSP + Manufacturer : Freescale Semiconductor, Inc + +config C293_PCIE + bool "Freescale C293PCIE" + select DEFAULT_UIMAGE + help + This option enables support for the C293PCIE board + +config BSC9132_QDS + bool "Freescale BSC9132QDS" + select DEFAULT_UIMAGE + help + This option enables support for the Freescale BSC9132 QDS board. + BSC9132 is a heterogeneous SoC containing dual e500v2 powerpc cores + and dual StarCore SC3850 DSP cores. + Manufacturer : Freescale Semiconductor, Inc + +config MPC8540_ADS + bool "Freescale MPC8540 ADS" + select DEFAULT_UIMAGE + help + This option enables support for the MPC 8540 ADS board + +config MPC8560_ADS + bool "Freescale MPC8560 ADS" + select DEFAULT_UIMAGE + select CPM2 + help + This option enables support for the MPC 8560 ADS board + +config MPC85xx_CDS + bool "Freescale MPC85xx CDS" + select DEFAULT_UIMAGE + select PPC_I8259 + select HAS_RAPIDIO + help + This option enables support for the MPC85xx CDS board + +config MPC85xx_MDS + bool "Freescale MPC85xx MDS" + select DEFAULT_UIMAGE + select PHYLIB + select HAS_RAPIDIO + select SWIOTLB + help + This option enables support for the MPC85xx MDS board + +config MPC8536_DS + bool "Freescale MPC8536 DS" + select DEFAULT_UIMAGE + select SWIOTLB + help + This option enables support for the MPC8536 DS board + +config MPC85xx_DS + bool "Freescale MPC85xx DS" + select PPC_I8259 + select DEFAULT_UIMAGE + select FSL_ULI1575 if PCI + select SWIOTLB + help + This option enables support for the MPC85xx DS (MPC8544 DS) board + +config MPC85xx_RDB + bool "Freescale MPC85xx RDB" + select PPC_I8259 + select DEFAULT_UIMAGE + select FSL_ULI1575 if PCI + select SWIOTLB + help + This option enables support for the MPC85xx RDB (P2020 RDB) board + +config P1010_RDB + bool "Freescale P1010RDB" + select DEFAULT_UIMAGE + help + This option enables support for the MPC85xx RDB (P1010 RDB) board + + P1010RDB contains P1010Si, which provides CPU performance up to 800 + MHz and 1600 DMIPS, additional functionality and faster interfaces + (DDR3/3L, SATA II, and PCI Express). + +config P1022_DS + bool "Freescale P1022 DS" + select DEFAULT_UIMAGE + select SWIOTLB + help + This option enables support for the Freescale P1022DS reference board. + +config P1022_RDK + bool "Freescale / iVeia P1022 RDK" + select DEFAULT_UIMAGE + help + This option enables support for the Freescale / iVeia P1022RDK + reference board. + +config P1023_RDB + bool "Freescale P1023 RDB" + select DEFAULT_UIMAGE + help + This option enables support for the P1023 RDB board. + +config TWR_P102x + bool "Freescale TWR-P102x" + select DEFAULT_UIMAGE + help + This option enables support for the TWR-P1025 board. + +config SOCRATES + bool "Socrates" + select DEFAULT_UIMAGE + help + This option enables support for the Socrates board. 
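+# Each board option in this file gates a platform object in the 85xx
+# Makefile further down in this import, e.g.:
+#
+#	obj-$(CONFIG_SOCRATES)	+= socrates.o socrates_fpga_pic.o
+#
+# so enabling a board compiles in just that board's setup/probe code.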
+ +config KSI8560 + bool "Emerson KSI8560" + select DEFAULT_UIMAGE + help + This option enables support for the Emerson KSI8560 board + +config XES_MPC85xx + bool "X-ES single-board computer" + select DEFAULT_UIMAGE + help + This option enables support for the various single-board + computers from Extreme Engineering Solutions (X-ES) based on + Freescale MPC85xx processors. + Manufacturer: Extreme Engineering Solutions, Inc. + URL: <http://www.xes-inc.com/> + +config STX_GP3 + bool "Silicon Turnkey Express GP3" + help + This option enables support for the Silicon Turnkey Express GP3 + board. + select CPM2 + select DEFAULT_UIMAGE + +config TQM8540 + bool "TQ Components TQM8540" + help + This option enables support for the TQ Components TQM8540 board. + select DEFAULT_UIMAGE + select TQM85xx + +config TQM8541 + bool "TQ Components TQM8541" + help + This option enables support for the TQ Components TQM8541 board. + select DEFAULT_UIMAGE + select TQM85xx + select CPM2 + +config TQM8548 + bool "TQ Components TQM8548" + help + This option enables support for the TQ Components TQM8548 board. + select DEFAULT_UIMAGE + select TQM85xx + +config TQM8555 + bool "TQ Components TQM8555" + help + This option enables support for the TQ Components TQM8555 board. + select DEFAULT_UIMAGE + select TQM85xx + select CPM2 + +config TQM8560 + bool "TQ Components TQM8560" + help + This option enables support for the TQ Components TQM8560 board. + select DEFAULT_UIMAGE + select TQM85xx + select CPM2 + +config SBC8548 + bool "Wind River SBC8548" + select DEFAULT_UIMAGE + help + This option enables support for the Wind River SBC8548 board + +config PPA8548 + bool "Prodrive PPA8548" + help + This option enables support for the Prodrive PPA8548 board. + select DEFAULT_UIMAGE + select HAS_RAPIDIO + +config GE_IMP3A + bool "GE Intelligent Platforms IMP3A" + select DEFAULT_UIMAGE + select SWIOTLB + select MMIO_NVRAM + select ARCH_REQUIRE_GPIOLIB + select GE_FPGA + help + This option enables support for the GE Intelligent Platforms IMP3A + board. + + This board is a 3U CompactPCI Single Board Computer with a Freescale + P2020 processor. + +config SGY_CTS1000 + tristate "Servergy CTS-1000 support" + select GPIOLIB + select OF_GPIO + depends on CORENET_GENERIC + help + Enable this to support functionality in Servergy's CTS-1000 systems. + +config MVME2500 + bool "Artesyn MVME2500" + select DEFAULT_UIMAGE + help + This option enables support for the Emerson/Artesyn MVME2500 board. + +endif # PPC32 + +config PPC_QEMU_E500 + bool "QEMU generic e500 platform" + select DEFAULT_UIMAGE + help + This option enables support for running as a QEMU guest using + QEMU's generic e500 machine. This is not required if you're + using a QEMU machine that targets a specific board, such as + mpc8544ds. + + Unlike most e500 boards that target a specific CPU, this + platform works with any e500-family CPU that QEMU supports. + Thus, you'll need to make sure CONFIG_PPC_E500MC is set or + unset based on the emulated CPU (or actual host CPU in the case + of KVM). + +config CORENET_GENERIC + bool "Freescale CoreNet Generic" + select DEFAULT_UIMAGE + select E500 + select PPC_E500MC + select PHYS_64BIT + select SWIOTLB + select ARCH_REQUIRE_GPIOLIB + select GPIO_MPC8XXX + select HAS_RAPIDIO + select PPC_EPAPR_HV_PIC + help + This option enables support for the FSL CoreNet based boards. 
+ For 32bit kernel, the following boards are supported: + P2041 RDB, P3041 DS, P4080 DS, kmcoge4, and OCA4080 + For 64bit kernel, the following boards are supported: + T208x QDS/RDB, T4240 QDS/RDB and B4 QDS + The following boards are supported for both 32bit and 64bit kernel: + P5020 DS, P5040 DS and T104xQDS/RDB + +endif # FSL_SOC_BOOKE + +config TQM85xx + bool diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile new file mode 100644 index 000000000..1fe7fb951 --- /dev/null +++ b/arch/powerpc/platforms/85xx/Makefile @@ -0,0 +1,34 @@ +# +# Makefile for the PowerPC 85xx linux kernel. +# +obj-$(CONFIG_SMP) += smp.o + +obj-y += common.o + +obj-$(CONFIG_BSC9131_RDB) += bsc913x_rdb.o +obj-$(CONFIG_BSC9132_QDS) += bsc913x_qds.o +obj-$(CONFIG_C293_PCIE) += c293pcie.o +obj-$(CONFIG_MPC8540_ADS) += mpc85xx_ads.o +obj-$(CONFIG_MPC8560_ADS) += mpc85xx_ads.o +obj-$(CONFIG_MPC85xx_CDS) += mpc85xx_cds.o +obj-$(CONFIG_MPC8536_DS) += mpc8536_ds.o +obj-$(CONFIG_MPC85xx_DS) += mpc85xx_ds.o +obj-$(CONFIG_MPC85xx_MDS) += mpc85xx_mds.o +obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o +obj-$(CONFIG_P1010_RDB) += p1010rdb.o +obj-$(CONFIG_P1022_DS) += p1022_ds.o +obj-$(CONFIG_P1022_RDK) += p1022_rdk.o +obj-$(CONFIG_P1023_RDB) += p1023_rdb.o +obj-$(CONFIG_TWR_P102x) += twr_p102x.o +obj-$(CONFIG_CORENET_GENERIC) += corenet_generic.o +obj-$(CONFIG_STX_GP3) += stx_gp3.o +obj-$(CONFIG_TQM85xx) += tqm85xx.o +obj-$(CONFIG_SBC8548) += sbc8548.o +obj-$(CONFIG_PPA8548) += ppa8548.o +obj-$(CONFIG_SOCRATES) += socrates.o socrates_fpga_pic.o +obj-$(CONFIG_KSI8560) += ksi8560.o +obj-$(CONFIG_XES_MPC85xx) += xes_mpc85xx.o +obj-$(CONFIG_GE_IMP3A) += ge_imp3a.o +obj-$(CONFIG_PPC_QEMU_E500) += qemu_e500.o +obj-$(CONFIG_SGY_CTS1000) += sgy_cts1000.o +obj-$(CONFIG_MVME2500) += mvme2500.o diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c new file mode 100644 index 000000000..f0927e58a --- /dev/null +++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c @@ -0,0 +1,74 @@ +/* + * BSC913xQDS Board Setup + * + * Author: + * Harninder Rai <harninder.rai@freescale.com> + * Priyanka Jain <Priyanka.Jain@freescale.com> + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/of_platform.h> +#include <linux/pci.h> +#include <asm/mpic.h> +#include <sysdev/fsl_soc.h> +#include <asm/udbg.h> + +#include "mpc85xx.h" +#include "smp.h" + +void __init bsc913x_qds_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + + if (!mpic) + pr_err("bsc913x: Failed to allocate MPIC structure\n"); + else + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init bsc913x_qds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("bsc913x_qds_setup_arch()", 0); + +#if defined(CONFIG_SMP) + mpc85xx_smp_init(); +#endif + + pr_info("bsc913x board from Freescale Semiconductor\n"); +} + +machine_device_initcall(bsc9132_qds, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ + +static int __init bsc9132_qds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,bsc9132qds"); +} + +define_machine(bsc9132_qds) { + .name = "BSC9132 QDS", + .probe = bsc9132_qds_probe, + .setup_arch = bsc913x_qds_setup_arch, + .init_IRQ = bsc913x_qds_pic_init, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/bsc913x_rdb.c b/arch/powerpc/platforms/85xx/bsc913x_rdb.c new file mode 100644 index 000000000..9d57bedb9 --- /dev/null +++ b/arch/powerpc/platforms/85xx/bsc913x_rdb.c @@ -0,0 +1,67 @@ +/* + * BSC913xRDB Board Setup + * + * Author: Priyanka Jain <Priyanka.Jain@freescale.com> + * + * Copyright 2011-2012 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/of_platform.h> +#include <linux/pci.h> +#include <asm/mpic.h> +#include <sysdev/fsl_soc.h> +#include <asm/udbg.h> + +#include "mpc85xx.h" + +void __init bsc913x_rdb_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + + if (!mpic) + pr_err("bsc913x: Failed to allocate MPIC structure\n"); + else + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init bsc913x_rdb_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("bsc913x_rdb_setup_arch()", 0); + + pr_info("bsc913x board from Freescale Semiconductor\n"); +} + +machine_device_initcall(bsc9131_rdb, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ + +static int __init bsc9131_rdb_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,bsc9131rdb"); +} + +define_machine(bsc9131_rdb) { + .name = "BSC9131 RDB", + .probe = bsc9131_rdb_probe, + .setup_arch = bsc913x_rdb_setup_arch, + .init_IRQ = bsc913x_rdb_pic_init, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c new file mode 100644 index 000000000..84476b646 --- /dev/null +++ b/arch/powerpc/platforms/85xx/c293pcie.c @@ -0,0 +1,77 @@ +/* + * C293PCIE Board Setup + * + * Copyright 2013 Freescale Semiconductor Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> + +#include <asm/machdep.h> +#include <asm/udbg.h> +#include <asm/mpic.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc85xx.h" + +void __init c293_pcie_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + + mpic_init(mpic); +} + + +/* + * Setup the architecture + */ +static void __init c293_pcie_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("c293_pcie_setup_arch()", 0); + + fsl_pci_assign_primary(); + + printk(KERN_INFO "C293 PCIE board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(c293_pcie, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init c293_pcie_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,C293PCIE")) + return 1; + return 0; +} + +define_machine(c293_pcie) { + .name = "C293 PCIE", + .probe = c293_pcie_probe, + .setup_arch = c293_pcie_setup_arch, + .init_IRQ = c293_pcie_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c new file mode 100644 index 000000000..7bfb9b184 --- /dev/null +++ b/arch/powerpc/platforms/85xx/common.c @@ -0,0 +1,129 @@ +/* + * Routines common to most mpc85xx-based boards. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/of_irq.h> +#include <linux/of_platform.h> + +#include <asm/qe.h> +#include <sysdev/cpm2_pic.h> + +#include "mpc85xx.h" + +static const struct of_device_id mpc85xx_common_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .compatible = "simple-bus", }, + { .name = "cpm", }, + { .name = "localbus", }, + { .compatible = "gianfar", }, + { .compatible = "fsl,qe", }, + { .compatible = "fsl,cpm2", }, + { .compatible = "fsl,srio", }, + /* So that the DMA channel nodes can be probed individually: */ + { .compatible = "fsl,eloplus-dma", }, + /* For the PMC driver */ + { .compatible = "fsl,mpc8548-guts", }, + /* Probably unnecessary? 
*/ + { .compatible = "gpio-leds", }, + /* For all PCI controllers */ + { .compatible = "fsl,mpc8540-pci", }, + { .compatible = "fsl,mpc8548-pcie", }, + { .compatible = "fsl,p1022-pcie", }, + { .compatible = "fsl,p1010-pcie", }, + { .compatible = "fsl,p1023-pcie", }, + { .compatible = "fsl,p4080-pcie", }, + { .compatible = "fsl,qoriq-pcie-v2.4", }, + { .compatible = "fsl,qoriq-pcie-v2.3", }, + { .compatible = "fsl,qoriq-pcie-v2.2", }, + { .compatible = "fsl,fman", }, + {}, +}; + +int __init mpc85xx_common_publish_devices(void) +{ + return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL); +} +#ifdef CONFIG_CPM2 +static void cpm2_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + int cascade_irq; + + while ((cascade_irq = cpm2_get_irq()) >= 0) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + + +void __init mpc85xx_cpm2_pic_init(void) +{ + struct device_node *np; + int irq; + + /* Setup CPM2 PIC */ + np = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic"); + if (np == NULL) { + printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n"); + return; + } + irq = irq_of_parse_and_map(np, 0); + if (irq == NO_IRQ) { + of_node_put(np); + printk(KERN_ERR "PIC init: got no IRQ for cpm cascade\n"); + return; + } + + cpm2_pic_init(np); + of_node_put(np); + irq_set_chained_handler(irq, cpm2_cascade); +} +#endif + +#ifdef CONFIG_QUICC_ENGINE +void __init mpc85xx_qe_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "fsl,qe"); + if (!np) { + np = of_find_node_by_name(NULL, "qe"); + if (!np) { + pr_err("%s: Could not find Quicc Engine node\n", + __func__); + return; + } + } + + if (!of_device_is_available(np)) { + of_node_put(np); + return; + } + + qe_reset(); + of_node_put(np); + +} + +void __init mpc85xx_qe_par_io_init(void) +{ + struct device_node *np; + + np = of_find_node_by_name(NULL, "par_io"); + if (np) { + struct device_node *ucc; + + par_io_init(np); + of_node_put(np); + + for_each_node_by_name(ucc, "ucc") + par_io_of_config(ucc); + + } +} +#endif diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c new file mode 100644 index 000000000..9824d2cf7 --- /dev/null +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -0,0 +1,227 @@ +/* + * Corenet based SoC DS Setup + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2009-2011 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/interrupt.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/pgtable.h> +#include <asm/ppc-pci.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include <asm/ehv_pic.h> +#include <asm/qe_ic.h> + +#include <linux/of_platform.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> +#include "smp.h" +#include "mpc85xx.h" + +void __init corenet_gen_pic_init(void) +{ + struct mpic *mpic; + unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | + MPIC_NO_RESET; + + struct device_node *np; + + if (ppc_md.get_irq == mpic_get_coreint_irq) + flags |= MPIC_ENABLE_COREINT; + + mpic = mpic_alloc(NULL, 0, flags, 0, 512, " OpenPIC "); + BUG_ON(mpic == NULL); + + mpic_init(mpic); + + np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic"); + if (np) { + qe_ic_init(np, 0, qe_ic_cascade_low_mpic, + qe_ic_cascade_high_mpic); + of_node_put(np); + } +} + +/* + * Setup the architecture + */ +void __init corenet_gen_setup_arch(void) +{ + mpc85xx_smp_init(); + + swiotlb_detect_4g(); + +#if defined(CONFIG_FSL_PCI) && defined(CONFIG_ZONE_DMA32) + /* + * Inbound windows don't cover the full lower 4 GiB + * due to conflicts with PCICSRBAR and outbound windows, + * so limit the DMA32 zone to 2 GiB, to allow consistent + * allocations to succeed. + */ + limit_zone_pfn(ZONE_DMA32, 1UL << (31 - PAGE_SHIFT)); +#endif + + pr_info("%s board\n", ppc_md.name); + + mpc85xx_qe_init(); +} + +static const struct of_device_id of_device_ids[] = { + { + .compatible = "simple-bus" + }, + { + .compatible = "mdio-mux-gpio" + }, + { + .compatible = "fsl,fpga-ngpixis" + }, + { + .compatible = "fsl,fpga-qixis" + }, + { + .compatible = "fsl,srio", + }, + { + .compatible = "fsl,p4080-pcie", + }, + { + .compatible = "fsl,qoriq-pcie-v2.2", + }, + { + .compatible = "fsl,qoriq-pcie-v2.3", + }, + { + .compatible = "fsl,qoriq-pcie-v2.4", + }, + { + .compatible = "fsl,qoriq-pcie-v3.0", + }, + { + .compatible = "fsl,qe", + }, + { + .compatible = "fsl,fman", + }, + /* The following two are for the Freescale hypervisor */ + { + .name = "hypervisor", + }, + { + .name = "handles", + }, + {} +}; + +int __init corenet_gen_publish_devices(void) +{ + return of_platform_bus_probe(NULL, of_device_ids, NULL); +} + +static const char * const boards[] __initconst = { + "fsl,P2041RDB", + "fsl,P3041DS", + "fsl,OCA4080", + "fsl,P4080DS", + "fsl,P5020DS", + "fsl,P5040DS", + "fsl,T2080QDS", + "fsl,T2080RDB", + "fsl,T2081QDS", + "fsl,T4240QDS", + "fsl,T4240RDB", + "fsl,B4860QDS", + "fsl,B4420QDS", + "fsl,B4220QDS", + "fsl,T1040QDS", + "fsl,T1042QDS", + "fsl,T1040RDB", + "fsl,T1042RDB", + "fsl,T1042RDB_PI", + "keymile,kmcoge4", + NULL +}; + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init corenet_generic_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + char hv_compat[24]; + int i; +#ifdef CONFIG_SMP + extern struct smp_ops_t smp_85xx_ops; +#endif + + if (of_flat_dt_match(root, boards)) + return 1; + + /* Check if we're running under the Freescale hypervisor */ + for (i = 0; boards[i]; i++) { + snprintf(hv_compat, sizeof(hv_compat), "%s-hv", boards[i]); + if (of_flat_dt_is_compatible(root, hv_compat)) { + ppc_md.init_IRQ = ehv_pic_init; + + ppc_md.get_irq = ehv_pic_get_irq; + ppc_md.restart = fsl_hv_restart; + pm_power_off = fsl_hv_halt; + ppc_md.halt = fsl_hv_halt; +#ifdef CONFIG_SMP + /* + * 
Disable the timebase sync operations because we + * can't write to the timebase registers under the + * hypervisor. + */ + smp_85xx_ops.give_timebase = NULL; + smp_85xx_ops.take_timebase = NULL; +#endif + return 1; + } + } + + return 0; +} + +define_machine(corenet_generic) { + .name = "CoreNet Generic", + .probe = corenet_generic_probe, + .setup_arch = corenet_gen_setup_arch, + .init_IRQ = corenet_gen_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_coreint_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PPC64 + .power_save = book3e_idle, +#else + .power_save = e500_idle, +#endif +}; + +machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); + +#ifdef CONFIG_SWIOTLB +machine_arch_initcall(corenet_generic, swiotlb_setup_bus_notifier); +#endif diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c new file mode 100644 index 000000000..11790e074 --- /dev/null +++ b/arch/powerpc/platforms/85xx/ge_imp3a.c @@ -0,0 +1,224 @@ +/* + * GE IMP3A Board Setup + * + * Author Martyn Welch <martyn.welch@ge.com> + * + * Copyright 2010 GE Intelligent Platforms Embedded Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Based on: mpc85xx_ds.c (MPC85xx DS Board Setup) + * Copyright 2007 Freescale Semiconductor Inc. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include <asm/swiotlb.h> +#include <asm/nvram.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> +#include "smp.h" + +#include "mpc85xx.h" +#include <sysdev/ge/ge_pic.h> + +void __iomem *imp3a_regs; + +void __init ge_imp3a_pic_init(void) +{ + struct mpic *mpic; + struct device_node *np; + struct device_node *cascade_node = NULL; + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,MPC8572DS-CAMP")) { + mpic = mpic_alloc(NULL, 0, + MPIC_NO_RESET | + MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + } else { + mpic = mpic_alloc(NULL, 0, + MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + } + + BUG_ON(mpic == NULL); + mpic_init(mpic); + /* + * There is a simple interrupt handler in the main FPGA, this needs + * to be cascaded into the MPIC + */ + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "gef,fpga-pic-1.00")) { + cascade_node = np; + break; + } + + if (cascade_node == NULL) { + printk(KERN_WARNING "IMP3A: No FPGA PIC\n"); + return; + } + + gef_pic_init(cascade_node); + of_node_put(cascade_node); +} + +static void ge_imp3a_pci_assign_primary(void) +{ +#ifdef CONFIG_PCI + struct device_node *np; + struct resource rsrc; + + for_each_node_by_type(np, "pci") { + if (of_device_is_compatible(np, "fsl,mpc8540-pci") || + of_device_is_compatible(np, "fsl,mpc8548-pcie") || + of_device_is_compatible(np, "fsl,p2020-pcie")) { + 
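			/* Pick as primary the PHB whose registers sit at
			 * IMMR offset 0x9000: the low 20 bits of each
			 * controller's register base are compared below,
			 * and the matching node becomes fsl_pci_primary.
			 */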
of_address_to_resource(np, 0, &rsrc); + if ((rsrc.start & 0xfffff) == 0x9000) + fsl_pci_primary = np; + } + } +#endif +} + +/* + * Setup the architecture + */ +static void __init ge_imp3a_setup_arch(void) +{ + struct device_node *regs; + + if (ppc_md.progress) + ppc_md.progress("ge_imp3a_setup_arch()", 0); + + mpc85xx_smp_init(); + + ge_imp3a_pci_assign_primary(); + + swiotlb_detect_4g(); + + /* Remap basic board registers */ + regs = of_find_compatible_node(NULL, NULL, "ge,imp3a-fpga-regs"); + if (regs) { + imp3a_regs = of_iomap(regs, 0); + if (imp3a_regs == NULL) + printk(KERN_WARNING "Unable to map board registers\n"); + of_node_put(regs); + } + +#if defined(CONFIG_MMIO_NVRAM) + mmio_nvram_init(); +#endif + + printk(KERN_INFO "GE Intelligent Platforms IMP3A 3U cPCI SBC\n"); +} + +/* Return the PCB revision */ +static unsigned int ge_imp3a_get_pcb_rev(void) +{ + unsigned int reg; + + reg = ioread16(imp3a_regs); + return (reg >> 8) & 0xff; +} + +/* Return the board (software) revision */ +static unsigned int ge_imp3a_get_board_rev(void) +{ + unsigned int reg; + + reg = ioread16(imp3a_regs + 0x2); + return reg & 0xff; +} + +/* Return the FPGA revision */ +static unsigned int ge_imp3a_get_fpga_rev(void) +{ + unsigned int reg; + + reg = ioread16(imp3a_regs + 0x2); + return (reg >> 8) & 0xff; +} + +/* Return compactPCI Geographical Address */ +static unsigned int ge_imp3a_get_cpci_geo_addr(void) +{ + unsigned int reg; + + reg = ioread16(imp3a_regs + 0x6); + return (reg & 0x0f00) >> 8; +} + +/* Return compactPCI System Controller Status */ +static unsigned int ge_imp3a_get_cpci_is_syscon(void) +{ + unsigned int reg; + + reg = ioread16(imp3a_regs + 0x6); + return reg & (1 << 12); +} + +static void ge_imp3a_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n"); + + seq_printf(m, "Revision\t: %u%c\n", ge_imp3a_get_pcb_rev(), + ('A' + ge_imp3a_get_board_rev() - 1)); + + seq_printf(m, "FPGA Revision\t: %u\n", ge_imp3a_get_fpga_rev()); + + seq_printf(m, "cPCI geo. addr\t: %u\n", ge_imp3a_get_cpci_geo_addr()); + + seq_printf(m, "cPCI syscon\t: %s\n", + ge_imp3a_get_cpci_is_syscon() ? "yes" : "no"); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init ge_imp3a_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "ge,IMP3A"); +} + +machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices); + +machine_arch_initcall(ge_imp3a, swiotlb_setup_bus_notifier); + +define_machine(ge_imp3a) { + .name = "GE_IMP3A", + .probe = ge_imp3a_probe, + .setup_arch = ge_imp3a_setup_arch, + .init_IRQ = ge_imp3a_pic_init, + .show_cpuinfo = ge_imp3a_show_cpuinfo, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c new file mode 100644 index 000000000..3dc1bda3d --- /dev/null +++ b/arch/powerpc/platforms/85xx/ksi8560.c @@ -0,0 +1,193 @@ +/* + * Board setup routines for the Emerson KSI8560 + * + * Author: Alexandr Smirnov <asmirnov@ru.mvista.com> + * + * Based on mpc85xx_ads.c maintained by Kumar Gala + * + * 2008 (c) MontaVista, Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. 
This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/mpic.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> +#include <asm/prom.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include <asm/cpm2.h> +#include <sysdev/cpm2_pic.h> + +#include "mpc85xx.h" + +#define KSI8560_CPLD_HVR 0x04 /* Hardware Version Register */ +#define KSI8560_CPLD_PVR 0x08 /* PLD Version Register */ +#define KSI8560_CPLD_RCR1 0x30 /* Reset Command Register 1 */ + +#define KSI8560_CPLD_RCR1_CPUHR 0x80 /* CPU Hard Reset */ + +static void __iomem *cpld_base = NULL; + +static void machine_restart(char *cmd) +{ + if (cpld_base) + out_8(cpld_base + KSI8560_CPLD_RCR1, KSI8560_CPLD_RCR1_CPUHR); + else + printk(KERN_ERR "Can't find CPLD base, hang forever\n"); + + for (;;); +} + +static void __init ksi8560_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + + mpc85xx_cpm2_pic_init(); +} + +#ifdef CONFIG_CPM2 +/* + * Setup I/O ports + */ +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin __initdata ksi8560_pins[] = { + /* SCC1 */ + {3, 29, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* SCC2 */ + {3, 26, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 27, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC1 */ + {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {2, 23, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK9 */ + {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK10 */ + +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ksi8560_pins); i++) { + struct cpm_pin *pin = &ksi8560_pins[i]; + cpm2_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK9, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_TX); +} +#endif + +/* + * Setup the architecture + */ +static void __init ksi8560_setup_arch(void) +{ + struct device_node *cpld; + + cpld = of_find_compatible_node(NULL, NULL, "emerson,KSI8560-cpld"); + if (cpld) + cpld_base = of_iomap(cpld, 0); + else + printk(KERN_ERR "Can't find CPLD in device tree\n"); + + if (ppc_md.progress) + ppc_md.progress("ksi8560_setup_arch()", 0); + +#ifdef CONFIG_CPM2 + cpm2_reset(); + init_ioports(); +#endif +} + +static void ksi8560_show_cpuinfo(struct 
seq_file *m) +{ + uint pvid, svid, phid1; + + pvid = mfspr(SPRN_PVR); + svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: Emerson Network Power\n"); + seq_printf(m, "Board\t\t: KSI8560\n"); + + if (cpld_base) { + seq_printf(m, "Hardware rev\t: %d\n", + in_8(cpld_base + KSI8560_CPLD_HVR)); + seq_printf(m, "CPLD rev\t: %d\n", + in_8(cpld_base + KSI8560_CPLD_PVR)); + } else + seq_printf(m, "Unknown Hardware and CPLD revs\n"); + + seq_printf(m, "PVR\t\t: 0x%x\n", pvid); + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + /* Display cpu Pll setting */ + phid1 = mfspr(SPRN_HID1); + seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); +} + +machine_device_initcall(ksi8560, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init ksi8560_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "emerson,KSI8560"); +} + +define_machine(ksi8560) { + .name = "KSI8560", + .probe = ksi8560_probe, + .setup_arch = ksi8560_setup_arch, + .init_IRQ = ksi8560_pic_init, + .show_cpuinfo = ksi8560_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = machine_restart, + .calibrate_decr = generic_calibrate_decr, +}; diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c new file mode 100644 index 000000000..a378ba351 --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -0,0 +1,85 @@ +/* + * MPC8536 DS Board Setup + * + * Copyright 2008 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include <asm/swiotlb.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc85xx.h" + +void __init mpc8536_ds_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init mpc8536_ds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc8536_ds_setup_arch()", 0); + + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); + + printk("MPC8536 DS board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices); + +machine_arch_initcall(mpc8536_ds, swiotlb_setup_bus_notifier); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mpc8536_ds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,mpc8536ds"); +} + +define_machine(mpc8536_ds) { + .name = "MPC8536 DS", + .probe = mpc8536_ds_probe, + .setup_arch = mpc8536_ds_setup_arch, + .init_IRQ = mpc8536_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h new file mode 100644 index 000000000..39056f6be --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx.h @@ -0,0 +1,19 @@ +#ifndef MPC85xx_H +#define MPC85xx_H +extern int mpc85xx_common_publish_devices(void); + +#ifdef CONFIG_CPM2 +extern void mpc85xx_cpm2_pic_init(void); +#else +static inline void __init mpc85xx_cpm2_pic_init(void) {} +#endif /* CONFIG_CPM2 */ + +#ifdef CONFIG_QUICC_ENGINE +extern void mpc85xx_qe_init(void); +extern void mpc85xx_qe_par_io_init(void); +#else +static inline void __init mpc85xx_qe_init(void) {} +static inline void __init mpc85xx_qe_par_io_init(void) {} +#endif + +#endif diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c new file mode 100644 index 000000000..7d12a19aa --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c @@ -0,0 +1,193 @@ +/* + * MPC85xx setup and early boot code plus other random bits. + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2005 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/mpic.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#ifdef CONFIG_CPM2 +#include <asm/cpm2.h> +#include <sysdev/cpm2_pic.h> +#endif + +#include "mpc85xx.h" + +#ifdef CONFIG_PCI +static int mpc85xx_exclude_device(struct pci_controller *hose, + u_char bus, u_char devfn) +{ + if (bus == 0 && PCI_SLOT(devfn) == 0) + return PCIBIOS_DEVICE_NOT_FOUND; + else + return PCIBIOS_SUCCESSFUL; +} +#endif /* CONFIG_PCI */ + +static void __init mpc85xx_ads_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + + mpc85xx_cpm2_pic_init(); +} + +/* + * Setup the architecture + */ +#ifdef CONFIG_CPM2 +struct cpm_pin { + int port, pin, flags; +}; + +static const struct cpm_pin mpc8560_ads_pins[] = { + /* SCC1 */ + {3, 29, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* SCC2 */ + {2, 12, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 13, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 26, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 27, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC2 */ + {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK14 */ + {2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK13 */ + + /* FCC3 */ + {1, 4, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 6, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 9, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 12, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 13, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 14, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 15, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK16 */ + {2, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK15 */ + {2, 27, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mpc8560_ads_pins); i++) { + const struct cpm_pin *pin = &mpc8560_ads_pins[i]; + cpm2_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC2, 
CPM_CLK14, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC3, CPM_CLK15, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC3, CPM_CLK16, CPM_CLK_TX); +} +#endif + +static void __init mpc85xx_ads_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc85xx_ads_setup_arch()", 0); + +#ifdef CONFIG_CPM2 + cpm2_reset(); + init_ioports(); +#endif + +#ifdef CONFIG_PCI + ppc_md.pci_exclude_device = mpc85xx_exclude_device; +#endif + + fsl_pci_assign_primary(); +} + +static void mpc85xx_ads_show_cpuinfo(struct seq_file *m) +{ + uint pvid, svid, phid1; + + pvid = mfspr(SPRN_PVR); + svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: Freescale Semiconductor\n"); + seq_printf(m, "PVR\t\t: 0x%x\n", pvid); + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + /* Display cpu Pll setting */ + phid1 = mfspr(SPRN_HID1); + seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); +} + +machine_arch_initcall(mpc85xx_ads, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mpc85xx_ads_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "MPC85xxADS"); +} + +define_machine(mpc85xx_ads) { + .name = "MPC85xx ADS", + .probe = mpc85xx_ads_probe, + .setup_arch = mpc85xx_ads_setup_arch, + .init_IRQ = mpc85xx_ads_pic_init, + .show_cpuinfo = mpc85xx_ads_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c new file mode 100644 index 000000000..b0753e222 --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -0,0 +1,394 @@ +/* + * MPC85xx setup and early boot code plus other random bits. + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2005, 2011-2012 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/reboot.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/initrd.h> +#include <linux/interrupt.h> +#include <linux/fsl_devices.h> +#include <linux/of_platform.h> + +#include <asm/pgtable.h> +#include <asm/page.h> +#include <linux/atomic.h> +#include <asm/time.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/pci-bridge.h> +#include <asm/irq.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include <asm/i8259.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc85xx.h" + +/* + * The CDS board contains an FPGA/CPLD called "Cadmus", which collects + * various logic and performs system control functions. + * Here is the FPGA/CPLD register map. 
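+ * The cadmus_reg struct below mirrors this map byte for byte; the res[248]
+ * pad rounds the block out to its full 256-byte span.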
+ */
+struct cadmus_reg {
+ u8 cm_ver; /* Board version */
+ u8 cm_csr; /* General control/status */
+ u8 cm_rst; /* Reset control */
+ u8 cm_hsclk; /* High speed clock */
+ u8 cm_hsxclk; /* High speed clock extended */
+ u8 cm_led; /* LED data */
+ u8 cm_pci; /* PCI control/status */
+ u8 cm_dma; /* DMA control */
+ u8 res[248]; /* Total 256 bytes */
+};
+
+static struct cadmus_reg *cadmus;
+
+#ifdef CONFIG_PCI
+
+#define ARCADIA_HOST_BRIDGE_IDSEL 17
+#define ARCADIA_2ND_BRIDGE_IDSEL 3
+
+static int mpc85xx_exclude_device(struct pci_controller *hose,
+ u_char bus, u_char devfn)
+{
+ /* We explicitly do not go past the Tundra 320 Bridge */
+ if ((bus == 1) && (PCI_SLOT(devfn) == ARCADIA_2ND_BRIDGE_IDSEL))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if ((bus == 0) && (PCI_SLOT(devfn) == ARCADIA_2ND_BRIDGE_IDSEL))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ else
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static void mpc85xx_cds_restart(char *cmd)
+{
+ struct pci_dev *dev;
+ u_char tmp;
+
+ if ((dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686,
+ NULL))) {
+
+ /* Use the VIA Super Southbridge to force a PCI reset */
+ pci_read_config_byte(dev, 0x47, &tmp);
+ pci_write_config_byte(dev, 0x47, tmp | 1);
+
+ /* Flush the outbound PCI write queues */
+ pci_read_config_byte(dev, 0x47, &tmp);
+
+ /*
+ * At this point, the hardware reset should have triggered.
+ * However, if it doesn't work for some mysterious reason,
+ * just fall through to the default reset below.
+ */
+
+ pci_dev_put(dev);
+ }
+
+ /*
+ * If we can't find the VIA chip (maybe the P2P bridge is disabled)
+ * or the VIA chip reset didn't work, just use the default reset.
+ */
+ fsl_rstcr_restart(NULL);
+}
+
+static void __init mpc85xx_cds_pci_irq_fixup(struct pci_dev *dev)
+{
+ u_char c;
+ if (dev->vendor == PCI_VENDOR_ID_VIA) {
+ switch (dev->device) {
+ case PCI_DEVICE_ID_VIA_82C586_1:
+ /*
+ * U-Boot does not set the enable bits
+ * for the IDE device. Force them on here.
+ */
+ pci_read_config_byte(dev, 0x40, &c);
+ c |= 0x03; /* IDE: Chip Enable Bits */
+ pci_write_config_byte(dev, 0x40, c);
+
+ /*
+ * Since only the primary interface works, force the
+ * IDE function to standard primary IDE interrupt
+ * w/ 8259 offset
+ */
+ dev->irq = 14;
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+ break;
+ /*
+ * Force legacy USB interrupt routing
+ */
+ case PCI_DEVICE_ID_VIA_82C586_2:
+ /* There are two USB controllers.
+ * Identify them by function number
+ */
+ if (PCI_FUNC(dev->devfn) == 3)
+ dev->irq = 11;
+ else
+ dev->irq = 10;
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+ default:
+ break;
+ }
+ }
+}
+
+static void skip_fake_bridge(struct pci_dev *dev)
+{
+ /* Make it an error to skip the fake bridge
+ * in pci_setup_device() in probe.c */
+ dev->hdr_type = 0x7f;
+}
+DECLARE_PCI_FIXUP_EARLY(0x1957, 0x3fff, skip_fake_bridge);
+DECLARE_PCI_FIXUP_EARLY(0x3fff, 0x1957, skip_fake_bridge);
+DECLARE_PCI_FIXUP_EARLY(0xff3f, 0x5719, skip_fake_bridge);
+
+#define PCI_DEVICE_ID_IDT_TSI310 0x01a7
+
+/*
+ * Fix Tsi310 PCI-X bridge resource.
+ * Force the bridge to open a window from 0x0000-0x1fff in PCI I/O space.
+ * This allows legacy I/O (i8259, etc.) on the VIA southbridge to be accessed.
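+ * (The 0x0000-0x1fff window comfortably covers the classic ISA port range,
+ * e.g. the i8259 pair at 0x20/0xa0.)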
+ */
+void mpc85xx_cds_fixup_bus(struct pci_bus *bus)
+{
+ struct pci_dev *dev = bus->self;
+ struct resource *res = bus->resource[0];
+
+ if (dev != NULL &&
+ dev->vendor == PCI_VENDOR_ID_IBM &&
+ dev->device == PCI_DEVICE_ID_IDT_TSI310) {
+ if (res) {
+ res->start = 0;
+ res->end = 0x1fff;
+ res->flags = IORESOURCE_IO;
+ pr_info("mpc85xx_cds: PCI bridge resource fixup applied\n");
+ pr_info("mpc85xx_cds: %pR\n", res);
+ }
+ }
+
+ fsl_pcibios_fixup_bus(bus);
+}
+
+#ifdef CONFIG_PPC_I8259
+static void mpc85xx_8259_cascade_handler(unsigned int irq,
+ struct irq_desc *desc)
+{
+ unsigned int cascade_irq = i8259_irq();
+
+ if (cascade_irq != NO_IRQ)
+ /* handle an interrupt from the 8259 */
+ generic_handle_irq(cascade_irq);
+
+ /* check for any interrupts from the shared IRQ line */
+ handle_fasteoi_irq(irq, desc);
+}
+
+static irqreturn_t mpc85xx_8259_cascade_action(int irq, void *dev_id)
+{
+ return IRQ_HANDLED;
+}
+
+static struct irqaction mpc85xxcds_8259_irqaction = {
+ .handler = mpc85xx_8259_cascade_action,
+ .flags = IRQF_SHARED | IRQF_NO_THREAD,
+ .name = "8259 cascade",
+};
+#endif /* PPC_I8259 */
+#endif /* CONFIG_PCI */
+
+static void __init mpc85xx_cds_pic_init(void)
+{
+ struct mpic *mpic;
+ mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
+ 0, 256, " OpenPIC ");
+ BUG_ON(mpic == NULL);
+ mpic_init(mpic);
+}
+
+#if defined(CONFIG_PPC_I8259) && defined(CONFIG_PCI)
+static int mpc85xx_cds_8259_attach(void)
+{
+ int ret;
+ struct device_node *np = NULL;
+ struct device_node *cascade_node = NULL;
+ int cascade_irq;
+
+ /* Initialize the i8259 controller */
+ for_each_node_by_type(np, "interrupt-controller")
+ if (of_device_is_compatible(np, "chrp,iic")) {
+ cascade_node = np;
+ break;
+ }
+
+ if (cascade_node == NULL) {
+ printk(KERN_DEBUG "Could not find i8259 PIC\n");
+ return -ENODEV;
+ }
+
+ cascade_irq = irq_of_parse_and_map(cascade_node, 0);
+ if (cascade_irq == NO_IRQ) {
+ printk(KERN_ERR "Failed to map cascade interrupt\n");
+ return -ENXIO;
+ }
+
+ i8259_init(cascade_node, 0);
+ of_node_put(cascade_node);
+
+ /*
+ * Hook the interrupt to make sure desc->action is never NULL.
+ * This is required to ensure that the interrupt does not get
+ * disabled when the last user of the shared IRQ line frees their
+ * interrupt.
+ */
+ if ((ret = setup_irq(cascade_irq, &mpc85xxcds_8259_irqaction))) {
+ printk(KERN_ERR "Failed to setup cascade interrupt\n");
+ return ret;
+ }
+
+ /* Success. Connect our low-level cascade handler. */
+ irq_set_handler(cascade_irq, mpc85xx_8259_cascade_handler);
+
+ return 0;
+}
+machine_device_initcall(mpc85xx_cds, mpc85xx_cds_8259_attach);
+
+#endif /* CONFIG_PPC_I8259 */
+
+static void mpc85xx_cds_pci_assign_primary(void)
+{
+#ifdef CONFIG_PCI
+ struct device_node *np;
+
+ if (fsl_pci_primary)
+ return;
+
+ /*
+ * The MPC85xx CDS has an ISA bridge, but unfortunately there is no
+ * ISA node in the device tree. As a workaround for such broken
+ * device trees, we look for the i8259 node instead and use its
+ * parent as the primary PCI controller; this routine copes with
+ * all device trees.
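+ * (Note the walk below: of_get_parent() takes a reference on each parent,
+ * and the of_node_put() inside the loop drops the child's reference as we
+ * move up the tree.)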
+ */
+ np = of_find_node_by_name(NULL, "i8259");
+ while ((fsl_pci_primary = of_get_parent(np))) {
+ of_node_put(np);
+ np = fsl_pci_primary;
+
+ if ((of_device_is_compatible(np, "fsl,mpc8540-pci") ||
+ of_device_is_compatible(np, "fsl,mpc8548-pcie")) &&
+ of_device_is_available(np))
+ return;
+ }
+#endif
+}
+
+/*
+ * Setup the architecture
+ */
+static void __init mpc85xx_cds_setup_arch(void)
+{
+ struct device_node *np;
+ int cds_pci_slot;
+
+ if (ppc_md.progress)
+ ppc_md.progress("mpc85xx_cds_setup_arch()", 0);
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,mpc8548cds-fpga");
+ if (!np) {
+ pr_err("Could not find FPGA node.\n");
+ return;
+ }
+
+ cadmus = of_iomap(np, 0);
+ of_node_put(np);
+ if (!cadmus) {
+ pr_err("Failed to map FPGA area.\n");
+ return;
+ }
+
+ if (ppc_md.progress) {
+ char buf[40];
+ cds_pci_slot = ((in_8(&cadmus->cm_csr) >> 6) & 0x3) + 1;
+ snprintf(buf, 40, "CDS Version = 0x%x in slot %d\n",
+ in_8(&cadmus->cm_ver), cds_pci_slot);
+ ppc_md.progress(buf, 0);
+ }
+
+#ifdef CONFIG_PCI
+ ppc_md.pci_irq_fixup = mpc85xx_cds_pci_irq_fixup;
+ ppc_md.pci_exclude_device = mpc85xx_exclude_device;
+#endif
+
+ mpc85xx_cds_pci_assign_primary();
+ fsl_pci_assign_primary();
+}
+
+static void mpc85xx_cds_show_cpuinfo(struct seq_file *m)
+{
+ uint pvid, svid, phid1;
+
+ pvid = mfspr(SPRN_PVR);
+ svid = mfspr(SPRN_SVR);
+
+ seq_printf(m, "Vendor\t\t: Freescale Semiconductor\n");
+ seq_printf(m, "Machine\t\t: MPC85xx CDS (0x%x)\n",
+ in_8(&cadmus->cm_ver));
+ seq_printf(m, "PVR\t\t: 0x%x\n", pvid);
+ seq_printf(m, "SVR\t\t: 0x%x\n", svid);
+
+ /* Display cpu Pll setting */
+ phid1 = mfspr(SPRN_HID1);
+ seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
+}
+
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+static int __init mpc85xx_cds_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ return of_flat_dt_is_compatible(root, "MPC85xxCDS");
+}
+
+machine_arch_initcall(mpc85xx_cds, mpc85xx_common_publish_devices);
+
+define_machine(mpc85xx_cds) {
+ .name = "MPC85xx CDS",
+ .probe = mpc85xx_cds_probe,
+ .setup_arch = mpc85xx_cds_setup_arch,
+ .init_IRQ = mpc85xx_cds_pic_init,
+ .show_cpuinfo = mpc85xx_cds_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+#ifdef CONFIG_PCI
+ .restart = mpc85xx_cds_restart,
+ .pcibios_fixup_bus = mpc85xx_cds_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#else
+ .restart = fsl_rstcr_restart,
+#endif
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
new file mode 100644
index 000000000..ffdf02121
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -0,0 +1,248 @@
+/*
+ * MPC85xx DS Board Setup
+ *
+ * Author Xianghua Xiao (x.xiao@freescale.com)
+ * Roy Zang <tie-fei.zang@freescale.com>
+ * - Add PCI/PCI Express support
+ * Copyright 2007 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include <asm/i8259.h> +#include <asm/swiotlb.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> +#include "smp.h" + +#include "mpc85xx.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) +#else +#define DBG(fmt, args...) +#endif + +#ifdef CONFIG_PPC_I8259 +static void mpc85xx_8259_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq != NO_IRQ) { + generic_handle_irq(cascade_irq); + } + chip->irq_eoi(&desc->irq_data); +} +#endif /* CONFIG_PPC_I8259 */ + +void __init mpc85xx_ds_pic_init(void) +{ + struct mpic *mpic; +#ifdef CONFIG_PPC_I8259 + struct device_node *np; + struct device_node *cascade_node = NULL; + int cascade_irq; +#endif + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,MPC8572DS-CAMP")) { + mpic = mpic_alloc(NULL, 0, + MPIC_NO_RESET | + MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + } else { + mpic = mpic_alloc(NULL, 0, + MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + } + + BUG_ON(mpic == NULL); + mpic_init(mpic); + +#ifdef CONFIG_PPC_I8259 + /* Initialize the i8259 controller */ + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "chrp,iic")) { + cascade_node = np; + break; + } + + if (cascade_node == NULL) { + printk(KERN_DEBUG "Could not find i8259 PIC\n"); + return; + } + + cascade_irq = irq_of_parse_and_map(cascade_node, 0); + if (cascade_irq == NO_IRQ) { + printk(KERN_ERR "Failed to map cascade interrupt\n"); + return; + } + + DBG("mpc85xxds: cascade mapped to irq %d\n", cascade_irq); + + i8259_init(cascade_node, 0); + of_node_put(cascade_node); + + irq_set_chained_handler(cascade_irq, mpc85xx_8259_cascade); +#endif /* CONFIG_PPC_I8259 */ +} + +#ifdef CONFIG_PCI +extern int uli_exclude_device(struct pci_controller *hose, + u_char bus, u_char devfn); + +static struct device_node *pci_with_uli; + +static int mpc85xx_exclude_device(struct pci_controller *hose, + u_char bus, u_char devfn) +{ + if (hose->dn == pci_with_uli) + return uli_exclude_device(hose, bus, devfn); + + return PCIBIOS_SUCCESSFUL; +} +#endif /* CONFIG_PCI */ + +static void __init mpc85xx_ds_uli_init(void) +{ +#ifdef CONFIG_PCI + struct device_node *node; + + /* See if we have a ULI under the primary */ + + node = of_find_node_by_name(NULL, "uli1575"); + while ((pci_with_uli = of_get_parent(node))) { + of_node_put(node); + node = pci_with_uli; + + if (pci_with_uli == fsl_pci_primary) { + ppc_md.pci_exclude_device = mpc85xx_exclude_device; + break; + } + } +#endif +} + +/* + * Setup the architecture + */ +static void __init mpc85xx_ds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc85xx_ds_setup_arch()", 0); + + swiotlb_detect_4g(); + fsl_pci_assign_primary(); + mpc85xx_ds_uli_init(); + mpc85xx_smp_init(); + + printk("MPC85xx DS board from Freescale Semiconductor\n"); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mpc8544_ds_probe(void) +{ + unsigned long root = 
of_get_flat_dt_root(); + + return !!of_flat_dt_is_compatible(root, "MPC8544DS"); +} + +machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices); +machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices); +machine_arch_initcall(p2020_ds, mpc85xx_common_publish_devices); + +machine_arch_initcall(mpc8544_ds, swiotlb_setup_bus_notifier); +machine_arch_initcall(mpc8572_ds, swiotlb_setup_bus_notifier); +machine_arch_initcall(p2020_ds, swiotlb_setup_bus_notifier); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mpc8572_ds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return !!of_flat_dt_is_compatible(root, "fsl,MPC8572DS"); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p2020_ds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return !!of_flat_dt_is_compatible(root, "fsl,P2020DS"); +} + +define_machine(mpc8544_ds) { + .name = "MPC8544 DS", + .probe = mpc8544_ds_probe, + .setup_arch = mpc85xx_ds_setup_arch, + .init_IRQ = mpc85xx_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(mpc8572_ds) { + .name = "MPC8572 DS", + .probe = mpc8572_ds_probe, + .setup_arch = mpc85xx_ds_setup_arch, + .init_IRQ = mpc85xx_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(p2020_ds) { + .name = "P2020 DS", + .probe = p2020_ds_probe, + .setup_arch = mpc85xx_ds_setup_arch, + .init_IRQ = mpc85xx_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c new file mode 100644 index 000000000..a392e94a0 --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -0,0 +1,443 @@ +/* + * Copyright (C) 2006-2010, 2012-2013 Freescale Semiconductor, Inc. + * All rights reserved. + * + * Author: Andy Fleming <afleming@freescale.com> + * + * Based on 83xx/mpc8360e_pb.c by: + * Li Yang <LeoLi@freescale.com> + * Yin Olivia <Hong-hua.Yin@freescale.com> + * + * Description: + * MPC85xx MDS board specific routines. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/reboot.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/initrd.h> +#include <linux/fsl_devices.h> +#include <linux/of_platform.h> +#include <linux/of_device.h> +#include <linux/phy.h> +#include <linux/memblock.h> + +#include <linux/atomic.h> +#include <asm/time.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/irq.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> +#include <sysdev/simple_gpio.h> +#include <asm/qe.h> +#include <asm/qe_ic.h> +#include <asm/mpic.h> +#include <asm/swiotlb.h> +#include <asm/fsl_guts.h> +#include "smp.h" + +#include "mpc85xx.h" + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +#define MV88E1111_SCR 0x10 +#define MV88E1111_SCR_125CLK 0x0010 +static int mpc8568_fixup_125_clock(struct phy_device *phydev) +{ + int scr; + int err; + + /* Workaround for the 125 CLK Toggle */ + scr = phy_read(phydev, MV88E1111_SCR); + + if (scr < 0) + return scr; + + err = phy_write(phydev, MV88E1111_SCR, scr & ~(MV88E1111_SCR_125CLK)); + + if (err) + return err; + + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + + if (err) + return err; + + scr = phy_read(phydev, MV88E1111_SCR); + + if (scr < 0) + return scr; + + err = phy_write(phydev, MV88E1111_SCR, scr | 0x0008); + + return err; +} + +static int mpc8568_mds_phy_fixups(struct phy_device *phydev) +{ + int temp; + int err; + + /* Errata */ + err = phy_write(phydev,29, 0x0006); + + if (err) + return err; + + temp = phy_read(phydev, 30); + + if (temp < 0) + return temp; + + temp = (temp & (~0x8000)) | 0x4000; + err = phy_write(phydev,30, temp); + + if (err) + return err; + + err = phy_write(phydev,29, 0x000a); + + if (err) + return err; + + temp = phy_read(phydev, 30); + + if (temp < 0) + return temp; + + temp = phy_read(phydev, 30); + + if (temp < 0) + return temp; + + temp &= ~0x0020; + + err = phy_write(phydev,30,temp); + + if (err) + return err; + + /* Disable automatic MDI/MDIX selection */ + temp = phy_read(phydev, 16); + + if (temp < 0) + return temp; + + temp &= ~0x0060; + err = phy_write(phydev,16,temp); + + return err; +} + +/* ************************************************************************ + * + * Setup the architecture + * + */ +#ifdef CONFIG_QUICC_ENGINE +static void __init mpc85xx_mds_reset_ucc_phys(void) +{ + struct device_node *np; + static u8 __iomem *bcsr_regs; + + /* Map BCSR area */ + np = of_find_node_by_name(NULL, "bcsr"); + if (!np) + return; + + bcsr_regs = of_iomap(np, 0); + of_node_put(np); + if (!bcsr_regs) + return; + + if (machine_is(mpc8568_mds)) { +#define BCSR_UCC1_GETH_EN (0x1 << 7) +#define BCSR_UCC2_GETH_EN (0x1 << 7) +#define BCSR_UCC1_MODE_MSK (0x3 << 4) +#define BCSR_UCC2_MODE_MSK (0x3 << 0) + + /* Turn off UCC1 & UCC2 */ + clrbits8(&bcsr_regs[8], BCSR_UCC1_GETH_EN); + clrbits8(&bcsr_regs[9], BCSR_UCC2_GETH_EN); + + /* Mode is RGMII, all bits clear */ + clrbits8(&bcsr_regs[11], BCSR_UCC1_MODE_MSK | + BCSR_UCC2_MODE_MSK); + + /* Turn UCC1 & UCC2 on */ + setbits8(&bcsr_regs[8], BCSR_UCC1_GETH_EN); + setbits8(&bcsr_regs[9], BCSR_UCC2_GETH_EN); + } else if (machine_is(mpc8569_mds)) { +#define BCSR7_UCC12_GETHnRST (0x1 << 2) +#define BCSR8_UEM_MARVELL_RST 
(0x1 << 1) +#define BCSR_UCC_RGMII (0x1 << 6) +#define BCSR_UCC_RTBI (0x1 << 5) + /* + * U-Boot mangles interrupt polarity for Marvell PHYs, + * so reset built-in and UEM Marvell PHYs, this puts + * the PHYs into their normal state. + */ + clrbits8(&bcsr_regs[7], BCSR7_UCC12_GETHnRST); + setbits8(&bcsr_regs[8], BCSR8_UEM_MARVELL_RST); + + setbits8(&bcsr_regs[7], BCSR7_UCC12_GETHnRST); + clrbits8(&bcsr_regs[8], BCSR8_UEM_MARVELL_RST); + + for_each_compatible_node(np, "network", "ucc_geth") { + const unsigned int *prop; + int ucc_num; + + prop = of_get_property(np, "cell-index", NULL); + if (prop == NULL) + continue; + + ucc_num = *prop - 1; + + prop = of_get_property(np, "phy-connection-type", NULL); + if (prop == NULL) + continue; + + if (strcmp("rtbi", (const char *)prop) == 0) + clrsetbits_8(&bcsr_regs[7 + ucc_num], + BCSR_UCC_RGMII, BCSR_UCC_RTBI); + } + } else if (machine_is(p1021_mds)) { +#define BCSR11_ENET_MICRST (0x1 << 5) + /* Reset Micrel PHY */ + clrbits8(&bcsr_regs[11], BCSR11_ENET_MICRST); + setbits8(&bcsr_regs[11], BCSR11_ENET_MICRST); + } + + iounmap(bcsr_regs); +} + +static void __init mpc85xx_mds_qe_init(void) +{ + struct device_node *np; + + mpc85xx_qe_init(); + mpc85xx_qe_par_io_init(); + mpc85xx_mds_reset_ucc_phys(); + + if (machine_is(p1021_mds)) { + + struct ccsr_guts __iomem *guts; + + np = of_find_node_by_name(NULL, "global-utilities"); + if (np) { + guts = of_iomap(np, 0); + if (!guts) + pr_err("mpc85xx-rdb: could not map global utilities register\n"); + else{ + /* P1021 has pins muxed for QE and other functions. To + * enable QE UEC mode, we need to set bit QE0 for UCC1 + * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9 + * and QE12 for QE MII management signals in PMUXCR + * register. + */ + setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) | + MPC85xx_PMUXCR_QE(3) | + MPC85xx_PMUXCR_QE(9) | + MPC85xx_PMUXCR_QE(12)); + iounmap(guts); + } + of_node_put(np); + } + + } +} + +static void __init mpc85xx_mds_qeic_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "fsl,qe"); + if (!of_device_is_available(np)) { + of_node_put(np); + return; + } + + np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic"); + if (!np) { + np = of_find_node_by_type(NULL, "qeic"); + if (!np) + return; + } + + if (machine_is(p1021_mds)) + qe_ic_init(np, 0, qe_ic_cascade_low_mpic, + qe_ic_cascade_high_mpic); + else + qe_ic_init(np, 0, qe_ic_cascade_muxed_mpic, NULL); + of_node_put(np); +} +#else +static void __init mpc85xx_mds_qe_init(void) { } +static void __init mpc85xx_mds_qeic_init(void) { } +#endif /* CONFIG_QUICC_ENGINE */ + +static void __init mpc85xx_mds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc85xx_mds_setup_arch()", 0); + + mpc85xx_smp_init(); + + mpc85xx_mds_qe_init(); + + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); +} + + +static int __init board_fixups(void) +{ + char phy_id[20]; + char *compstrs[2] = {"fsl,gianfar-mdio", "fsl,ucc-mdio"}; + struct device_node *mdio; + struct resource res; + int i; + + for (i = 0; i < ARRAY_SIZE(compstrs); i++) { + mdio = of_find_compatible_node(NULL, NULL, compstrs[i]); + + of_address_to_resource(mdio, 0, &res); + snprintf(phy_id, sizeof(phy_id), "%llx:%02x", + (unsigned long long)res.start, 1); + + phy_register_fixup_for_id(phy_id, mpc8568_fixup_125_clock); + phy_register_fixup_for_id(phy_id, mpc8568_mds_phy_fixups); + + /* Register a workaround for errata */ + snprintf(phy_id, sizeof(phy_id), "%llx:%02x", + (unsigned long long)res.start, 7); + phy_register_fixup_for_id(phy_id, 
mpc8568_mds_phy_fixups); + + of_node_put(mdio); + } + + return 0; +} +machine_arch_initcall(mpc8568_mds, board_fixups); +machine_arch_initcall(mpc8569_mds, board_fixups); + +static int __init mpc85xx_publish_devices(void) +{ + if (machine_is(mpc8568_mds)) + simple_gpiochip_init("fsl,mpc8568mds-bcsr-gpio"); + if (machine_is(mpc8569_mds)) + simple_gpiochip_init("fsl,mpc8569mds-bcsr-gpio"); + + return mpc85xx_common_publish_devices(); +} + +machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices); +machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices); +machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices); + +machine_arch_initcall(mpc8568_mds, swiotlb_setup_bus_notifier); +machine_arch_initcall(mpc8569_mds, swiotlb_setup_bus_notifier); +machine_arch_initcall(p1021_mds, swiotlb_setup_bus_notifier); + +static void __init mpc85xx_mds_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + + mpic_init(mpic); + mpc85xx_mds_qeic_init(); +} + +static int __init mpc85xx_mds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "MPC85xxMDS"); +} + +define_machine(mpc8568_mds) { + .name = "MPC8568 MDS", + .probe = mpc85xx_mds_probe, + .setup_arch = mpc85xx_mds_setup_arch, + .init_IRQ = mpc85xx_mds_pic_init, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif +}; + +static int __init mpc8569_mds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,MPC8569EMDS"); +} + +define_machine(mpc8569_mds) { + .name = "MPC8569 MDS", + .probe = mpc8569_mds_probe, + .setup_arch = mpc85xx_mds_setup_arch, + .init_IRQ = mpc85xx_mds_pic_init, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif +}; + +static int __init p1021_mds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P1021MDS"); + +} + +define_machine(p1021_mds) { + .name = "P1021 MDS", + .probe = p1021_mds_probe, + .setup_arch = mpc85xx_mds_setup_arch, + .init_IRQ = mpc85xx_mds_pic_init, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif +}; + diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c new file mode 100644 index 000000000..e358bed66 --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -0,0 +1,375 @@ +/* + * MPC85xx RDB Board Setup + * + * Copyright 2009,2012-2013 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/qe.h>
+#include <asm/qe_ic.h>
+#include <asm/fsl_guts.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
+#else
+#define DBG(fmt, args...)
+#endif
+
+
+void __init mpc85xx_rdb_pic_init(void)
+{
+ struct mpic *mpic;
+ unsigned long root = of_get_flat_dt_root();
+
+#ifdef CONFIG_QUICC_ENGINE
+ struct device_node *np;
+#endif
+
+ if (of_flat_dt_is_compatible(root, "fsl,MPC85XXRDB-CAMP")) {
+ mpic = mpic_alloc(NULL, 0, MPIC_NO_RESET |
+ MPIC_BIG_ENDIAN |
+ MPIC_SINGLE_DEST_CPU,
+ 0, 256, " OpenPIC ");
+ } else {
+ mpic = mpic_alloc(NULL, 0,
+ MPIC_BIG_ENDIAN |
+ MPIC_SINGLE_DEST_CPU,
+ 0, 256, " OpenPIC ");
+ }
+
+ BUG_ON(mpic == NULL);
+ mpic_init(mpic);
+
+#ifdef CONFIG_QUICC_ENGINE
+ np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
+ if (np) {
+ qe_ic_init(np, 0, qe_ic_cascade_low_mpic,
+ qe_ic_cascade_high_mpic);
+ of_node_put(np);
+
+ } else
+ pr_err("%s: Could not find qe-ic node\n", __func__);
+#endif
+
+}
+
+/*
+ * Setup the architecture
+ */
+static void __init mpc85xx_rdb_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("mpc85xx_rdb_setup_arch()", 0);
+
+ mpc85xx_smp_init();
+
+ fsl_pci_assign_primary();
+
+#ifdef CONFIG_QUICC_ENGINE
+ mpc85xx_qe_init();
+ mpc85xx_qe_par_io_init();
+#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
+ if (machine_is(p1025_rdb)) {
+ struct device_node *np;
+
+ struct ccsr_guts __iomem *guts;
+
+ np = of_find_node_by_name(NULL, "global-utilities");
+ if (np) {
+ guts = of_iomap(np, 0);
+ if (!guts) {
+
+ pr_err("mpc85xx-rdb: could not map global utilities register\n");
+
+ } else {
+ /* P1025 has pins muxed for QE and other functions. To
+ * enable QE UEC mode, we need to set bit QE0 for UCC1
+ * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9
+ * and QE12 for QE MII management signals in PMUXCR
+ * register.
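+ * (The same PMUXCR setup is done for the P1021 in mpc85xx_mds.c.)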
+ */ + setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) | + MPC85xx_PMUXCR_QE(3) | + MPC85xx_PMUXCR_QE(9) | + MPC85xx_PMUXCR_QE(12)); + iounmap(guts); + } + of_node_put(np); + } + + } +#endif +#endif /* CONFIG_QUICC_ENGINE */ + + printk(KERN_INFO "MPC85xx RDB board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(p2020_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p2020_rdb_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_rdb_pd, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_utm_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1025_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p1024_rdb, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p2020_rdb_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,P2020RDB")) + return 1; + return 0; +} + +static int __init p1020_rdb_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,P1020RDB")) + return 1; + return 0; +} + +static int __init p1020_rdb_pc_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P1020RDB-PC"); +} + +static int __init p1020_rdb_pd_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P1020RDB-PD"); +} + +static int __init p1021_rdb_pc_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,P1021RDB-PC")) + return 1; + return 0; +} + +static int __init p2020_rdb_pc_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,P2020RDB-PC")) + return 1; + return 0; +} + +static int __init p1025_rdb_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P1025RDB"); +} + +static int __init p1020_mbg_pc_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P1020MBG-PC"); +} + +static int __init p1020_utm_pc_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P1020UTM-PC"); +} + +static int __init p1024_rdb_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P1024RDB"); +} + +define_machine(p2020_rdb) { + .name = "P2020 RDB", + .probe = p2020_rdb_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(p1020_rdb) { + .name = "P1020 RDB", + .probe = p1020_rdb_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + 
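+/*
+ * The remaining RDB machine descriptions below differ only in .name and in
+ * the compatible string their .probe hook matches; setup_arch and the PIC
+ * init are shared. A hypothetical new variant would follow the same sketch:
+ *
+ * static int __init pxxxx_rdb_probe(void)
+ * {
+ * unsigned long root = of_get_flat_dt_root();
+ *
+ * return of_flat_dt_is_compatible(root, "fsl,PXXXXRDB");
+ * }
+ *
+ * (pxxxx_rdb_probe and "fsl,PXXXXRDB" are placeholders for illustration,
+ * not real identifiers.)
+ */
+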
+define_machine(p1021_rdb_pc) { + .name = "P1021 RDB-PC", + .probe = p1021_rdb_pc_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(p2020_rdb_pc) { + .name = "P2020RDB-PC", + .probe = p2020_rdb_pc_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(p1025_rdb) { + .name = "P1025 RDB", + .probe = p1025_rdb_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(p1020_mbg_pc) { + .name = "P1020 MBG-PC", + .probe = p1020_mbg_pc_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(p1020_utm_pc) { + .name = "P1020 UTM-PC", + .probe = p1020_utm_pc_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(p1020_rdb_pc) { + .name = "P1020RDB-PC", + .probe = p1020_rdb_pc_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(p1020_rdb_pd) { + .name = "P1020RDB-PD", + .probe = p1020_rdb_pd_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(p1024_rdb) { + .name = "P1024 RDB", + .probe = p1024_rdb_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c new file mode 100644 index 000000000..123305056 --- /dev/null +++ b/arch/powerpc/platforms/85xx/mvme2500.c 
@@ -0,0 +1,74 @@ +/* + * Board setup routines for the Emerson/Artesyn MVME2500 + * + * Copyright 2014 Elettra-Sincrotrone Trieste S.C.p.A. + * + * Based on earlier code by: + * + * Xianghua Xiao (x.xiao@freescale.com) + * Tom Armistead (tom.armistead@emerson.com) + * Copyright 2012 Emerson + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Author Alessio Igor Bogani <alessio.bogani@elettra.eu> + * + */ + +#include <linux/pci.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc85xx.h" + +void __init mvme2500_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, + MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init mvme2500_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mvme2500_setup_arch()", 0); + fsl_pci_assign_primary(); + pr_info("MVME2500 board from Artesyn\n"); +} + +machine_arch_initcall(mvme2500, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mvme2500_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "artesyn,MVME2500"); +} + +define_machine(mvme2500) { + .name = "MVME2500", + .probe = mvme2500_probe, + .setup_arch = mvme2500_setup_arch, + .init_IRQ = mvme2500_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c new file mode 100644 index 000000000..ad1a3d438 --- /dev/null +++ b/arch/powerpc/platforms/85xx/p1010rdb.c @@ -0,0 +1,87 @@ +/* + * P1010RDB Board Setup + * + * Copyright 2011 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc85xx.h" + +void __init p1010_rdb_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + + mpic_init(mpic); +} + + +/* + * Setup the architecture + */ +static void __init p1010_rdb_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("p1010_rdb_setup_arch()", 0); + + fsl_pci_assign_primary(); + + printk(KERN_INFO "P1010 RDB board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p1010_rdb, swiotlb_setup_bus_notifier); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p1010_rdb_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,P1010RDB")) + return 1; + if (of_flat_dt_is_compatible(root, "fsl,P1010RDB-PB")) + return 1; + return 0; +} + +define_machine(p1010_rdb) { + .name = "P1010 RDB", + .probe = p1010_rdb_probe, + .setup_arch = p1010_rdb_setup_arch, + .init_IRQ = p1010_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c new file mode 100644 index 000000000..6ac986d3f --- /dev/null +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -0,0 +1,576 @@ +/* + * P1022DS board specific routines + * + * Authors: Travis Wheatley <travis.wheatley@freescale.com> + * Dave Liu <daveliu@freescale.com> + * Timur Tabi <timur@freescale.com> + * + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This file is taken from the Freescale P1022DS BSP, with modifications: + * 2) No AMP support + * 3) No PCI endpoint support + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/pci.h> +#include <linux/of_platform.h> +#include <asm/div64.h> +#include <asm/mpic.h> +#include <asm/swiotlb.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> +#include <asm/udbg.h> +#include <asm/fsl_guts.h> +#include <asm/fsl_lbc.h> +#include "smp.h" + +#include "mpc85xx.h" + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + +#define PMUXCR_ELBCDIU_MASK 0xc0000000 +#define PMUXCR_ELBCDIU_NOR16 0x80000000 +#define PMUXCR_ELBCDIU_DIU 0x40000000 + +/* + * Board-specific initialization of the DIU. This code should probably be + * executed when the DIU is opened, rather than in arch code, but the DIU + * driver does not have a mechanism for this (yet). + * + * This is especially problematic on the P1022DS because the local bus (eLBC) + * and the DIU video signals share the same pins, which means that enabling the + * DIU will disable access to NOR flash. 
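+ *
+ * For that reason, p1022_ds_setup_arch() below disables the NOR and NAND
+ * flash device-tree nodes when a video=fslfb parameter enables the DIU,
+ * so the MTD driver never binds to hardware it cannot reach.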
+ */ + +/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */ +#define CLKDVDR_PXCKEN 0x80000000 +#define CLKDVDR_PXCKINV 0x10000000 +#define CLKDVDR_PXCKDLY 0x06000000 +#define CLKDVDR_PXCLK_MASK 0x00FF0000 + +/* Some ngPIXIS register definitions */ +#define PX_CTL 3 +#define PX_BRDCFG0 8 +#define PX_BRDCFG1 9 + +#define PX_BRDCFG0_ELBC_SPI_MASK 0xc0 +#define PX_BRDCFG0_ELBC_SPI_ELBC 0x00 +#define PX_BRDCFG0_ELBC_SPI_NULL 0xc0 +#define PX_BRDCFG0_ELBC_DIU 0x02 + +#define PX_BRDCFG1_DVIEN 0x80 +#define PX_BRDCFG1_DFPEN 0x40 +#define PX_BRDCFG1_BACKLIGHT 0x20 +#define PX_BRDCFG1_DDCEN 0x10 + +#define PX_CTL_ALTACC 0x80 + +/* + * DIU Area Descriptor + * + * Note that we need to byte-swap the value before it's written to the AD + * register. So even though the registers don't look like they're in the same + * bit positions as they are on the MPC8610, the same value is written to the + * AD register on the MPC8610 and on the P1022. + */ +#define AD_BYTE_F 0x10000000 +#define AD_ALPHA_C_MASK 0x0E000000 +#define AD_ALPHA_C_SHIFT 25 +#define AD_BLUE_C_MASK 0x01800000 +#define AD_BLUE_C_SHIFT 23 +#define AD_GREEN_C_MASK 0x00600000 +#define AD_GREEN_C_SHIFT 21 +#define AD_RED_C_MASK 0x00180000 +#define AD_RED_C_SHIFT 19 +#define AD_PALETTE 0x00040000 +#define AD_PIXEL_S_MASK 0x00030000 +#define AD_PIXEL_S_SHIFT 16 +#define AD_COMP_3_MASK 0x0000F000 +#define AD_COMP_3_SHIFT 12 +#define AD_COMP_2_MASK 0x00000F00 +#define AD_COMP_2_SHIFT 8 +#define AD_COMP_1_MASK 0x000000F0 +#define AD_COMP_1_SHIFT 4 +#define AD_COMP_0_MASK 0x0000000F +#define AD_COMP_0_SHIFT 0 + +#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \ + cpu_to_le32(AD_BYTE_F | (alpha << AD_ALPHA_C_SHIFT) | \ + (blue << AD_BLUE_C_SHIFT) | (green << AD_GREEN_C_SHIFT) | \ + (red << AD_RED_C_SHIFT) | (c3 << AD_COMP_3_SHIFT) | \ + (c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \ + (c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT)) + +struct fsl_law { + u32 lawbar; + u32 reserved1; + u32 lawar; + u32 reserved[5]; +}; + +#define LAWBAR_MASK 0x00F00000 +#define LAWBAR_SHIFT 12 + +#define LAWAR_EN 0x80000000 +#define LAWAR_TGT_MASK 0x01F00000 +#define LAW_TRGT_IF_LBC (0x04 << 20) + +#define LAWAR_MASK (LAWAR_EN | LAWAR_TGT_MASK) +#define LAWAR_MATCH (LAWAR_EN | LAW_TRGT_IF_LBC) + +#define BR_BA 0xFFFF8000 + +/* + * Map a BRx value to a physical address + * + * The localbus BRx registers only store the lower 32 bits of the address. To + * obtain the upper four bits, we need to scan the LAW table. The entry which + * maps to the localbus will contain the upper four bits. + */ +static phys_addr_t lbc_br_to_phys(const void *ecm, unsigned int count, u32 br) +{ +#ifndef CONFIG_PHYS_64BIT + /* + * If we only have 32-bit addressing, then the BRx address *is* the + * physical address. 
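+ * On 36-bit parts, the upper four bits are recovered from the matching
+ * LAW entry below, i.e. phys = (br & BR_BA) | ((lawbar & LAWBAR_MASK) << 12).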
+ */ + return br & BR_BA; +#else + const struct fsl_law *law = ecm + 0xc08; + unsigned int i; + + for (i = 0; i < count; i++) { + u64 lawbar = in_be32(&law[i].lawbar); + u32 lawar = in_be32(&law[i].lawar); + + if ((lawar & LAWAR_MASK) == LAWAR_MATCH) + /* Extract the upper four bits */ + return (br & BR_BA) | ((lawbar & LAWBAR_MASK) << 12); + } + + return 0; +#endif +} + +/** + * p1022ds_set_monitor_port: switch the output to a different monitor port + */ +static void p1022ds_set_monitor_port(enum fsl_diu_monitor_port port) +{ + struct device_node *guts_node; + struct device_node *lbc_node = NULL; + struct device_node *law_node = NULL; + struct ccsr_guts __iomem *guts; + struct fsl_lbc_regs *lbc = NULL; + void *ecm = NULL; + u8 __iomem *lbc_lcs0_ba = NULL; + u8 __iomem *lbc_lcs1_ba = NULL; + phys_addr_t cs0_addr, cs1_addr; + u32 br0, or0, br1, or1; + const __be32 *iprop; + unsigned int num_laws; + u8 b; + + /* Map the global utilities registers. */ + guts_node = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts"); + if (!guts_node) { + pr_err("p1022ds: missing global utilities device node\n"); + return; + } + + guts = of_iomap(guts_node, 0); + if (!guts) { + pr_err("p1022ds: could not map global utilities device\n"); + goto exit; + } + + lbc_node = of_find_compatible_node(NULL, NULL, "fsl,p1022-elbc"); + if (!lbc_node) { + pr_err("p1022ds: missing localbus node\n"); + goto exit; + } + + lbc = of_iomap(lbc_node, 0); + if (!lbc) { + pr_err("p1022ds: could not map localbus node\n"); + goto exit; + } + + law_node = of_find_compatible_node(NULL, NULL, "fsl,ecm-law"); + if (!law_node) { + pr_err("p1022ds: missing local access window node\n"); + goto exit; + } + + ecm = of_iomap(law_node, 0); + if (!ecm) { + pr_err("p1022ds: could not map local access window node\n"); + goto exit; + } + + iprop = of_get_property(law_node, "fsl,num-laws", NULL); + if (!iprop) { + pr_err("p1022ds: LAW node is missing fsl,num-laws property\n"); + goto exit; + } + num_laws = be32_to_cpup(iprop); + + /* + * Indirect mode requires both BR0 and BR1 to be set to "GPCM", + * otherwise writes to these addresses won't actually appear on the + * local bus, and so the PIXIS won't see them. + * + * In FCM mode, writes go to the NAND controller, which does not pass + * them to the localbus directly. So we force BR0 and BR1 into GPCM + * mode, since we don't care about what's behind the localbus any + * more. + */ + br0 = in_be32(&lbc->bank[0].br); + br1 = in_be32(&lbc->bank[1].br); + or0 = in_be32(&lbc->bank[0].or); + or1 = in_be32(&lbc->bank[1].or); + + /* Make sure CS0 and CS1 are programmed */ + if (!(br0 & BR_V) || !(br1 & BR_V)) { + pr_err("p1022ds: CS0 and/or CS1 is not programmed\n"); + goto exit; + } + + /* + * Use the existing BRx/ORx values if it's already GPCM. Otherwise, + * force the values to simple 32KB GPCM windows with the most + * conservative timing. 
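+ * (0xFFFF8000 is the 32KB address mask for ORx; the remaining 0xFF7 bits
+ * select conservative GPCM timing and attribute settings.)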
+ */ + if ((br0 & BR_MSEL) != BR_MS_GPCM) { + br0 = (br0 & BR_BA) | BR_V; + or0 = 0xFFFF8000 | 0xFF7; + out_be32(&lbc->bank[0].br, br0); + out_be32(&lbc->bank[0].or, or0); + } + if ((br1 & BR_MSEL) != BR_MS_GPCM) { + br1 = (br1 & BR_BA) | BR_V; + or1 = 0xFFFF8000 | 0xFF7; + out_be32(&lbc->bank[1].br, br1); + out_be32(&lbc->bank[1].or, or1); + } + + cs0_addr = lbc_br_to_phys(ecm, num_laws, br0); + if (!cs0_addr) { + pr_err("p1022ds: could not determine physical address for CS0" + " (BR0=%08x)\n", br0); + goto exit; + } + cs1_addr = lbc_br_to_phys(ecm, num_laws, br1); + if (!cs1_addr) { + pr_err("p1022ds: could not determine physical address for CS1" + " (BR1=%08x)\n", br1); + goto exit; + } + + lbc_lcs0_ba = ioremap(cs0_addr, 1); + if (!lbc_lcs0_ba) { + pr_err("p1022ds: could not ioremap CS0 address %llx\n", + (unsigned long long)cs0_addr); + goto exit; + } + lbc_lcs1_ba = ioremap(cs1_addr, 1); + if (!lbc_lcs1_ba) { + pr_err("p1022ds: could not ioremap CS1 address %llx\n", + (unsigned long long)cs1_addr); + goto exit; + } + + /* Make sure we're in indirect mode first. */ + if ((in_be32(&guts->pmuxcr) & PMUXCR_ELBCDIU_MASK) != + PMUXCR_ELBCDIU_DIU) { + struct device_node *pixis_node; + void __iomem *pixis; + + pixis_node = + of_find_compatible_node(NULL, NULL, "fsl,p1022ds-fpga"); + if (!pixis_node) { + pr_err("p1022ds: missing pixis node\n"); + goto exit; + } + + pixis = of_iomap(pixis_node, 0); + of_node_put(pixis_node); + if (!pixis) { + pr_err("p1022ds: could not map pixis registers\n"); + goto exit; + } + + /* Enable indirect PIXIS mode. */ + setbits8(pixis + PX_CTL, PX_CTL_ALTACC); + iounmap(pixis); + + /* Switch the board mux to the DIU */ + out_8(lbc_lcs0_ba, PX_BRDCFG0); /* BRDCFG0 */ + b = in_8(lbc_lcs1_ba); + b |= PX_BRDCFG0_ELBC_DIU; + out_8(lbc_lcs1_ba, b); + + /* Set the chip mux to DIU mode. */ + clrsetbits_be32(&guts->pmuxcr, PMUXCR_ELBCDIU_MASK, + PMUXCR_ELBCDIU_DIU); + in_be32(&guts->pmuxcr); + } + + + switch (port) { + case FSL_DIU_PORT_DVI: + /* Enable the DVI port, disable the DFP and the backlight */ + out_8(lbc_lcs0_ba, PX_BRDCFG1); + b = in_8(lbc_lcs1_ba); + b &= ~(PX_BRDCFG1_DFPEN | PX_BRDCFG1_BACKLIGHT); + b |= PX_BRDCFG1_DVIEN; + out_8(lbc_lcs1_ba, b); + break; + case FSL_DIU_PORT_LVDS: + /* + * LVDS also needs backlight enabled, otherwise the display + * will be blank. + */ + /* Enable the DFP port, disable the DVI and the backlight */ + out_8(lbc_lcs0_ba, PX_BRDCFG1); + b = in_8(lbc_lcs1_ba); + b &= ~PX_BRDCFG1_DVIEN; + b |= PX_BRDCFG1_DFPEN | PX_BRDCFG1_BACKLIGHT; + out_8(lbc_lcs1_ba, b); + break; + default: + pr_err("p1022ds: unsupported monitor port %i\n", port); + } + +exit: + if (lbc_lcs1_ba) + iounmap(lbc_lcs1_ba); + if (lbc_lcs0_ba) + iounmap(lbc_lcs0_ba); + if (lbc) + iounmap(lbc); + if (ecm) + iounmap(ecm); + if (guts) + iounmap(guts); + + of_node_put(law_node); + of_node_put(lbc_node); + of_node_put(guts_node); +} + +/** + * p1022ds_set_pixel_clock: program the DIU's clock + * + * @pixclock: the wavelength, in picoseconds, of the clock + */ +void p1022ds_set_pixel_clock(unsigned int pixclock) +{ + struct device_node *guts_np = NULL; + struct ccsr_guts __iomem *guts; + unsigned long freq; + u64 temp; + u32 pxclk; + + /* Map the global utilities registers. 
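This block includes CLKDVDR, the pixel clock control register programmed below.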
*/
+ guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
+ if (!guts_np) {
+ pr_err("p1022ds: missing global utilities device node\n");
+ return;
+ }
+
+ guts = of_iomap(guts_np, 0);
+ of_node_put(guts_np);
+ if (!guts) {
+ pr_err("p1022ds: could not map global utilities device\n");
+ return;
+ }
+
+ /* Convert pixclock from a wavelength to a frequency */
+ temp = 1000000000000ULL;
+ do_div(temp, pixclock);
+ freq = temp;
+
+ /*
+ * 'pxclk' is the ratio of the platform clock to the pixel clock.
+ * This number is programmed into the CLKDVDR register, and the valid
+ * range of values is 2-255.
+ */
+ pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq);
+ pxclk = clamp_t(u32, pxclk, 2, 255);
+
+ /* Disable the pixel clock, and set it to non-inverted and no delay */
+ clrbits32(&guts->clkdvdr,
+ CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK);
+
+ /* Enable the clock and set the pxclk */
+ setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
+
+ iounmap(guts);
+}
+
+/**
+ * p1022ds_valid_monitor_port: set the monitor port for sysfs
+ */
+enum fsl_diu_monitor_port
+p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port)
+{
+ switch (port) {
+ case FSL_DIU_PORT_DVI:
+ case FSL_DIU_PORT_LVDS:
+ return port;
+ default:
+ return FSL_DIU_PORT_DVI; /* Dual-link LVDS is not supported */
+ }
+}
+
+#endif
+
+void __init p1022_ds_pic_init(void)
+{
+ struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
+ MPIC_SINGLE_DEST_CPU,
+ 0, 256, " OpenPIC ");
+ BUG_ON(mpic == NULL);
+ mpic_init(mpic);
+}
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+/* TRUE if there is a "video=fslfb" command-line parameter. */
+static bool fslfb;
+
+/*
+ * Search for a "video=fslfb" command-line parameter, and set 'fslfb' to
+ * true if we find it.
+ *
+ * We need to use early_param() instead of __setup() because the normal
+ * __setup() gets called too late. However, early_param() gets called very
+ * early, before the device tree is unflattened, so all we can do now is set a
+ * global variable. Later on, p1022_ds_setup_arch() will use that variable
+ * to determine if we need to update the device tree.
+ */
+static int __init early_video_setup(char *options)
+{
+ fslfb = (strncmp(options, "fslfb:", 6) == 0);
+
+ return 0;
+}
+early_param("video", early_video_setup);
+
+#endif
+
+/*
+ * Setup the architecture
+ */
+static void __init p1022_ds_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("p1022_ds_setup_arch()", 0);
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+ diu_ops.set_monitor_port = p1022ds_set_monitor_port;
+ diu_ops.set_pixel_clock = p1022ds_set_pixel_clock;
+ diu_ops.valid_monitor_port = p1022ds_valid_monitor_port;
+
+ /*
+ * Disable the NOR and NAND flash nodes if there is a video=fslfb...
+ * command-line parameter. When the DIU is active, the localbus is
+ * unavailable, so we have to disable these nodes before the MTD
+ * driver loads.
+ */
+ if (fslfb) {
+ struct device_node *np =
+ of_find_compatible_node(NULL, NULL, "fsl,p1022-elbc");
+
+ if (np) {
+ struct device_node *np2;
+
+ of_node_get(np);
+ np2 = of_find_compatible_node(np, NULL, "cfi-flash");
+ if (np2) {
+ static struct property nor_status = {
+ .name = "status",
+ .value = "disabled",
+ .length = sizeof("disabled"),
+ };
+
+ /*
+ * of_update_property() is called before
+ * kmalloc() is available, so the 'new' object
+ * should be allocated in the global area.
+ * The easiest way to do that is to
+ * allocate one static local variable for each
+ * call to this function.
+ */
+ pr_info("p1022ds: disabling %s node\n",
+ np2->full_name);
+ of_update_property(np2, &nor_status);
+ of_node_put(np2);
+ }
+
+ of_node_get(np);
+ np2 = of_find_compatible_node(np, NULL,
+ "fsl,elbc-fcm-nand");
+ if (np2) {
+ static struct property nand_status = {
+ .name = "status",
+ .value = "disabled",
+ .length = sizeof("disabled"),
+ };
+
+ pr_info("p1022ds: disabling %s node\n",
+ np2->full_name);
+ of_update_property(np2, &nand_status);
+ of_node_put(np2);
+ }
+
+ of_node_put(np);
+ }
+
+ }
+
+#endif
+
+ mpc85xx_smp_init();
+
+ fsl_pci_assign_primary();
+
+ swiotlb_detect_4g();
+
+ pr_info("Freescale P1022 DS reference board\n");
+}
+
+machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices);
+
+machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier);
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+static int __init p1022_ds_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ return of_flat_dt_is_compatible(root, "fsl,p1022ds");
+}
+
+define_machine(p1022_ds) {
+ .name = "P1022 DS",
+ .probe = p1022_ds_probe,
+ .setup_arch = p1022_ds_setup_arch,
+ .init_IRQ = p1022_ds_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .restart = fsl_rstcr_restart,
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
new file mode 100644
index 000000000..680232d6b
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -0,0 +1,156 @@
+/*
+ * P1022 RDK board specific routines
+ *
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * Based on p1022_ds.c
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+#include <asm/div64.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <asm/udbg.h>
+#include <asm/fsl_guts.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */
+#define CLKDVDR_PXCKEN 0x80000000
+#define CLKDVDR_PXCKINV 0x10000000
+#define CLKDVDR_PXCKDLY 0x06000000
+#define CLKDVDR_PXCLK_MASK 0x00FF0000
+
+/**
+ * p1022rdk_set_pixel_clock: program the DIU's clock
+ *
+ * @pixclock: the wavelength, in picoseconds, of the clock
+ */
+void p1022rdk_set_pixel_clock(unsigned int pixclock)
+{
+ struct device_node *guts_np = NULL;
+ struct ccsr_guts __iomem *guts;
+ unsigned long freq;
+ u64 temp;
+ u32 pxclk;
+
+ /* Map the global utilities registers. */
+ guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
+ if (!guts_np) {
+ pr_err("p1022rdk: missing global utilities device node\n");
+ return;
+ }
+
+ guts = of_iomap(guts_np, 0);
+ of_node_put(guts_np);
+ if (!guts) {
+ pr_err("p1022rdk: could not map global utilities device\n");
+ return;
+ }
+
+ /* Convert pixclock from a wavelength to a frequency */
+ temp = 1000000000000ULL;
+ do_div(temp, pixclock);
+ freq = temp;
+
+ /*
+ * 'pxclk' is the ratio of the platform clock to the pixel clock.
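+ * For example, assuming a hypothetical 400 MHz platform clock and a
+ * pixclock of 25000 ps (a 40 MHz pixel clock), the ratio would be 10.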
+ * This number is programmed into the CLKDVDR register, and the valid + * range of values is 2-255. + */ + pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq); + pxclk = clamp_t(u32, pxclk, 2, 255); + + /* Disable the pixel clock, and set it to non-inverted and no delay */ + clrbits32(&guts->clkdvdr, + CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK); + + /* Enable the clock and set the pxclk */ + setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16)); + + iounmap(guts); +} + +/** + * p1022rdk_valid_monitor_port: set the monitor port for sysfs + */ +enum fsl_diu_monitor_port +p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port) +{ + return FSL_DIU_PORT_DVI; +} + +#endif + +void __init p1022_rdk_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init p1022_rdk_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("p1022_rdk_setup_arch()", 0); + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + diu_ops.set_pixel_clock = p1022rdk_set_pixel_clock; + diu_ops.valid_monitor_port = p1022rdk_valid_monitor_port; +#endif + + mpc85xx_smp_init(); + + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); + + pr_info("Freescale / iVeia P1022 RDK reference board\n"); +} + +machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices); + +machine_arch_initcall(p1022_rdk, swiotlb_setup_bus_notifier); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p1022_rdk_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,p1022rdk"); +} + +define_machine(p1022_rdk) { + .name = "P1022 RDK", + .probe = p1022_rdk_probe, + .setup_arch = p1022_rdk_setup_arch, + .init_IRQ = p1022_rdk_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/p1023_rdb.c b/arch/powerpc/platforms/85xx/p1023_rdb.c new file mode 100644 index 000000000..d5b750982 --- /dev/null +++ b/arch/powerpc/platforms/85xx/p1023_rdb.c @@ -0,0 +1,122 @@ +/* + * Copyright 2010-2011, 2013 Freescale Semiconductor, Inc. + * + * Author: Roy Zang <tie-fei.zang@freescale.com> + * + * Description: + * P1023 RDB Board Setup + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/fsl_devices.h> +#include <linux/of_platform.h> +#include <linux/of_device.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include "smp.h" + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc85xx.h" + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc85xx_rdb_setup_arch(void) +{ + struct device_node *np; + + if (ppc_md.progress) + ppc_md.progress("p1023_rdb_setup_arch()", 0); + + /* Map BCSR area */ + np = of_find_node_by_name(NULL, "bcsr"); + if (np != NULL) { + static u8 __iomem *bcsr_regs; + + bcsr_regs = of_iomap(np, 0); + of_node_put(np); + + if (!bcsr_regs) { + printk(KERN_ERR + "BCSR: Failed to map bcsr register space\n"); + return; + } else { +#define BCSR15_I2C_BUS0_SEG_CLR 0x07 +#define BCSR15_I2C_BUS0_SEG2 0x02 +/* + * Note: Accessing exclusively i2c devices. + * + * The i2c controller selects initially ID EEPROM in the u-boot; + * but if menu configuration selects RTC support in the kernel, + * the i2c controller switches to select RTC chip in the kernel. + */ +#ifdef CONFIG_RTC_CLASS + /* Enable RTC chip on the segment #2 of i2c */ + clrbits8(&bcsr_regs[15], BCSR15_I2C_BUS0_SEG_CLR); + setbits8(&bcsr_regs[15], BCSR15_I2C_BUS0_SEG2); +#endif + + iounmap(bcsr_regs); + } + } + + mpc85xx_smp_init(); + + fsl_pci_assign_primary(); +} + +machine_arch_initcall(p1023_rdb, mpc85xx_common_publish_devices); + +static void __init mpc85xx_rdb_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + + mpic_init(mpic); +} + +static int __init p1023_rdb_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P1023RDB"); + +} + +define_machine(p1023_rdb) { + .name = "P1023 RDB", + .probe = p1023_rdb_probe, + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif +}; diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c new file mode 100644 index 000000000..12019f17f --- /dev/null +++ b/arch/powerpc/platforms/85xx/ppa8548.c @@ -0,0 +1,99 @@ +/* + * ppa8548 setup and early boot code. + * + * Copyright 2009 Prodrive B.V.. + * + * By Stef van Os (see MAINTAINERS for contact information) + * + * Based on the SBC8548 support - Copyright 2007 Wind River Systems Inc. + * Based on the MPC8548CDS support - Copyright 2005 Freescale Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/reboot.h> +#include <linux/seq_file.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> + +#include <asm/machdep.h> +#include <asm/udbg.h> +#include <asm/mpic.h> + +#include <sysdev/fsl_soc.h> + +static void __init ppa8548_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init ppa8548_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("ppa8548_setup_arch()", 0); +} + +static void ppa8548_show_cpuinfo(struct seq_file *m) +{ + uint32_t svid, phid1; + + svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: Prodrive B.V.\n"); + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + /* Display cpu Pll setting */ + phid1 = mfspr(SPRN_HID1); + seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .type = "soc", }, + { .compatible = "simple-bus", }, + { .compatible = "gianfar", }, + { .compatible = "fsl,srio", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(ppa8548, declare_of_platform_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init ppa8548_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "ppa8548"); +} + +define_machine(ppa8548) { + .name = "ppa8548", + .probe = ppa8548_probe, + .setup_arch = ppa8548_setup_arch, + .init_IRQ = ppa8548_pic_init, + .show_cpuinfo = ppa8548_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c new file mode 100644 index 000000000..8ad2fe6f2 --- /dev/null +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -0,0 +1,85 @@ +/* + * Paravirt target for a generic QEMU e500 machine + * + * This is intended to be a flexible device-tree-driven platform, not fixed + * to a particular piece of hardware or a particular spec of virtual hardware, + * beyond the assumption of an e500-family CPU. Some things are still hardcoded + * here, such as MPIC, but this is a limitation of the current code rather than + * an interface contract with QEMU. + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/of_fdt.h> +#include <asm/machdep.h> +#include <asm/pgtable.h> +#include <asm/time.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> +#include "smp.h" +#include "mpc85xx.h" + +void __init qemu_e500_pic_init(void) +{ + struct mpic *mpic; + unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | + MPIC_ENABLE_COREINT; + + mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +static void __init qemu_e500_setup_arch(void) +{ + ppc_md.progress("qemu_e500_setup_arch()", 0); + + fsl_pci_assign_primary(); + swiotlb_detect_4g(); +#if defined(CONFIG_FSL_PCI) && defined(CONFIG_ZONE_DMA32) + /* + * Inbound windows don't cover the full lower 4 GiB + * due to conflicts with PCICSRBAR and outbound windows, + * so limit the DMA32 zone to 2 GiB, to allow consistent + * allocations to succeed. + */ + limit_zone_pfn(ZONE_DMA32, 1UL << (31 - PAGE_SHIFT)); +#endif + mpc85xx_smp_init(); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init qemu_e500_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return !!of_flat_dt_is_compatible(root, "fsl,qemu-e500"); +} + +machine_arch_initcall(qemu_e500, mpc85xx_common_publish_devices); + +define_machine(qemu_e500) { + .name = "QEMU e500", + .probe = qemu_e500_probe, + .setup_arch = qemu_e500_setup_arch, + .init_IRQ = qemu_e500_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_coreint_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c new file mode 100644 index 000000000..b07214666 --- /dev/null +++ b/arch/powerpc/platforms/85xx/sbc8548.c @@ -0,0 +1,142 @@ +/* + * Wind River SBC8548 setup and early boot code. + * + * Copyright 2007 Wind River Systems Inc. + * + * By Paul Gortmaker (see MAINTAINERS for contact information) + * + * Based largely on the MPC8548CDS support - Copyright 2005 Freescale Inc. + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/reboot.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/major.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/initrd.h> +#include <linux/interrupt.h> +#include <linux/fsl_devices.h> +#include <linux/of_platform.h> + +#include <asm/pgtable.h> +#include <asm/page.h> +#include <linux/atomic.h> +#include <asm/time.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/ipic.h> +#include <asm/pci-bridge.h> +#include <asm/irq.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc85xx.h" + +static int sbc_rev; + +static void __init sbc8548_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* Extract the HW Rev from the EPLD on the board */ +static int __init sbc8548_hw_rev(void) +{ + struct device_node *np; + struct resource res; + unsigned int *rev; + int board_rev = 0; + + np = of_find_compatible_node(NULL, NULL, "hw-rev"); + if (np == NULL) { + printk("No HW-REV found in DTB.\n"); + return -ENODEV; + } + + of_address_to_resource(np, 0, &res); + of_node_put(np); + + rev = ioremap(res.start,sizeof(unsigned int)); + board_rev = (*rev) >> 28; + iounmap(rev); + + return board_rev; +} + +/* + * Setup the architecture + */ +static void __init sbc8548_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("sbc8548_setup_arch()", 0); + + fsl_pci_assign_primary(); + + sbc_rev = sbc8548_hw_rev(); +} + +static void sbc8548_show_cpuinfo(struct seq_file *m) +{ + uint pvid, svid, phid1; + + pvid = mfspr(SPRN_PVR); + svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: Wind River\n"); + seq_printf(m, "Machine\t\t: SBC8548 v%d\n", sbc_rev); + seq_printf(m, "PVR\t\t: 0x%x\n", pvid); + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + /* Display cpu Pll setting */ + phid1 = mfspr(SPRN_HID1); + seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); +} + +machine_arch_initcall(sbc8548, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init sbc8548_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "SBC8548"); +} + +define_machine(sbc8548) { + .name = "SBC8548", + .probe = sbc8548_probe, + .setup_arch = sbc8548_setup_arch, + .init_IRQ = sbc8548_pic_init, + .show_cpuinfo = sbc8548_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c new file mode 100644 index 000000000..79fd0dfd4 --- /dev/null +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -0,0 +1,175 @@ +/* + * Servergy CTS-1000 Setup + * + * Maintained by Ben Collins <ben.c@servergy.com> + * + * Copyright 2012 by Servergy, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */
+
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/of_gpio.h>
+#include <linux/of_irq.h>
+#include <linux/workqueue.h>
+#include <linux/reboot.h>
+#include <linux/interrupt.h>
+
+#include <asm/machdep.h>
+
+static struct device_node *halt_node;
+
+static const struct of_device_id child_match[] = {
+ {
+ .compatible = "sgy,gpio-halt",
+ },
+ {},
+};
+
+static void gpio_halt_wfn(struct work_struct *work)
+{
+ /* Likely won't return */
+ orderly_poweroff(true);
+}
+static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn);
+
+static void gpio_halt_cb(void)
+{
+ enum of_gpio_flags flags;
+ int trigger, gpio;
+
+ if (!halt_node)
+ return;
+
+ gpio = of_get_gpio_flags(halt_node, 0, &flags);
+
+ if (!gpio_is_valid(gpio))
+ return;
+
+ trigger = (flags == OF_GPIO_ACTIVE_LOW);
+
+ printk(KERN_INFO "gpio-halt: triggering GPIO.\n");
+
+ /* Probably won't return */
+ gpio_set_value(gpio, trigger);
+}
+
+/* This IRQ means someone pressed the power button and it is waiting for us
+ * to handle the shutdown/poweroff. */
+static irqreturn_t gpio_halt_irq(int irq, void *__data)
+{
+ printk(KERN_INFO "gpio-halt: shutdown due to power button IRQ.\n");
+ schedule_work(&gpio_halt_wq);
+
+ return IRQ_HANDLED;
+}
+
+static int gpio_halt_probe(struct platform_device *pdev)
+{
+ enum of_gpio_flags flags;
+ struct device_node *node = pdev->dev.of_node;
+ int gpio, err, irq;
+ int trigger;
+
+ if (!node)
+ return -ENODEV;
+
+ /* If there's no matching child, this isn't really an error */
+ halt_node = of_find_matching_node(node, child_match);
+ if (!halt_node)
+ return 0;
+
+ /* Technically we could just read the first one, but punish
+ * DT writers for invalid form. */
+ if (of_gpio_count(halt_node) != 1)
+ return -EINVAL;
+
+ /* Get the gpio number relative to the dynamic base. */
+ gpio = of_get_gpio_flags(halt_node, 0, &flags);
+ if (!gpio_is_valid(gpio))
+ return -EINVAL;
+
+ err = gpio_request(gpio, "gpio-halt");
+ if (err) {
+ printk(KERN_ERR "gpio-halt: error requesting GPIO %d.\n",
+ gpio);
+ halt_node = NULL;
+ return err;
+ }
+
+ trigger = (flags == OF_GPIO_ACTIVE_LOW);
+
+ gpio_direction_output(gpio, !trigger);
+
+ /* Now get the IRQ which tells us when the power button is hit */
+ irq = irq_of_parse_and_map(halt_node, 0);
+ err = request_irq(irq, gpio_halt_irq, IRQF_TRIGGER_RISING |
+ IRQF_TRIGGER_FALLING, "gpio-halt", halt_node);
+ if (err) {
+ printk(KERN_ERR "gpio-halt: error requesting IRQ %d for "
+ "GPIO %d.\n", irq, gpio);
+ gpio_free(gpio);
+ halt_node = NULL;
+ return err;
+ }
+
+ /* Register our halt function */
+ ppc_md.halt = gpio_halt_cb;
+ pm_power_off = gpio_halt_cb;
+
+ printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d"
+ " irq).\n", gpio, trigger, irq);
+
+ return 0;
+}
+
+static int gpio_halt_remove(struct platform_device *pdev)
+{
+ if (halt_node) {
+ int gpio = of_get_gpio(halt_node, 0);
+ int irq = irq_of_parse_and_map(halt_node, 0);
+
+ free_irq(irq, halt_node);
+
+ ppc_md.halt = NULL;
+ pm_power_off = NULL;
+
+ gpio_free(gpio);
+
+ halt_node = NULL;
+ }
+
+ return 0;
+}
+
+static const struct of_device_id gpio_halt_match[] = {
+ /* We match on the gpio bus itself and scan the children since they
+ * won't be matched against us. We know the bus won't match until it
+ * has been registered too.
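+ * (The sgy,gpio-halt child itself is then located in gpio_halt_probe()
+ * by scanning this node's children with of_find_matching_node().)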
*/ + { + .compatible = "fsl,qoriq-gpio", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, gpio_halt_match); + +static struct platform_driver gpio_halt_driver = { + .driver = { + .name = "gpio-halt", + .of_match_table = gpio_halt_match, + }, + .probe = gpio_halt_probe, + .remove = gpio_halt_remove, +}; + +module_platform_driver(gpio_halt_driver); + +MODULE_DESCRIPTION("Driver to support GPIO triggered system halt for Servergy CTS-1000 Systems."); +MODULE_VERSION("1.0"); +MODULE_AUTHOR("Ben Collins <ben.c@servergy.com>"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c new file mode 100644 index 000000000..8631ac5f0 --- /dev/null +++ b/arch/powerpc/platforms/85xx/smp.c @@ -0,0 +1,504 @@ +/* + * Author: Andy Fleming <afleming@freescale.com> + * Kumar Gala <galak@kernel.crashing.org> + * + * Copyright 2006-2008, 2011-2012 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/kexec.h> +#include <linux/highmem.h> +#include <linux/cpu.h> + +#include <asm/machdep.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/mpic.h> +#include <asm/cacheflush.h> +#include <asm/dbell.h> +#include <asm/fsl_guts.h> +#include <asm/code-patching.h> +#include <asm/cputhreads.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/mpic.h> +#include "smp.h" + +struct epapr_spin_table { + u32 addr_h; + u32 addr_l; + u32 r3_h; + u32 r3_l; + u32 reserved; + u32 pir; +}; + +static struct ccsr_guts __iomem *guts; +static u64 timebase; +static int tb_req; +static int tb_valid; + +static void mpc85xx_timebase_freeze(int freeze) +{ + uint32_t mask; + + mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1; + if (freeze) + setbits32(&guts->devdisr, mask); + else + clrbits32(&guts->devdisr, mask); + + in_be32(&guts->devdisr); +} + +static void mpc85xx_give_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + + while (!tb_req) + barrier(); + tb_req = 0; + + mpc85xx_timebase_freeze(1); +#ifdef CONFIG_PPC64 + /* + * e5500/e6500 have a workaround for erratum A-006958 in place + * that will reread the timebase until TBL is non-zero. + * That would be a bad thing when the timebase is frozen. + * + * Thus, we read it manually, and instead of checking that + * TBL is non-zero, we ensure that TB does not change. 
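(The single mfspr read below returns the full 64-bit timebase on these cores, which is why comparing successive reads is sufficient.)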
We don't + * do that for the main mftb implementation, because it requires + * a scratch register + */ + { + u64 prev; + + asm volatile("mfspr %0, %1" : "=r" (timebase) : + "i" (SPRN_TBRL)); + + do { + prev = timebase; + asm volatile("mfspr %0, %1" : "=r" (timebase) : + "i" (SPRN_TBRL)); + } while (prev != timebase); + } +#else + timebase = get_tb(); +#endif + mb(); + tb_valid = 1; + + while (tb_valid) + barrier(); + + mpc85xx_timebase_freeze(0); + + local_irq_restore(flags); +} + +static void mpc85xx_take_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + + tb_req = 1; + while (!tb_valid) + barrier(); + + set_tb(timebase >> 32, timebase & 0xffffffff); + isync(); + tb_valid = 0; + + local_irq_restore(flags); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void smp_85xx_mach_cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + u32 tmp; + + local_irq_disable(); + idle_task_exit(); + generic_set_cpu_dead(cpu); + mb(); + + mtspr(SPRN_TCR, 0); + + __flush_disable_L1(); + tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP; + mtspr(SPRN_HID0, tmp); + isync(); + + /* Enter NAP mode. */ + tmp = mfmsr(); + tmp |= MSR_WE; + mb(); + mtmsr(tmp); + isync(); + + while (1) + ; +} +#endif + +static inline void flush_spin_table(void *spin_table) +{ + flush_dcache_range((ulong)spin_table, + (ulong)spin_table + sizeof(struct epapr_spin_table)); +} + +static inline u32 read_spin_table_addr_l(void *spin_table) +{ + flush_dcache_range((ulong)spin_table, + (ulong)spin_table + sizeof(struct epapr_spin_table)); + return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l); +} + +#ifdef CONFIG_PPC64 +static void wake_hw_thread(void *info) +{ + void fsl_secondary_thread_init(void); + unsigned long imsr1, inia1; + int nr = *(const int *)info; + + imsr1 = MSR_KERNEL; + inia1 = *(unsigned long *)fsl_secondary_thread_init; + + mttmr(TMRN_IMSR1, imsr1); + mttmr(TMRN_INIA1, inia1); + mtspr(SPRN_TENS, TEN_THREAD(1)); + + smp_generic_kick_cpu(nr); +} +#endif + +static int smp_85xx_kick_cpu(int nr) +{ + unsigned long flags; + const u64 *cpu_rel_addr; + __iomem struct epapr_spin_table *spin_table; + struct device_node *np; + int hw_cpu = get_hard_smp_processor_id(nr); + int ioremappable; + int ret = 0; + + WARN_ON(nr < 0 || nr >= NR_CPUS); + WARN_ON(hw_cpu < 0 || hw_cpu >= NR_CPUS); + + pr_debug("smp_85xx_kick_cpu: kick CPU #%d\n", nr); + +#ifdef CONFIG_PPC64 + /* Threads don't use the spin table */ + if (cpu_thread_in_core(nr) != 0) { + int primary = cpu_first_thread_sibling(nr); + + if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT))) + return -ENOENT; + + if (cpu_thread_in_core(nr) != 1) { + pr_err("%s: cpu %d: invalid hw thread %d\n", + __func__, nr, cpu_thread_in_core(nr)); + return -ENOENT; + } + + if (!cpu_online(primary)) { + pr_err("%s: cpu %d: primary %d not online\n", + __func__, nr, primary); + return -ENOENT; + } + + smp_call_function_single(primary, wake_hw_thread, &nr, 0); + return 0; + } +#endif + + np = of_get_cpu_node(nr, NULL); + cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL); + + if (cpu_rel_addr == NULL) { + printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr); + return -ENOENT; + } + + /* + * A secondary core could be in a spinloop in the bootpage + * (0xfffff000), somewhere in highmem, or somewhere in lowmem. + * The bootpage and highmem can be accessed via ioremap(), but + * we need to directly access the spinloop if its in lowmem. 
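+ * (Per the ePAPR boot protocol, the secondary core spins polling addr_l;
+ * writing pir and then the release address below kicks it into the kernel.)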
+ */
+ ioremappable = *cpu_rel_addr > virt_to_phys(high_memory);
+
+ /* Map the spin table */
+ if (ioremappable)
+ spin_table = ioremap_prot(*cpu_rel_addr,
+ sizeof(struct epapr_spin_table), _PAGE_COHERENT);
+ else
+ spin_table = phys_to_virt(*cpu_rel_addr);
+
+ local_irq_save(flags);
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_HOTPLUG_CPU
+ /* Corresponding to generic_set_cpu_dead() */
+ generic_set_cpu_up(nr);
+
+ if (system_state == SYSTEM_RUNNING) {
+ /*
+ * To keep it compatible with old boot program which uses
+ * cache-inhibit spin table, we need to flush the cache
+ * before accessing spin table to invalidate any stale data.
+ * We also need to flush the cache after writing to spin
+ * table to push data out.
+ */
+ flush_spin_table(spin_table);
+ out_be32(&spin_table->addr_l, 0);
+ flush_spin_table(spin_table);
+
+ /*
+ * We don't set the BPTR register here since it already points
+ * to the boot page properly.
+ */
+ mpic_reset_core(nr);
+
+ /*
+ * wait until core is ready...
+ * We need to invalidate the stale data, in case the boot
+ * loader uses a cache-inhibited spin table.
+ */
+ if (!spin_event_timeout(
+ read_spin_table_addr_l(spin_table) == 1,
+ 10000, 100)) {
+ pr_err("%s: timeout waiting for core %d to reset\n",
+ __func__, hw_cpu);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ /* clear the acknowledge status */
+ __secondary_hold_acknowledge = -1;
+ }
+#endif
+ flush_spin_table(spin_table);
+ out_be32(&spin_table->pir, hw_cpu);
+ out_be32(&spin_table->addr_l, __pa(__early_start));
+ flush_spin_table(spin_table);
+
+ /* Wait a bit for the CPU to ack. */
+ if (!spin_event_timeout(__secondary_hold_acknowledge == hw_cpu,
+ 10000, 100)) {
+ pr_err("%s: timeout waiting for core %d to ack\n",
+ __func__, hw_cpu);
+ ret = -ENOENT;
+ goto out;
+ }
+out:
+#else
+ smp_generic_kick_cpu(nr);
+
+ flush_spin_table(spin_table);
+ out_be32(&spin_table->pir, hw_cpu);
+ out_be64((u64 *)(&spin_table->addr_h),
+ __pa(ppc_function_entry(generic_secondary_smp_init)));
+ flush_spin_table(spin_table);
+#endif
+
+ local_irq_restore(flags);
+
+ if (ioremappable)
+ iounmap(spin_table);
+
+ return ret;
+}
+
+struct smp_ops_t smp_85xx_ops = {
+ .kick_cpu = smp_85xx_kick_cpu,
+ .cpu_bootable = smp_generic_cpu_bootable,
+#ifdef CONFIG_HOTPLUG_CPU
+ .cpu_disable = generic_cpu_disable,
+ .cpu_die = generic_cpu_die,
+#endif
+#ifdef CONFIG_KEXEC
+ .give_timebase = smp_generic_give_timebase,
+ .take_timebase = smp_generic_take_timebase,
+#endif
+};
+
+#ifdef CONFIG_KEXEC
+atomic_t kexec_down_cpus = ATOMIC_INIT(0);
+
+void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+ local_irq_disable();
+
+ if (secondary) {
+ atomic_inc(&kexec_down_cpus);
+ /* loop forever */
+ while (1);
+ }
+}
+
+static void mpc85xx_smp_kexec_down(void *arg)
+{
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 1);
+}
+
+static void map_and_flush(unsigned long paddr)
+{
+ struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
+ unsigned long kaddr = (unsigned long)kmap_atomic(page);
+
+ flush_dcache_range(kaddr, kaddr + PAGE_SIZE);
+ kunmap_atomic((void *)kaddr);
+}
+
+/**
+ * Before we reset the other cores, we need to flush relevant cache
+ * out to memory so we don't get anything corrupted. Some of these flushes
+ * are performed out of an overabundance of caution, as interrupts are not
+ * disabled yet and we can switch cores.
+ */
+static void mpc85xx_smp_flush_dcache_kexec(struct kimage *image)
+{
+ kimage_entry_t *ptr, entry;
+ unsigned long paddr;
+ int i;
+
+ if (image->type == KEXEC_TYPE_DEFAULT) {
+ /* normal kexec images are stored in temporary pages */
+ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
+ ptr = (entry & IND_INDIRECTION) ?
+ phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
+ if (!(entry & IND_DESTINATION)) {
+ map_and_flush(entry);
+ }
+ }
+ /* flush out last IND_DONE page */
+ map_and_flush(entry);
+ } else {
+ /* crash type kexec images are copied to the crash region */
+ for (i = 0; i < image->nr_segments; i++) {
+ struct kexec_segment *seg = &image->segment[i];
+ for (paddr = seg->mem; paddr < seg->mem + seg->memsz;
+ paddr += PAGE_SIZE) {
+ map_and_flush(paddr);
+ }
+ }
+ }
+
+ /* also flush the kimage struct to be passed in as well */
+ flush_dcache_range((unsigned long)image,
+ (unsigned long)image + sizeof(*image));
}
+
+static void mpc85xx_smp_machine_kexec(struct kimage *image)
+{
+ int timeout = INT_MAX;
+ int i, num_cpus = num_present_cpus();
+
+ mpc85xx_smp_flush_dcache_kexec(image);
+
+ if (image->type == KEXEC_TYPE_DEFAULT)
+ smp_call_function(mpc85xx_smp_kexec_down, NULL, 0);
+
+ while ((atomic_read(&kexec_down_cpus) != (num_cpus - 1)) &&
+ (timeout > 0))
+ timeout--;
+
+ if (!timeout)
+ printk(KERN_ERR "Unable to bring down secondary cpu(s)\n");
+
+ for_each_online_cpu(i) {
+ if (i == smp_processor_id())
+ continue;
+ mpic_reset_core(i);
+ }
+
+ default_machine_kexec(image);
+}
+#endif /* CONFIG_KEXEC */
+
+static void smp_85xx_basic_setup(int cpu_nr)
+{
+ if (cpu_has_feature(CPU_FTR_DBELL))
+ doorbell_setup_this_cpu();
+}
+
+static void smp_85xx_setup_cpu(int cpu_nr)
+{
+ mpic_setup_this_cpu();
+ smp_85xx_basic_setup(cpu_nr);
+}
+
+static const struct of_device_id mpc85xx_smp_guts_ids[] = {
+ { .compatible = "fsl,mpc8572-guts", },
+ { .compatible = "fsl,p1020-guts", },
+ { .compatible = "fsl,p1021-guts", },
+ { .compatible = "fsl,p1022-guts", },
+ { .compatible = "fsl,p1023-guts", },
+ { .compatible = "fsl,p2020-guts", },
+ {},
+};
+
+void __init mpc85xx_smp_init(void)
+{
+ struct device_node *np;
+
+ np = of_find_node_by_type(NULL, "open-pic");
+ if (np) {
+ smp_85xx_ops.probe = smp_mpic_probe;
+ smp_85xx_ops.setup_cpu = smp_85xx_setup_cpu;
+ smp_85xx_ops.message_pass = smp_mpic_message_pass;
+ } else
+ smp_85xx_ops.setup_cpu = smp_85xx_basic_setup;
+
+ if (cpu_has_feature(CPU_FTR_DBELL)) {
+ /*
+ * If left NULL, .message_pass defaults to
+ * smp_muxed_ipi_message_pass
+ */
+ smp_85xx_ops.message_pass = NULL;
+ smp_85xx_ops.cause_ipi = doorbell_cause_ipi;
+ smp_85xx_ops.probe = NULL;
+ }
+
+ np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids);
+ if (np) {
+ guts = of_iomap(np, 0);
+ of_node_put(np);
+ if (!guts) {
+ pr_err("%s: Could not map guts node address\n",
+ __func__);
+ return;
+ }
+ smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
+ smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
+#ifdef CONFIG_HOTPLUG_CPU
+ ppc_md.cpu_die = smp_85xx_mach_cpu_die;
+#endif
+ }
+
+ smp_ops = &smp_85xx_ops;
+
+#ifdef CONFIG_KEXEC
+ ppc_md.kexec_cpu_down = mpc85xx_smp_kexec_cpu_down;
+ ppc_md.machine_kexec = mpc85xx_smp_machine_kexec;
+#endif
+}
diff --git a/arch/powerpc/platforms/85xx/smp.h b/arch/powerpc/platforms/85xx/smp.h
new file mode 100644
index 000000000..e2b44933f
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/smp.h
@@ -0,0 +1,15 @@
+#ifndef POWERPC_85XX_SMP_H_
+#define POWERPC_85XX_SMP_H_ 1
+
+#include <linux/init.h>
+
+#ifdef CONFIG_SMP
+void __init mpc85xx_smp_init(void);
+#else
+static inline void mpc85xx_smp_init(void)
+{
+ /* Nothing to do */
+}
+#endif
+
+#endif /* not POWERPC_85XX_SMP_H_ */
diff --git
a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c new file mode 100644 index 000000000..ae368e0e1 --- /dev/null +++ b/arch/powerpc/platforms/85xx/socrates.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2008 Emcraft Systems + * Sergei Poselenov <sposelenov@emcraft.com> + * + * Based on MPC8560 ADS and arch/ppc tqm85xx ports + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2008 Freescale Semiconductor Inc. + * + * Copyright (c) 2005-2006 DENX Software Engineering + * Stefan Roese <sr@denx.de> + * + * Based on original work by + * Kumar Gala <kumar.gala@freescale.com> + * Copyright 2004 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/mpic.h> +#include <asm/prom.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc85xx.h" +#include "socrates_fpga_pic.h" + +static void __init socrates_pic_init(void) +{ + struct device_node *np; + + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + + np = of_find_compatible_node(NULL, NULL, "abb,socrates-fpga-pic"); + if (!np) { + printk(KERN_ERR "Could not find socrates-fpga-pic node\n"); + return; + } + socrates_fpga_pic_init(np); + of_node_put(np); +} + +/* + * Setup the architecture + */ +static void __init socrates_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("socrates_setup_arch()", 0); + + fsl_pci_assign_primary(); +} + +machine_arch_initcall(socrates, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init socrates_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "abb,socrates")) + return 1; + + return 0; +} + +define_machine(socrates) { + .name = "Socrates", + .probe = socrates_probe, + .setup_arch = socrates_setup_arch, + .init_IRQ = socrates_pic_init, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c new file mode 100644 index 000000000..55a9682b9 --- /dev/null +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -0,0 +1,314 @@ +/* + * Copyright (C) 2008 Ilya Yanok, Emcraft Systems + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/irq.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/io.h> + +/* + * The FPGA supports 9 interrupt sources, which can be routed to 3 + * interrupt request lines of the MPIC. The line to be used can be + * specified through the third cell of FDT property "interrupts". 
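+ *
+ * For example, a hypothetical device on configurable source 3, active low
+ * (8 = IRQ_TYPE_LEVEL_LOW), routed to MPIC line 1, would specify:
+ * interrupts = <3 8 1>;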
+ */ + +#define SOCRATES_FPGA_NUM_IRQS 9 + +#define FPGA_PIC_IRQCFG (0x0) +#define FPGA_PIC_IRQMASK(n) (0x4 + 0x4 * (n)) + +#define SOCRATES_FPGA_IRQ_MASK ((1 << SOCRATES_FPGA_NUM_IRQS) - 1) + +struct socrates_fpga_irq_info { + unsigned int irq_line; + int type; +}; + +/* + * Interrupt routing and type table + * + * IRQ_TYPE_NONE means the interrupt type is configurable, + * otherwise it's fixed to the specified value. + */ +static struct socrates_fpga_irq_info fpga_irqs[SOCRATES_FPGA_NUM_IRQS] = { + [0] = {0, IRQ_TYPE_NONE}, + [1] = {0, IRQ_TYPE_LEVEL_HIGH}, + [2] = {0, IRQ_TYPE_LEVEL_LOW}, + [3] = {0, IRQ_TYPE_NONE}, + [4] = {0, IRQ_TYPE_NONE}, + [5] = {0, IRQ_TYPE_NONE}, + [6] = {0, IRQ_TYPE_NONE}, + [7] = {0, IRQ_TYPE_NONE}, + [8] = {0, IRQ_TYPE_LEVEL_HIGH}, +}; + +static DEFINE_RAW_SPINLOCK(socrates_fpga_pic_lock); + +static void __iomem *socrates_fpga_pic_iobase; +static struct irq_domain *socrates_fpga_pic_irq_host; +static unsigned int socrates_fpga_irqs[3]; + +static inline uint32_t socrates_fpga_pic_read(int reg) +{ + return in_be32(socrates_fpga_pic_iobase + reg); +} + +static inline void socrates_fpga_pic_write(int reg, uint32_t val) +{ + out_be32(socrates_fpga_pic_iobase + reg, val); +} + +static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq) +{ + uint32_t cause; + unsigned long flags; + int i; + + /* Check irq line routed to the MPIC */ + for (i = 0; i < 3; i++) { + if (irq == socrates_fpga_irqs[i]) + break; + } + if (i == 3) + return NO_IRQ; + + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + cause = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(i)); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); + for (i = SOCRATES_FPGA_NUM_IRQS - 1; i >= 0; i--) { + if (cause >> (i + 16)) + break; + } + return irq_linear_revmap(socrates_fpga_pic_irq_host, + (irq_hw_number_t)i); +} + +void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq; + + /* + * See if we actually have an interrupt, call generic handling code if + * we do. 
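+ * (Pending status lives in the upper 16 bits of the per-line IRQMASK
+ * register; the low 16 bits are the mask itself, as the helpers below show.)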
+ */ + cascade_irq = socrates_fpga_pic_get_irq(irq); + + if (cascade_irq != NO_IRQ) + generic_handle_irq(cascade_irq); + chip->irq_eoi(&desc->irq_data); +} + +static void socrates_fpga_pic_ack(struct irq_data *d) +{ + unsigned long flags; + unsigned int irq_line, hwirq = irqd_to_hwirq(d); + uint32_t mask; + + irq_line = fpga_irqs[hwirq].irq_line; + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) + & SOCRATES_FPGA_IRQ_MASK; + mask |= (1 << (hwirq + 16)); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); +} + +static void socrates_fpga_pic_mask(struct irq_data *d) +{ + unsigned long flags; + unsigned int hwirq = irqd_to_hwirq(d); + int irq_line; + u32 mask; + + irq_line = fpga_irqs[hwirq].irq_line; + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) + & SOCRATES_FPGA_IRQ_MASK; + mask &= ~(1 << hwirq); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); +} + +static void socrates_fpga_pic_mask_ack(struct irq_data *d) +{ + unsigned long flags; + unsigned int hwirq = irqd_to_hwirq(d); + int irq_line; + u32 mask; + + irq_line = fpga_irqs[hwirq].irq_line; + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) + & SOCRATES_FPGA_IRQ_MASK; + mask &= ~(1 << hwirq); + mask |= (1 << (hwirq + 16)); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); +} + +static void socrates_fpga_pic_unmask(struct irq_data *d) +{ + unsigned long flags; + unsigned int hwirq = irqd_to_hwirq(d); + int irq_line; + u32 mask; + + irq_line = fpga_irqs[hwirq].irq_line; + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) + & SOCRATES_FPGA_IRQ_MASK; + mask |= (1 << hwirq); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); +} + +static void socrates_fpga_pic_eoi(struct irq_data *d) +{ + unsigned long flags; + unsigned int hwirq = irqd_to_hwirq(d); + int irq_line; + u32 mask; + + irq_line = fpga_irqs[hwirq].irq_line; + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) + & SOCRATES_FPGA_IRQ_MASK; + mask |= (1 << (hwirq + 16)); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); +} + +static int socrates_fpga_pic_set_type(struct irq_data *d, + unsigned int flow_type) +{ + unsigned long flags; + unsigned int hwirq = irqd_to_hwirq(d); + int polarity; + u32 mask; + + if (fpga_irqs[hwirq].type != IRQ_TYPE_NONE) + return -EINVAL; + + switch (flow_type & IRQ_TYPE_SENSE_MASK) { + case IRQ_TYPE_LEVEL_HIGH: + polarity = 1; + break; + case IRQ_TYPE_LEVEL_LOW: + polarity = 0; + break; + default: + return -EINVAL; + } + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQCFG); + if (polarity) + mask |= (1 << hwirq); + else + mask &= ~(1 << hwirq); + socrates_fpga_pic_write(FPGA_PIC_IRQCFG, mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); + return 0; +} + +static struct irq_chip socrates_fpga_pic_chip = { + .name = "FPGA-PIC", + .irq_ack = socrates_fpga_pic_ack, + .irq_mask = socrates_fpga_pic_mask, + 
.irq_mask_ack = socrates_fpga_pic_mask_ack, + .irq_unmask = socrates_fpga_pic_unmask, + .irq_eoi = socrates_fpga_pic_eoi, + .irq_set_type = socrates_fpga_pic_set_type, +}; + +static int socrates_fpga_pic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hwirq) +{ + /* All interrupts are LEVEL sensitive */ + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &socrates_fpga_pic_chip, + handle_fasteoi_irq); + + return 0; +} + +static int socrates_fpga_pic_host_xlate(struct irq_domain *h, + struct device_node *ct, const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) +{ + struct socrates_fpga_irq_info *fpga_irq = &fpga_irqs[intspec[0]]; + + *out_hwirq = intspec[0]; + if (fpga_irq->type == IRQ_TYPE_NONE) { + /* type is configurable */ + if (intspec[1] != IRQ_TYPE_LEVEL_LOW && + intspec[1] != IRQ_TYPE_LEVEL_HIGH) { + pr_warning("FPGA PIC: invalid irq type, " + "setting default active low\n"); + *out_flags = IRQ_TYPE_LEVEL_LOW; + } else { + *out_flags = intspec[1]; + } + } else { + /* type is fixed */ + *out_flags = fpga_irq->type; + } + + /* Use specified interrupt routing */ + if (intspec[2] <= 2) + fpga_irq->irq_line = intspec[2]; + else + pr_warning("FPGA PIC: invalid irq routing\n"); + + return 0; +} + +static const struct irq_domain_ops socrates_fpga_pic_host_ops = { + .map = socrates_fpga_pic_host_map, + .xlate = socrates_fpga_pic_host_xlate, +}; + +void socrates_fpga_pic_init(struct device_node *pic) +{ + unsigned long flags; + int i; + + /* Setup an irq_domain structure */ + socrates_fpga_pic_irq_host = irq_domain_add_linear(pic, + SOCRATES_FPGA_NUM_IRQS, &socrates_fpga_pic_host_ops, NULL); + if (socrates_fpga_pic_irq_host == NULL) { + pr_err("FPGA PIC: Unable to allocate host\n"); + return; + } + + for (i = 0; i < 3; i++) { + socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i); + if (socrates_fpga_irqs[i] == NO_IRQ) { + pr_warning("FPGA PIC: can't get irq%d.\n", i); + continue; + } + irq_set_chained_handler(socrates_fpga_irqs[i], + socrates_fpga_pic_cascade); + } + + socrates_fpga_pic_iobase = of_iomap(pic, 0); + + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(0), + SOCRATES_FPGA_IRQ_MASK << 16); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(1), + SOCRATES_FPGA_IRQ_MASK << 16); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(2), + SOCRATES_FPGA_IRQ_MASK << 16); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); + + pr_info("FPGA PIC: Setting up Socrates FPGA PIC\n"); +} diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h new file mode 100644 index 000000000..21d7d8e42 --- /dev/null +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2008 Ilya Yanok, Emcraft Systems + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#ifndef SOCRATES_FPGA_PIC_H +#define SOCRATES_FPGA_PIC_H + +void socrates_fpga_pic_init(struct device_node *pic); + +#endif diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c new file mode 100644 index 000000000..6f4939b63 --- /dev/null +++ b/arch/powerpc/platforms/85xx/stx_gp3.c @@ -0,0 +1,111 @@ +/* + * Based on MPC8560 ADS and arch/ppc stx_gp3 ports + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2008 Freescale Semiconductor Inc. + * + * Dan Malek <dan@embeddededge.com> + * Copyright 2004 Embedded Edge, LLC + * + * Copied from mpc8560_ads.c + * Copyright 2002, 2003 Motorola Inc. + * + * Ported to 2.6, Matt Porter <mporter@kernel.crashing.org> + * Copyright 2004-2005 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/mpic.h> +#include <asm/prom.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc85xx.h" + +#ifdef CONFIG_CPM2 +#include <asm/cpm2.h> +#endif /* CONFIG_CPM2 */ + +static void __init stx_gp3_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + + mpc85xx_cpm2_pic_init(); +} + +/* + * Setup the architecture + */ +static void __init stx_gp3_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("stx_gp3_setup_arch()", 0); + + fsl_pci_assign_primary(); + +#ifdef CONFIG_CPM2 + cpm2_reset(); +#endif +} + +static void stx_gp3_show_cpuinfo(struct seq_file *m) +{ + uint pvid, svid, phid1; + + pvid = mfspr(SPRN_PVR); + svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: RPC Electronics STx\n"); + seq_printf(m, "PVR\t\t: 0x%x\n", pvid); + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + /* Display cpu Pll setting */ + phid1 = mfspr(SPRN_HID1); + seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); +} + +machine_arch_initcall(stx_gp3, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init stx_gp3_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "stx,gp3-8560"); +} + +define_machine(stx_gp3) { + .name = "STX GP3", + .probe = stx_gp3_probe, + .setup_arch = stx_gp3_setup_arch, + .init_IRQ = stx_gp3_pic_init, + .show_cpuinfo = stx_gp3_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c new file mode 100644 index 000000000..ec0b7272f --- /dev/null +++ b/arch/powerpc/platforms/85xx/tqm85xx.c @@ -0,0 +1,138 @@ +/* + * Based on MPC8560 ADS and arch/ppc tqm85xx ports + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2008 Freescale Semiconductor Inc. 
+ * + * Copyright (c) 2005-2006 DENX Software Engineering + * Stefan Roese <sr@denx.de> + * + * Based on original work by + * Kumar Gala <kumar.gala@freescale.com> + * Copyright 2004 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/mpic.h> +#include <asm/prom.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> + +#include "mpc85xx.h" + +#ifdef CONFIG_CPM2 +#include <asm/cpm2.h> +#endif /* CONFIG_CPM2 */ + +static void __init tqm85xx_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, + MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + + mpc85xx_cpm2_pic_init(); +} + +/* + * Setup the architecture + */ +static void __init tqm85xx_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("tqm85xx_setup_arch()", 0); + +#ifdef CONFIG_CPM2 + cpm2_reset(); +#endif + + fsl_pci_assign_primary(); +} + +static void tqm85xx_show_cpuinfo(struct seq_file *m) +{ + uint pvid, svid, phid1; + + pvid = mfspr(SPRN_PVR); + svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: TQ Components\n"); + seq_printf(m, "PVR\t\t: 0x%x\n", pvid); + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + /* Display cpu Pll setting */ + phid1 = mfspr(SPRN_HID1); + seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); +} + +static void tqm85xx_ti1520_fixup(struct pci_dev *pdev) +{ + unsigned int val; + + /* Do not do the fixup on other platforms! */ + if (!machine_is(tqm85xx)) + return; + + dev_info(&pdev->dev, "Using TI 1520 fixup on TQM85xx\n"); + + /* + * Enable P2CCLK bit in system control register + * to enable CLOCK output to power chip + */ + pci_read_config_dword(pdev, 0x80, &val); + pci_write_config_dword(pdev, 0x80, val | (1 << 27)); + +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1520, + tqm85xx_ti1520_fixup); + +machine_arch_initcall(tqm85xx, mpc85xx_common_publish_devices); + +static const char * const board[] __initconst = { + "tqc,tqm8540", + "tqc,tqm8541", + "tqc,tqm8548", + "tqc,tqm8555", + "tqc,tqm8560", + NULL +}; + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init tqm85xx_probe(void) +{ + return of_flat_dt_match(of_get_flat_dt_root(), board); +} + +define_machine(tqm85xx) { + .name = "TQM85xx", + .probe = tqm85xx_probe, + .setup_arch = tqm85xx_setup_arch, + .init_IRQ = tqm85xx_pic_init, + .show_cpuinfo = tqm85xx_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c new file mode 100644 index 000000000..1eadb6d0d --- /dev/null +++ b/arch/powerpc/platforms/85xx/twr_p102x.c @@ -0,0 +1,148 @@ +/* + * Copyright 2010-2011, 2013 Freescale Semiconductor, Inc. 
+ *
+ * Author: Michael Johnston <michael.johnston@freescale.com>
+ *
+ * Description:
+ * TWR-P102x Board Setup
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/qe.h>
+#include <asm/qe_ic.h>
+#include <asm/fsl_guts.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+static void __init twr_p1025_pic_init(void)
+{
+ struct mpic *mpic;
+
+#ifdef CONFIG_QUICC_ENGINE
+ struct device_node *np;
+#endif
+
+ mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
+ MPIC_SINGLE_DEST_CPU,
+ 0, 256, " OpenPIC ");
+
+ BUG_ON(mpic == NULL);
+ mpic_init(mpic);
+
+#ifdef CONFIG_QUICC_ENGINE
+ np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
+ if (np) {
+ qe_ic_init(np, 0, qe_ic_cascade_low_mpic,
+ qe_ic_cascade_high_mpic);
+ of_node_put(np);
+ } else
+ pr_err("Could not find qe-ic node\n");
+#endif
+}
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+static void __init twr_p1025_setup_arch(void)
+{
+#ifdef CONFIG_QUICC_ENGINE
+ struct device_node *np;
+#endif
+
+ if (ppc_md.progress)
+ ppc_md.progress("twr_p1025_setup_arch()", 0);
+
+ mpc85xx_smp_init();
+
+ fsl_pci_assign_primary();
+
+#ifdef CONFIG_QUICC_ENGINE
+ mpc85xx_qe_init();
+ mpc85xx_qe_par_io_init();
+
+#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
+ if (machine_is(twr_p1025)) {
+ struct ccsr_guts __iomem *guts;
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,p1021-guts");
+ if (np) {
+ guts = of_iomap(np, 0);
+ if (!guts)
+ pr_err("twr_p1025: could not map global utilities register\n");
+ else {
+ /* P1025 has pins muxed for QE and other functions. To
+ * enable QE UEC mode, we need to set bit QE0 for UCC1
+ * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9
+ * and QE12 for QE MII management signals in PMUXCR
+ * register.
+ * Set QE mux bits in PMUXCR */
+ setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
+ MPC85xx_PMUXCR_QE(3) |
+ MPC85xx_PMUXCR_QE(9) |
+ MPC85xx_PMUXCR_QE(12));
+ iounmap(guts);
+
+#if defined(CONFIG_SERIAL_QE)
+ /* On the TWR-P1025 board, UCC7 acts as a UART port.
+ * However, UCC7's CTS pin is low by default, which
+ * disrupts transmission in full-duplex communication,
+ * so disable the flow-control pin PA18. The UCC7
+ * UART then uses only the RXD and TXD pins.
+ */ + par_io_config_pin(0, 18, 0, 0, 0, 0); +#endif + /* Drive PB29 to CPLD low - CPLD will then change + * muxing from LBC to QE */ + par_io_config_pin(1, 29, 1, 0, 0, 0); + par_io_data_set(1, 29, 0); + } + of_node_put(np); + } + } +#endif +#endif /* CONFIG_QUICC_ENGINE */ + + pr_info("TWR-P1025 board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(twr_p1025, mpc85xx_common_publish_devices); + +static int __init twr_p1025_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,TWR-P1025"); +} + +define_machine(twr_p1025) { + .name = "TWR-P1025", + .probe = twr_p1025_probe, + .setup_arch = twr_p1025_setup_arch, + .init_IRQ = twr_p1025_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c new file mode 100644 index 000000000..1a9c10858 --- /dev/null +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -0,0 +1,209 @@ +/* + * Copyright (C) 2009 Extreme Engineering Solutions, Inc. + * + * X-ES board-specific functionality + * + * Based on mpc85xx_ds code from Freescale Semiconductor, Inc. + * + * Author: Nate Case <ncase@xes-inc.com> + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <mm/mmu_decl.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/mpic.h> + +#include <sysdev/fsl_soc.h> +#include <sysdev/fsl_pci.h> +#include "smp.h" + +#include "mpc85xx.h" + +/* A few bit definitions needed for fixups on some boards */ +#define MPC85xx_L2CTL_L2E 0x80000000 /* L2 enable */ +#define MPC85xx_L2CTL_L2I 0x40000000 /* L2 flash invalidate */ +#define MPC85xx_L2CTL_L2SIZ_MASK 0x30000000 /* L2 SRAM size (R/O) */ + +void __init xes_mpc85xx_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +static void xes_mpc85xx_configure_l2(void __iomem *l2_base) +{ + volatile uint32_t ctl, tmp; + + asm volatile("msync; isync"); + tmp = in_be32(l2_base); + + /* + * xMon may have enabled part of L2 as SRAM, so we need to set it + * up for all cache mode just to be safe. + */ + printk(KERN_INFO "xes_mpc85xx: Enabling L2 as cache\n"); + + ctl = MPC85xx_L2CTL_L2E | MPC85xx_L2CTL_L2I; + if (of_machine_is_compatible("MPC8540") || + of_machine_is_compatible("MPC8560")) + /* + * Assume L2 SRAM is used fully for cache, so set + * L2BLKSZ (bits 4:5) to match L2SIZ (bits 2:3). + */ + ctl |= (tmp & MPC85xx_L2CTL_L2SIZ_MASK) >> 2; + + asm volatile("msync; isync"); + out_be32(l2_base, ctl); + asm volatile("msync; isync"); +} + +static void xes_mpc85xx_fixups(void) +{ + struct device_node *np; + int err; + + /* + * Legacy xMon firmware on some X-ES boards does not enable L2 + * as cache. We must ensure that they get enabled here. 
+ */ + for_each_node_by_name(np, "l2-cache-controller") { + struct resource r[2]; + void __iomem *l2_base; + + /* Only MPC8548, MPC8540, and MPC8560 boards are affected */ + if (!of_device_is_compatible(np, + "fsl,mpc8548-l2-cache-controller") && + !of_device_is_compatible(np, + "fsl,mpc8540-l2-cache-controller") && + !of_device_is_compatible(np, + "fsl,mpc8560-l2-cache-controller")) + continue; + + err = of_address_to_resource(np, 0, &r[0]); + if (err) { + printk(KERN_WARNING "xes_mpc85xx: Could not get " + "resource for device tree node '%s'", + np->full_name); + continue; + } + + l2_base = ioremap(r[0].start, resource_size(&r[0])); + + xes_mpc85xx_configure_l2(l2_base); + } +} + +/* + * Setup the architecture + */ +static void __init xes_mpc85xx_setup_arch(void) +{ + struct device_node *root; + const char *model = "Unknown"; + + root = of_find_node_by_path("/"); + if (root == NULL) + return; + + model = of_get_property(root, "model", NULL); + + printk(KERN_INFO "X-ES MPC85xx-based single-board computer: %s\n", + model + strlen("xes,")); + + xes_mpc85xx_fixups(); + + mpc85xx_smp_init(); + + fsl_pci_assign_primary(); +} + +machine_arch_initcall(xes_mpc8572, mpc85xx_common_publish_devices); +machine_arch_initcall(xes_mpc8548, mpc85xx_common_publish_devices); +machine_arch_initcall(xes_mpc8540, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init xes_mpc8572_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "xes,MPC8572"); +} + +static int __init xes_mpc8548_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "xes,MPC8548"); +} + +static int __init xes_mpc8540_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "xes,MPC8540"); +} + +define_machine(xes_mpc8572) { + .name = "X-ES MPC8572", + .probe = xes_mpc8572_probe, + .setup_arch = xes_mpc85xx_setup_arch, + .init_IRQ = xes_mpc85xx_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(xes_mpc8548) { + .name = "X-ES MPC8548", + .probe = xes_mpc8548_probe, + .setup_arch = xes_mpc85xx_setup_arch, + .init_IRQ = xes_mpc85xx_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +define_machine(xes_mpc8540) { + .name = "X-ES MPC8540", + .probe = xes_mpc8540_probe, + .setup_arch = xes_mpc85xx_setup_arch, + .init_IRQ = xes_mpc85xx_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig new file mode 100644 index 000000000..1afd1e4a2 --- /dev/null +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -0,0 +1,80 @@ +config PPC_86xx +menuconfig PPC_86xx + bool "86xx-based boards" + depends on 6xx + select FSL_SOC + select ALTIVEC + select ARCH_WANT_OPTIONAL_GPIOLIB + help + The Freescale E600 SoCs have 74xx cores. 
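Every board port in this import follows the same two-step boot-time pattern seen in the X-ES files above: a probe routine that runs before the device tree is unflattened and matches a compatible string with the flat-DT helpers, and a define_machine() descriptor that wires up the board callbacks. Below is a minimal sketch of that pattern only, not a board in this tree; the "acme,widget" compatible string and the widget_* names are hypothetical.

/* Called very early, device-tree isn't unflattened */
static int __init widget_probe(void)
{
	unsigned long root = of_get_flat_dt_root();

	/* Only the flat-DT helpers are safe this early */
	return of_flat_dt_is_compatible(root, "acme,widget");
}

static void __init widget_setup_arch(void)
{
	if (ppc_md.progress)
		ppc_md.progress("widget_setup_arch()", 0);

	fsl_pci_assign_primary();
}

define_machine(widget) {
	.name		= "ACME Widget",
	.probe		= widget_probe,
	.setup_arch	= widget_setup_arch,
	.init_IRQ	= mpc86xx_init_irq,
	.get_irq	= mpic_get_irq,
	.restart	= fsl_rstcr_restart,
	.calibrate_decr	= generic_calibrate_decr,
	.progress	= udbg_progress,
};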
+ +if PPC_86xx + +config MPC8641_HPCN + bool "Freescale MPC8641 HPCN" + select PPC_I8259 + select DEFAULT_UIMAGE + select FSL_ULI1575 if PCI + select HAS_RAPIDIO + select SWIOTLB + help + This option enables support for the MPC8641 HPCN board. + +config SBC8641D + bool "Wind River SBC8641D" + select DEFAULT_UIMAGE + help + This option enables support for the WRS SBC8641D board. + +config MPC8610_HPCD + bool "Freescale MPC8610 HPCD" + select DEFAULT_UIMAGE + select FSL_ULI1575 if PCI + help + This option enables support for the MPC8610 HPCD board. + +config GEF_PPC9A + bool "GE PPC9A" + select DEFAULT_UIMAGE + select MMIO_NVRAM + select ARCH_REQUIRE_GPIOLIB + select GE_FPGA + help + This option enables support for the GE PPC9A. + +config GEF_SBC310 + bool "GE SBC310" + select DEFAULT_UIMAGE + select MMIO_NVRAM + select ARCH_REQUIRE_GPIOLIB + select GE_FPGA + help + This option enables support for the GE SBC310. + +config GEF_SBC610 + bool "GE SBC610" + select DEFAULT_UIMAGE + select MMIO_NVRAM + select ARCH_REQUIRE_GPIOLIB + select GE_FPGA + select HAS_RAPIDIO + help + This option enables support for the GE SBC610. + +endif + +config MPC8641 + bool + select PPC_PCI_CHOICE + select FSL_PCI if PCI + select PPC_UDBG_16550 + select MPIC + default y if MPC8641_HPCN || SBC8641D || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A + +config MPC8610 + bool + select PPC_PCI_CHOICE + select FSL_PCI if PCI + select PPC_UDBG_16550 + select MPIC + default y if MPC8610_HPCD diff --git a/arch/powerpc/platforms/86xx/Makefile b/arch/powerpc/platforms/86xx/Makefile new file mode 100644 index 000000000..ede815d64 --- /dev/null +++ b/arch/powerpc/platforms/86xx/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the PowerPC 86xx linux kernel. +# + +obj-y := pic.o +obj-$(CONFIG_SMP) += mpc86xx_smp.o +obj-$(CONFIG_MPC8641_HPCN) += mpc86xx_hpcn.o +obj-$(CONFIG_SBC8641D) += sbc8641d.o +obj-$(CONFIG_MPC8610_HPCD) += mpc8610_hpcd.o +obj-$(CONFIG_GEF_SBC610) += gef_sbc610.o +obj-$(CONFIG_GEF_SBC310) += gef_sbc310.o +obj-$(CONFIG_GEF_PPC9A) += gef_ppc9a.o diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c new file mode 100644 index 000000000..bf17933b2 --- /dev/null +++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c @@ -0,0 +1,246 @@ +/* + * GE PPC9A board support + * + * Author: Martyn Welch <martyn.welch@ge.com> + * + * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines) + * Copyright 2006 Freescale Semiconductor Inc. + * + * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> + +#include <asm/mpic.h> +#include <asm/nvram.h> + +#include <sysdev/fsl_pci.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/ge/ge_pic.h> + +#include "mpc86xx.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG (fmt...) do { printk(KERN_ERR "PPC9A: " fmt); } while (0) +#else +#define DBG (fmt...) 
do { } while (0)
+#endif
+
+void __iomem *ppc9a_regs;
+
+static void __init gef_ppc9a_init_irq(void)
+{
+ struct device_node *cascade_node = NULL;
+
+ mpc86xx_init_irq();
+
+ /*
+ * There is a simple interrupt handler in the main FPGA, this needs
+ * to be cascaded into the MPIC
+ */
+ cascade_node = of_find_compatible_node(NULL, NULL, "gef,fpga-pic-1.00");
+ if (!cascade_node) {
+ printk(KERN_WARNING "PPC9A: No FPGA PIC\n");
+ return;
+ }
+
+ gef_pic_init(cascade_node);
+ of_node_put(cascade_node);
+}
+
+static void __init gef_ppc9a_setup_arch(void)
+{
+ struct device_node *regs;
+
+ printk(KERN_INFO "GE Intelligent Platforms PPC9A 6U VME SBC\n");
+
+#ifdef CONFIG_SMP
+ mpc86xx_smp_init();
+#endif
+
+ fsl_pci_assign_primary();
+
+ /* Remap basic board registers */
+ regs = of_find_compatible_node(NULL, NULL, "gef,ppc9a-fpga-regs");
+ if (regs) {
+ ppc9a_regs = of_iomap(regs, 0);
+ if (ppc9a_regs == NULL)
+ printk(KERN_WARNING "Unable to map board registers\n");
+ of_node_put(regs);
+ }
+
+#if defined(CONFIG_MMIO_NVRAM)
+ mmio_nvram_init();
+#endif
+}
+
+/* Return the PCB revision */
+static unsigned int gef_ppc9a_get_pcb_rev(void)
+{
+ unsigned int reg;
+
+ reg = ioread32be(ppc9a_regs);
+ return (reg >> 16) & 0xff;
+}
+
+/* Return the board (software) revision */
+static unsigned int gef_ppc9a_get_board_rev(void)
+{
+ unsigned int reg;
+
+ reg = ioread32be(ppc9a_regs);
+ return (reg >> 8) & 0xff;
+}
+
+/* Return the FPGA revision */
+static unsigned int gef_ppc9a_get_fpga_rev(void)
+{
+ unsigned int reg;
+
+ reg = ioread32be(ppc9a_regs);
+ return reg & 0xf;
+}
+
+/* Return VME Geographical Address */
+static unsigned int gef_ppc9a_get_vme_geo_addr(void)
+{
+ unsigned int reg;
+
+ reg = ioread32be(ppc9a_regs + 0x4);
+ return reg & 0x1f;
+}
+
+/* Return VME System Controller Status */
+static unsigned int gef_ppc9a_get_vme_is_syscon(void)
+{
+ unsigned int reg;
+
+ reg = ioread32be(ppc9a_regs + 0x4);
+ return (reg >> 9) & 0x1;
+}
+
+static void gef_ppc9a_show_cpuinfo(struct seq_file *m)
+{
+ uint svid = mfspr(SPRN_SVR);
+
+ seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+
+ seq_printf(m, "Revision\t: %u%c\n", gef_ppc9a_get_pcb_rev(),
+ ('A' + gef_ppc9a_get_board_rev()));
+ seq_printf(m, "FPGA Revision\t: %u\n", gef_ppc9a_get_fpga_rev());
+
+ seq_printf(m, "SVR\t\t: 0x%x\n", svid);
+
+ seq_printf(m, "VME geo. addr\t: %u\n", gef_ppc9a_get_vme_geo_addr());
+
+ seq_printf(m, "VME syscon\t: %s\n",
+ gef_ppc9a_get_vme_is_syscon() ? "yes" : "no");
+}
+
+static void gef_ppc9a_nec_fixup(struct pci_dev *pdev)
+{
+ unsigned int val;
+
+ /* Do not do the fixup on other platforms! */
+ if (!machine_is(gef_ppc9a))
+ return;
+
+ printk(KERN_INFO "Running NEC uPD720101 Fixup\n");
+
+ /* Ensure ports 1, 2, 3, 4 & 5 are enabled */
+ pci_read_config_dword(pdev, 0xe0, &val);
+ pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x5);
+
+ /* System clock is 48-MHz Oscillator and EHCI Enabled. */
+ pci_write_config_dword(pdev, 0xe4, 1 << 5);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
+ gef_ppc9a_nec_fixup);
+
+/*
+ * Called very early, device-tree isn't unflattened
+ *
+ * This function is called to determine whether the BSP is compatible with the
+ * supplied device-tree, which is assumed to be the correct one for the actual
+ * board. It is expected that, in the future, a kernel may support multiple
+ * boards.
+ */ +static int __init gef_ppc9a_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "gef,ppc9a")) + return 1; + + return 0; +} + +static long __init mpc86xx_time_init(void) +{ + unsigned int temp; + + /* Set the time base to zero */ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, 0); + + temp = mfspr(SPRN_HID0); + temp |= HID0_TBEN; + mtspr(SPRN_HID0, temp); + asm volatile("isync"); + + return 0; +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + { .compatible = "gianfar", }, + { .compatible = "fsl,mpc8641-pcie", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + printk(KERN_DEBUG "Probe platform devices\n"); + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_arch_initcall(gef_ppc9a, declare_of_platform_devices); + +define_machine(gef_ppc9a) { + .name = "GE PPC9A", + .probe = gef_ppc9a_probe, + .setup_arch = gef_ppc9a_setup_arch, + .init_IRQ = gef_ppc9a_init_irq, + .show_cpuinfo = gef_ppc9a_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .time_init = mpc86xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +}; diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c new file mode 100644 index 000000000..8facf5873 --- /dev/null +++ b/arch/powerpc/platforms/86xx/gef_sbc310.c @@ -0,0 +1,233 @@ +/* + * GE SBC310 board support + * + * Author: Martyn Welch <martyn.welch@ge.com> + * + * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines) + * Copyright 2006 Freescale Semiconductor Inc. + * + * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> + +#include <asm/mpic.h> +#include <asm/nvram.h> + +#include <sysdev/fsl_pci.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/ge/ge_pic.h> + +#include "mpc86xx.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG (fmt...) do { printk(KERN_ERR "SBC310: " fmt); } while (0) +#else +#define DBG (fmt...) 
do { } while (0)
+#endif
+
+void __iomem *sbc310_regs;
+
+static void __init gef_sbc310_init_irq(void)
+{
+ struct device_node *cascade_node = NULL;
+
+ mpc86xx_init_irq();
+
+ /*
+ * There is a simple interrupt handler in the main FPGA, this needs
+ * to be cascaded into the MPIC
+ */
+ cascade_node = of_find_compatible_node(NULL, NULL, "gef,fpga-pic");
+ if (!cascade_node) {
+ printk(KERN_WARNING "SBC310: No FPGA PIC\n");
+ return;
+ }
+
+ gef_pic_init(cascade_node);
+ of_node_put(cascade_node);
+}
+
+static void __init gef_sbc310_setup_arch(void)
+{
+ struct device_node *regs;
+ printk(KERN_INFO "GE Intelligent Platforms SBC310 6U VPX SBC\n");
+
+#ifdef CONFIG_SMP
+ mpc86xx_smp_init();
+#endif
+
+ fsl_pci_assign_primary();
+
+ /* Remap basic board registers */
+ regs = of_find_compatible_node(NULL, NULL, "gef,fpga-regs");
+ if (regs) {
+ sbc310_regs = of_iomap(regs, 0);
+ if (sbc310_regs == NULL)
+ printk(KERN_WARNING "Unable to map board registers\n");
+ of_node_put(regs);
+ }
+
+#if defined(CONFIG_MMIO_NVRAM)
+ mmio_nvram_init();
+#endif
+}
+
+/* Return the board ID */
+static unsigned int gef_sbc310_get_board_id(void)
+{
+ unsigned int reg;
+
+ reg = ioread32(sbc310_regs);
+ return reg & 0xff;
+}
+
+/* Return the PCB revision */
+static unsigned int gef_sbc310_get_pcb_rev(void)
+{
+ unsigned int reg;
+
+ reg = ioread32(sbc310_regs);
+ return (reg >> 8) & 0xff;
+}
+
+/* Return the board (software) revision */
+static unsigned int gef_sbc310_get_board_rev(void)
+{
+ unsigned int reg;
+
+ reg = ioread32(sbc310_regs);
+ return (reg >> 16) & 0xff;
+}
+
+/* Return the FPGA revision */
+static unsigned int gef_sbc310_get_fpga_rev(void)
+{
+ unsigned int reg;
+
+ reg = ioread32(sbc310_regs);
+ return (reg >> 24) & 0xf;
+}
+
+static void gef_sbc310_show_cpuinfo(struct seq_file *m)
+{
+ uint svid = mfspr(SPRN_SVR);
+
+ seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+
+ seq_printf(m, "Board ID\t: 0x%2.2x\n", gef_sbc310_get_board_id());
+ seq_printf(m, "Revision\t: %u%c\n", gef_sbc310_get_pcb_rev(),
+ ('A' + gef_sbc310_get_board_rev() - 1));
+ seq_printf(m, "FPGA Revision\t: %u\n", gef_sbc310_get_fpga_rev());
+
+ seq_printf(m, "SVR\t\t: 0x%x\n", svid);
+
+}
+
+static void gef_sbc310_nec_fixup(struct pci_dev *pdev)
+{
+ unsigned int val;
+
+ /* Do not do the fixup on other platforms! */
+ if (!machine_is(gef_sbc310))
+ return;
+
+ printk(KERN_INFO "Running NEC uPD720101 Fixup\n");
+
+ /* Ensure only ports 1 & 2 are enabled */
+ pci_read_config_dword(pdev, 0xe0, &val);
+ pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x2);
+
+ /* System clock is 48-MHz Oscillator and EHCI Enabled. */
+ pci_write_config_dword(pdev, 0xe4, 1 << 5);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
+ gef_sbc310_nec_fixup);
+
+/*
+ * Called very early, device-tree isn't unflattened
+ *
+ * This function is called to determine whether the BSP is compatible with the
+ * supplied device-tree, which is assumed to be the correct one for the actual
+ * board. It is expected that, in the future, a kernel may support multiple
+ * boards.
+ */ +static int __init gef_sbc310_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "gef,sbc310")) + return 1; + + return 0; +} + +static long __init mpc86xx_time_init(void) +{ + unsigned int temp; + + /* Set the time base to zero */ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, 0); + + temp = mfspr(SPRN_HID0); + temp |= HID0_TBEN; + mtspr(SPRN_HID0, temp); + asm volatile("isync"); + + return 0; +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + { .compatible = "gianfar", }, + { .compatible = "fsl,mpc8641-pcie", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + printk(KERN_DEBUG "Probe platform devices\n"); + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_arch_initcall(gef_sbc310, declare_of_platform_devices); + +define_machine(gef_sbc310) { + .name = "GE SBC310", + .probe = gef_sbc310_probe, + .setup_arch = gef_sbc310_setup_arch, + .init_IRQ = gef_sbc310_init_irq, + .show_cpuinfo = gef_sbc310_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .time_init = mpc86xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +}; diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c new file mode 100644 index 000000000..8c9058df5 --- /dev/null +++ b/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -0,0 +1,223 @@ +/* + * GE SBC610 board support + * + * Author: Martyn Welch <martyn.welch@ge.com> + * + * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines) + * Copyright 2006 Freescale Semiconductor Inc. + * + * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> + +#include <asm/mpic.h> +#include <asm/nvram.h> + +#include <sysdev/fsl_pci.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/ge/ge_pic.h> + +#include "mpc86xx.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG (fmt...) do { printk(KERN_ERR "SBC610: " fmt); } while (0) +#else +#define DBG (fmt...) 
do { } while (0)
+#endif
+
+void __iomem *sbc610_regs;
+
+static void __init gef_sbc610_init_irq(void)
+{
+ struct device_node *cascade_node = NULL;
+
+ mpc86xx_init_irq();
+
+ /*
+ * There is a simple interrupt handler in the main FPGA, this needs
+ * to be cascaded into the MPIC
+ */
+ cascade_node = of_find_compatible_node(NULL, NULL, "gef,fpga-pic");
+ if (!cascade_node) {
+ printk(KERN_WARNING "SBC610: No FPGA PIC\n");
+ return;
+ }
+
+ gef_pic_init(cascade_node);
+ of_node_put(cascade_node);
+}
+
+static void __init gef_sbc610_setup_arch(void)
+{
+ struct device_node *regs;
+
+ printk(KERN_INFO "GE Intelligent Platforms SBC610 6U VPX SBC\n");
+
+#ifdef CONFIG_SMP
+ mpc86xx_smp_init();
+#endif
+
+ fsl_pci_assign_primary();
+
+ /* Remap basic board registers */
+ regs = of_find_compatible_node(NULL, NULL, "gef,fpga-regs");
+ if (regs) {
+ sbc610_regs = of_iomap(regs, 0);
+ if (sbc610_regs == NULL)
+ printk(KERN_WARNING "Unable to map board registers\n");
+ of_node_put(regs);
+ }
+
+#if defined(CONFIG_MMIO_NVRAM)
+ mmio_nvram_init();
+#endif
+}
+
+/* Return the PCB revision */
+static unsigned int gef_sbc610_get_pcb_rev(void)
+{
+ unsigned int reg;
+
+ reg = ioread32(sbc610_regs);
+ return (reg >> 8) & 0xff;
+}
+
+/* Return the board (software) revision */
+static unsigned int gef_sbc610_get_board_rev(void)
+{
+ unsigned int reg;
+
+ reg = ioread32(sbc610_regs);
+ return (reg >> 16) & 0xff;
+}
+
+/* Return the FPGA revision */
+static unsigned int gef_sbc610_get_fpga_rev(void)
+{
+ unsigned int reg;
+
+ reg = ioread32(sbc610_regs);
+ return (reg >> 24) & 0xf;
+}
+
+static void gef_sbc610_show_cpuinfo(struct seq_file *m)
+{
+ uint svid = mfspr(SPRN_SVR);
+
+ seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+
+ seq_printf(m, "Revision\t: %u%c\n", gef_sbc610_get_pcb_rev(),
+ ('A' + gef_sbc610_get_board_rev() - 1));
+ seq_printf(m, "FPGA Revision\t: %u\n", gef_sbc610_get_fpga_rev());
+
+ seq_printf(m, "SVR\t\t: 0x%x\n", svid);
+}
+
+static void gef_sbc610_nec_fixup(struct pci_dev *pdev)
+{
+ unsigned int val;
+
+ /* Do not do the fixup on other platforms! */
+ if (!machine_is(gef_sbc610))
+ return;
+
+ printk(KERN_INFO "Running NEC uPD720101 Fixup\n");
+
+ /* Ensure ports 1, 2, 3, 4 & 5 are enabled */
+ pci_read_config_dword(pdev, 0xe0, &val);
+ pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x5);
+
+ /* System clock is 48-MHz Oscillator and EHCI Enabled. */
+ pci_write_config_dword(pdev, 0xe4, 1 << 5);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
+ gef_sbc610_nec_fixup);
+
+/*
+ * Called very early, device-tree isn't unflattened
+ *
+ * This function is called to determine whether the BSP is compatible with the
+ * supplied device-tree, which is assumed to be the correct one for the actual
+ * board. It is expected that, in the future, a kernel may support multiple
+ * boards.
+ */
+static int __init gef_sbc610_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ if (of_flat_dt_is_compatible(root, "gef,sbc610"))
+ return 1;
+
+ return 0;
+}
+
+static long __init mpc86xx_time_init(void)
+{
+ unsigned int temp;
+
+ /* Set the time base to zero */
+ mtspr(SPRN_TBWL, 0);
+ mtspr(SPRN_TBWU, 0);
+
+ temp = mfspr(SPRN_HID0);
+ temp |= HID0_TBEN;
+ mtspr(SPRN_HID0, temp);
+ asm volatile("isync");
+
+ return 0;
+}
+
+static const struct of_device_id of_bus_ids[] __initconst = {
+ { .compatible = "simple-bus", },
+ { .compatible = "gianfar", },
+ { .compatible = "fsl,mpc8641-pcie", },
+ {},
+};
+
+static int __init declare_of_platform_devices(void)
+{
+ printk(KERN_DEBUG "Probe platform devices\n");
+ of_platform_bus_probe(NULL, of_bus_ids, NULL);
+
+ return 0;
+}
+machine_arch_initcall(gef_sbc610, declare_of_platform_devices);
+
+define_machine(gef_sbc610) {
+ .name = "GE SBC610",
+ .probe = gef_sbc610_probe,
+ .setup_arch = gef_sbc610_setup_arch,
+ .init_IRQ = gef_sbc610_init_irq,
+ .show_cpuinfo = gef_sbc610_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+ .restart = fsl_rstcr_restart,
+ .time_init = mpc86xx_time_init,
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = udbg_progress,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+#endif
+};
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
new file mode 100644
index 000000000..55413a547
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -0,0 +1,359 @@
+/*
+ * MPC8610 HPCD board specific routines
+ *
+ * Initial author: Xianghua Xiao <x.xiao@freescale.com>
+ * Recode: Jason Jin <jason.jin@freescale.com>
+ * York Sun <yorksun@freescale.com>
+ *
+ * Rewrote the interrupt routing and removed the 8259 PIC support;
+ * all the integrated devices in the ULI use sideband interrupts.
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> + +#include <asm/mpic.h> + +#include <linux/of_platform.h> +#include <sysdev/fsl_pci.h> +#include <sysdev/fsl_soc.h> +#include <sysdev/simple_gpio.h> +#include <asm/fsl_guts.h> + +#include "mpc86xx.h" + +static struct device_node *pixis_node; +static unsigned char *pixis_bdcfg0, *pixis_arch; + +/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */ +#define CLKDVDR_PXCKEN 0x80000000 +#define CLKDVDR_PXCKINV 0x10000000 +#define CLKDVDR_PXCKDLY 0x06000000 +#define CLKDVDR_PXCLK_MASK 0x001F0000 + +#ifdef CONFIG_SUSPEND +static irqreturn_t mpc8610_sw9_irq(int irq, void *data) +{ + pr_debug("%s: PIXIS' event (sw9/wakeup) IRQ handled\n", __func__); + return IRQ_HANDLED; +} + +static void __init mpc8610_suspend_init(void) +{ + int irq; + int ret; + + if (!pixis_node) + return; + + irq = irq_of_parse_and_map(pixis_node, 0); + if (!irq) { + pr_err("%s: can't map pixis event IRQ.\n", __func__); + return; + } + + ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9:wakeup", NULL); + if (ret) { + pr_err("%s: can't request pixis event IRQ: %d\n", + __func__, ret); + irq_dispose_mapping(irq); + } + + enable_irq_wake(irq); +} +#else +static inline void mpc8610_suspend_init(void) { } +#endif /* CONFIG_SUSPEND */ + +static const struct of_device_id mpc8610_ids[] __initconst = { + { .compatible = "fsl,mpc8610-immr", }, + { .compatible = "fsl,mpc8610-guts", }, + { .compatible = "simple-bus", }, + /* So that the DMA channel nodes can be probed individually: */ + { .compatible = "fsl,eloplus-dma", }, + /* PCI controllers */ + { .compatible = "fsl,mpc8610-pci", }, + { .compatible = "fsl,mpc8641-pcie", }, + {} +}; + +static int __init mpc8610_declare_of_platform_devices(void) +{ + /* Firstly, register PIXIS GPIOs. */ + simple_gpiochip_init("fsl,fpga-pixis-gpio-bank"); + + /* Enable wakeup on PIXIS' event IRQ. */ + mpc8610_suspend_init(); + + /* Without this call, the SSI device driver won't get probed. */ + of_platform_bus_probe(NULL, mpc8610_ids, NULL); + + return 0; +} +machine_arch_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices); + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + +/* + * DIU Area Descriptor + * + * The MPC8610 reference manual shows the bits of the AD register in + * little-endian order, which causes the BLUE_C field to be split into two + * parts. To simplify the definition of the MAKE_AD() macro, we define the + * fields in big-endian order and byte-swap the result. + * + * So even though the registers don't look like they're in the + * same bit positions as they are on the P1022, the same value is written to + * the AD register on the MPC8610 and on the P1022. 
+ */ +#define AD_BYTE_F 0x10000000 +#define AD_ALPHA_C_MASK 0x0E000000 +#define AD_ALPHA_C_SHIFT 25 +#define AD_BLUE_C_MASK 0x01800000 +#define AD_BLUE_C_SHIFT 23 +#define AD_GREEN_C_MASK 0x00600000 +#define AD_GREEN_C_SHIFT 21 +#define AD_RED_C_MASK 0x00180000 +#define AD_RED_C_SHIFT 19 +#define AD_PALETTE 0x00040000 +#define AD_PIXEL_S_MASK 0x00030000 +#define AD_PIXEL_S_SHIFT 16 +#define AD_COMP_3_MASK 0x0000F000 +#define AD_COMP_3_SHIFT 12 +#define AD_COMP_2_MASK 0x00000F00 +#define AD_COMP_2_SHIFT 8 +#define AD_COMP_1_MASK 0x000000F0 +#define AD_COMP_1_SHIFT 4 +#define AD_COMP_0_MASK 0x0000000F +#define AD_COMP_0_SHIFT 0 + +#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \ + cpu_to_le32(AD_BYTE_F | (alpha << AD_ALPHA_C_SHIFT) | \ + (blue << AD_BLUE_C_SHIFT) | (green << AD_GREEN_C_SHIFT) | \ + (red << AD_RED_C_SHIFT) | (c3 << AD_COMP_3_SHIFT) | \ + (c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \ + (c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT)) + +u32 mpc8610hpcd_get_pixel_format(enum fsl_diu_monitor_port port, + unsigned int bits_per_pixel) +{ + static const u32 pixelformat[][3] = { + { + MAKE_AD(3, 0, 2, 1, 3, 8, 8, 8, 8), + MAKE_AD(4, 2, 0, 1, 2, 8, 8, 8, 0), + MAKE_AD(4, 0, 2, 1, 1, 5, 6, 5, 0) + }, + { + MAKE_AD(3, 2, 0, 1, 3, 8, 8, 8, 8), + MAKE_AD(4, 0, 2, 1, 2, 8, 8, 8, 0), + MAKE_AD(4, 2, 0, 1, 1, 5, 6, 5, 0) + }, + }; + unsigned int arch_monitor; + + /* The DVI port is mis-wired on revision 1 of this board. */ + arch_monitor = + ((*pixis_arch == 0x01) && (port == FSL_DIU_PORT_DVI)) ? 0 : 1; + + switch (bits_per_pixel) { + case 32: + return pixelformat[arch_monitor][0]; + case 24: + return pixelformat[arch_monitor][1]; + case 16: + return pixelformat[arch_monitor][2]; + default: + pr_err("fsl-diu: unsupported pixel depth %u\n", bits_per_pixel); + return 0; + } +} + +void mpc8610hpcd_set_gamma_table(enum fsl_diu_monitor_port port, + char *gamma_table_base) +{ + int i; + if (port == FSL_DIU_PORT_DLVDS) { + for (i = 0; i < 256*3; i++) + gamma_table_base[i] = (gamma_table_base[i] << 2) | + ((gamma_table_base[i] >> 6) & 0x03); + } +} + +#define PX_BRDCFG0_DVISEL (1 << 3) +#define PX_BRDCFG0_DLINK (1 << 4) +#define PX_BRDCFG0_DIU_MASK (PX_BRDCFG0_DVISEL | PX_BRDCFG0_DLINK) + +void mpc8610hpcd_set_monitor_port(enum fsl_diu_monitor_port port) +{ + switch (port) { + case FSL_DIU_PORT_DVI: + clrsetbits_8(pixis_bdcfg0, PX_BRDCFG0_DIU_MASK, + PX_BRDCFG0_DVISEL | PX_BRDCFG0_DLINK); + break; + case FSL_DIU_PORT_LVDS: + clrsetbits_8(pixis_bdcfg0, PX_BRDCFG0_DIU_MASK, + PX_BRDCFG0_DLINK); + break; + case FSL_DIU_PORT_DLVDS: + clrbits8(pixis_bdcfg0, PX_BRDCFG0_DIU_MASK); + break; + } +} + +/** + * mpc8610hpcd_set_pixel_clock: program the DIU's clock + * + * @pixclock: the wavelength, in picoseconds, of the clock + */ +void mpc8610hpcd_set_pixel_clock(unsigned int pixclock) +{ + struct device_node *guts_np = NULL; + struct ccsr_guts __iomem *guts; + unsigned long freq; + u64 temp; + u32 pxclk; + + /* Map the global utilities registers. */ + guts_np = of_find_compatible_node(NULL, NULL, "fsl,mpc8610-guts"); + if (!guts_np) { + pr_err("mpc8610hpcd: missing global utilities device node\n"); + return; + } + + guts = of_iomap(guts_np, 0); + of_node_put(guts_np); + if (!guts) { + pr_err("mpc8610hpcd: could not map global utilities device\n"); + return; + } + + /* Convert pixclock from a wavelength to a frequency */ + temp = 1000000000000ULL; + do_div(temp, pixclock); + freq = temp; + + /* + * 'pxclk' is the ratio of the platform clock to the pixel clock. 
+ * On the MPC8610, the value programmed into CLKDVDR is the ratio + * minus one. The valid range of values is 2-31. + */ + pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq) - 1; + pxclk = clamp_t(u32, pxclk, 2, 31); + + /* Disable the pixel clock, and set it to non-inverted and no delay */ + clrbits32(&guts->clkdvdr, + CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK); + + /* Enable the clock and set the pxclk */ + setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16)); + + iounmap(guts); +} + +enum fsl_diu_monitor_port +mpc8610hpcd_valid_monitor_port(enum fsl_diu_monitor_port port) +{ + return port; +} + +#endif + +static void __init mpc86xx_hpcd_setup_arch(void) +{ + struct resource r; + unsigned char *pixis; + + if (ppc_md.progress) + ppc_md.progress("mpc86xx_hpcd_setup_arch()", 0); + + fsl_pci_assign_primary(); + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + diu_ops.get_pixel_format = mpc8610hpcd_get_pixel_format; + diu_ops.set_gamma_table = mpc8610hpcd_set_gamma_table; + diu_ops.set_monitor_port = mpc8610hpcd_set_monitor_port; + diu_ops.set_pixel_clock = mpc8610hpcd_set_pixel_clock; + diu_ops.valid_monitor_port = mpc8610hpcd_valid_monitor_port; +#endif + + pixis_node = of_find_compatible_node(NULL, NULL, "fsl,fpga-pixis"); + if (pixis_node) { + of_address_to_resource(pixis_node, 0, &r); + of_node_put(pixis_node); + pixis = ioremap(r.start, 32); + if (!pixis) { + printk(KERN_ERR "Err: can't map FPGA cfg register!\n"); + return; + } + pixis_bdcfg0 = pixis + 8; + pixis_arch = pixis + 1; + } else + printk(KERN_ERR "Err: " + "can't find device node 'fsl,fpga-pixis'\n"); + + printk("MPC86xx HPCD board from Freescale Semiconductor\n"); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mpc86xx_hpcd_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,MPC8610HPCD")) + return 1; /* Looks good */ + + return 0; +} + +static long __init mpc86xx_time_init(void) +{ + unsigned int temp; + + /* Set the time base to zero */ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, 0); + + temp = mfspr(SPRN_HID0); + temp |= HID0_TBEN; + mtspr(SPRN_HID0, temp); + asm volatile("isync"); + + return 0; +} + +define_machine(mpc86xx_hpcd) { + .name = "MPC86xx HPCD", + .probe = mpc86xx_hpcd_probe, + .setup_arch = mpc86xx_hpcd_setup_arch, + .init_IRQ = mpc86xx_init_irq, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .time_init = mpc86xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +}; diff --git a/arch/powerpc/platforms/86xx/mpc86xx.h b/arch/powerpc/platforms/86xx/mpc86xx.h new file mode 100644 index 000000000..08efb5755 --- /dev/null +++ b/arch/powerpc/platforms/86xx/mpc86xx.h @@ -0,0 +1,21 @@ +/* + * Copyright 2006 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __MPC86XX_H__ +#define __MPC86XX_H__ + +/* + * Declaration for the various functions exported by the + * mpc86xx_* files. Mostly for use by mpc86xx_setup(). 
+ */ + +extern void mpc86xx_smp_init(void); +extern void mpc86xx_init_irq(void); + +#endif /* __MPC86XX_H__ */ diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c new file mode 100644 index 000000000..07ccb1b0c --- /dev/null +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -0,0 +1,161 @@ +/* + * MPC86xx HPCN board specific routines + * + * Recode: ZHANG WEI <wei.zhang@freescale.com> + * Initial author: Xianghua Xiao <x.xiao@freescale.com> + * + * Copyright 2006 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> +#include <asm/swiotlb.h> + +#include <asm/mpic.h> + +#include <sysdev/fsl_pci.h> +#include <sysdev/fsl_soc.h> + +#include "mpc86xx.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) do { printk(KERN_ERR fmt); } while(0) +#else +#define DBG(fmt...) do { } while(0) +#endif + +#ifdef CONFIG_PCI +extern int uli_exclude_device(struct pci_controller *hose, + u_char bus, u_char devfn); + +static int mpc86xx_exclude_device(struct pci_controller *hose, + u_char bus, u_char devfn) +{ + if (hose->dn == fsl_pci_primary) + return uli_exclude_device(hose, bus, devfn); + + return PCIBIOS_SUCCESSFUL; +} +#endif /* CONFIG_PCI */ + + +static void __init +mpc86xx_hpcn_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc86xx_hpcn_setup_arch()", 0); + +#ifdef CONFIG_PCI + ppc_md.pci_exclude_device = mpc86xx_exclude_device; +#endif + + printk("MPC86xx HPCN board from Freescale Semiconductor\n"); + +#ifdef CONFIG_SMP + mpc86xx_smp_init(); +#endif + + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); +} + + +static void +mpc86xx_hpcn_show_cpuinfo(struct seq_file *m) +{ + uint svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: Freescale Semiconductor\n"); + + seq_printf(m, "SVR\t\t: 0x%x\n", svid); +} + + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mpc86xx_hpcn_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "fsl,mpc8641hpcn")) + return 1; /* Looks good */ + + /* Be nice and don't give silent boot death. Delete this in 2.6.27 */ + if (of_flat_dt_is_compatible(root, "mpc86xx")) { + pr_warning("WARNING: your dts/dtb is old. 
You must update before the next kernel release\n"); + return 1; + } + + return 0; +} + +static long __init +mpc86xx_time_init(void) +{ + unsigned int temp; + + /* Set the time base to zero */ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, 0); + + temp = mfspr(SPRN_HID0); + temp |= HID0_TBEN; + mtspr(SPRN_HID0, temp); + asm volatile("isync"); + + return 0; +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + { .compatible = "fsl,srio", }, + { .compatible = "gianfar", }, + { .compatible = "fsl,mpc8641-pcie", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_arch_initcall(mpc86xx_hpcn, declare_of_platform_devices); +machine_arch_initcall(mpc86xx_hpcn, swiotlb_setup_bus_notifier); + +define_machine(mpc86xx_hpcn) { + .name = "MPC86xx HPCN", + .probe = mpc86xx_hpcn_probe, + .setup_arch = mpc86xx_hpcn_setup_arch, + .init_IRQ = mpc86xx_init_irq, + .show_cpuinfo = mpc86xx_hpcn_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .time_init = mpc86xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +}; diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c new file mode 100644 index 000000000..af09baee2 --- /dev/null +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -0,0 +1,121 @@ +/* + * Author: Xianghua Xiao <x.xiao@freescale.com> + * Zhang Wei <wei.zhang@freescale.com> + * + * Copyright 2006 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/delay.h> + +#include <asm/code-patching.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/pci-bridge.h> +#include <asm/mpic.h> +#include <asm/cacheflush.h> + +#include <sysdev/fsl_soc.h> + +#include "mpc86xx.h" + +extern void __secondary_start_mpc86xx(void); + +#define MCM_PORT_CONFIG_OFFSET 0x10 + +/* Offset from CCSRBAR */ +#define MPC86xx_MCM_OFFSET (0x1000) +#define MPC86xx_MCM_SIZE (0x1000) + +static void __init +smp_86xx_release_core(int nr) +{ + __be32 __iomem *mcm_vaddr; + unsigned long pcr; + + if (nr < 0 || nr >= NR_CPUS) + return; + + /* + * Startup Core #nr. + */ + mcm_vaddr = ioremap(get_immrbase() + MPC86xx_MCM_OFFSET, + MPC86xx_MCM_SIZE); + pcr = in_be32(mcm_vaddr + (MCM_PORT_CONFIG_OFFSET >> 2)); + pcr |= 1 << (nr + 24); + out_be32(mcm_vaddr + (MCM_PORT_CONFIG_OFFSET >> 2), pcr); + + iounmap(mcm_vaddr); +} + + +static int __init +smp_86xx_kick_cpu(int nr) +{ + unsigned int save_vector; + unsigned long target, flags; + int n = 0; + unsigned int *vector = (unsigned int *)(KERNELBASE + 0x100); + + if (nr < 0 || nr >= NR_CPUS) + return -ENOENT; + + pr_debug("smp_86xx_kick_cpu: kick CPU #%d\n", nr); + + local_irq_save(flags); + + /* Save reset vector */ + save_vector = *vector; + + /* Setup fake reset vector to call __secondary_start_mpc86xx. */ + target = (unsigned long) __secondary_start_mpc86xx; + patch_branch(vector, target, BRANCH_SET_LINK); + + /* Kick that CPU */ + smp_86xx_release_core(nr); + + /* Wait a bit for the CPU to take the exception. 
*/ + while ((__secondary_hold_acknowledge != nr) && (n++, n < 1000)) + mdelay(1); + + /* Restore the exception vector */ + *vector = save_vector; + flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); + + local_irq_restore(flags); + + pr_debug("wait CPU #%d for %d msecs.\n", nr, n); + + return 0; +} + + +static void __init +smp_86xx_setup_cpu(int cpu_nr) +{ + mpic_setup_this_cpu(); +} + + +struct smp_ops_t smp_86xx_ops = { + .message_pass = smp_mpic_message_pass, + .probe = smp_mpic_probe, + .kick_cpu = smp_86xx_kick_cpu, + .setup_cpu = smp_86xx_setup_cpu, + .take_timebase = smp_generic_take_timebase, + .give_timebase = smp_generic_give_timebase, +}; + + +void __init +mpc86xx_smp_init(void) +{ + smp_ops = &smp_86xx_ops; +} diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c new file mode 100644 index 000000000..d5b98c0f9 --- /dev/null +++ b/arch/powerpc/platforms/86xx/pic.c @@ -0,0 +1,71 @@ +/* + * Copyright 2008 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> + +#include <asm/mpic.h> +#include <asm/i8259.h> + +#ifdef CONFIG_PPC_I8259 +static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq != NO_IRQ) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} +#endif /* CONFIG_PPC_I8259 */ + +void __init mpc86xx_init_irq(void) +{ +#ifdef CONFIG_PPC_I8259 + struct device_node *np; + struct device_node *cascade_node = NULL; + int cascade_irq; +#endif + + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " MPIC "); + BUG_ON(mpic == NULL); + + mpic_init(mpic); + +#ifdef CONFIG_PPC_I8259 + /* Initialize i8259 controller */ + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "chrp,iic")) { + cascade_node = np; + break; + } + + if (cascade_node == NULL) { + printk(KERN_DEBUG "Could not find i8259 PIC\n"); + return; + } + + cascade_irq = irq_of_parse_and_map(cascade_node, 0); + if (cascade_irq == NO_IRQ) { + printk(KERN_ERR "Failed to map cascade interrupt\n"); + return; + } + + i8259_init(cascade_node, 0); + of_node_put(cascade_node); + + irq_set_chained_handler(cascade_irq, mpc86xx_8259_cascade); +#endif +} diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c new file mode 100644 index 000000000..6810b71d5 --- /dev/null +++ b/arch/powerpc/platforms/86xx/sbc8641d.c @@ -0,0 +1,124 @@ +/* + * SBC8641D board specific routines + * + * Copyright 2008 Wind River Systems Inc. + * + * By Paul Gortmaker (see MAINTAINERS for contact information) + * + * Based largely on the 8641 HPCN support by Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> + +#include <asm/mpic.h> + +#include <sysdev/fsl_pci.h> +#include <sysdev/fsl_soc.h> + +#include "mpc86xx.h" + +static void __init +sbc8641_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("sbc8641_setup_arch()", 0); + + printk("SBC8641 board from Wind River\n"); + +#ifdef CONFIG_SMP + mpc86xx_smp_init(); +#endif + + fsl_pci_assign_primary(); +} + + +static void +sbc8641_show_cpuinfo(struct seq_file *m) +{ + uint svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: Wind River Systems\n"); + + seq_printf(m, "SVR\t\t: 0x%x\n", svid); +} + + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init sbc8641_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "wind,sbc8641")) + return 1; /* Looks good */ + + return 0; +} + +static long __init +mpc86xx_time_init(void) +{ + unsigned int temp; + + /* Set the time base to zero */ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, 0); + + temp = mfspr(SPRN_HID0); + temp |= HID0_TBEN; + mtspr(SPRN_HID0, temp); + asm volatile("isync"); + + return 0; +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + { .compatible = "gianfar", }, + { .compatible = "fsl,mpc8641-pcie", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_arch_initcall(sbc8641, declare_of_platform_devices); + +define_machine(sbc8641) { + .name = "SBC8641D", + .probe = sbc8641_probe, + .setup_arch = sbc8641_setup_arch, + .init_IRQ = mpc86xx_init_irq, + .show_cpuinfo = sbc8641_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .time_init = mpc86xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +}; diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig new file mode 100644 index 000000000..157250426 --- /dev/null +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -0,0 +1,178 @@ +config CPM1 + bool + select CPM + +choice + prompt "8xx Machine Type" + depends on PPC_8xx + depends on 8xx + default MPC885ADS + +config MPC8XXFADS + bool "FADS" + +config MPC86XADS + bool "MPC86XADS" + select CPM1 + help + MPC86x Application Development System by Freescale Semiconductor. + The MPC86xADS is meant to serve as a platform for s/w and h/w + development around the MPC86X processor families. + +config MPC885ADS + bool "MPC885ADS" + select CPM1 + select OF_DYNAMIC + help + Freescale Semiconductor MPC885 Application Development System (ADS). + Also known as DUET. + The MPC885ADS is meant to serve as a platform for s/w and h/w + development around the MPC885 processor family. + +config PPC_EP88XC + bool "Embedded Planet EP88xC (a.k.a. CWH-PPC-885XN-VE)" + select CPM1 + help + This enables support for the Embedded Planet EP88xC board. + + This board is also resold by Freescale as the QUICCStart + MPC885 Evaluation System and/or the CWH-PPC-885XN-VE. 
+ +config PPC_ADDER875 + bool "Analogue & Micro Adder 875" + select CPM1 + help + This enables support for the Analogue & Micro Adder 875 + board. + +config TQM8XX + bool "TQM8XX" + select CPM1 + help + Support for the MPC8xx-based boards from TQM. + +endchoice + +menu "Freescale Ethernet driver platform-specific options" + depends on (FS_ENET && MPC885ADS) + + config MPC8xx_SECOND_ETH + bool "Second Ethernet channel" + depends on MPC885ADS + default y + help + This enables support for a second Ethernet channel on MPC885ADS and MPC86xADS boards. + The latter will use SCC1; for the 885ADS you can select the channel below. + + choice + prompt "Second Ethernet channel" + depends on MPC8xx_SECOND_ETH + default MPC8xx_SECOND_ETH_FEC2 + + config MPC8xx_SECOND_ETH_FEC2 + bool "FEC2" + depends on MPC885ADS + help + Enable FEC2 to serve as the second Ethernet channel. Note that SMC2 + (often the second UART) will not work if this is enabled. + + config MPC8xx_SECOND_ETH_SCC3 + bool "SCC3" + depends on MPC885ADS + help + Enable SCC3 to serve as the second Ethernet channel. Note that SMC1 + (often the first UART) will not work if this is enabled. + + endchoice + +endmenu + +# +# MPC8xx Communication options +# + +menu "MPC8xx CPM Options" + depends on 8xx + +# This doesn't really belong here, but it is convenient to ask +# 8xx specific questions. +comment "Generic MPC8xx Options" + +config 8xx_COPYBACK + bool "Copy-Back Data Cache (else Writethrough)" + help + Saying Y here will cause the cache on an MPC8xx processor to be used + in Copy-Back mode. If you say N here, it is used in Writethrough + mode. + + If in doubt, say Y here. + +config 8xx_GPIO + bool "GPIO API Support" + select ARCH_REQUIRE_GPIOLIB + help + Saying Y here will cause the ports on an MPC8xx processor to be used + with the GPIO API. If you say N here, the kernel needs less memory. + + If in doubt, say Y here. + +config 8xx_CPU6 + bool "CPU6 Silicon Errata (860 Pre Rev. C)" + help + MPC860 CPUs prior to Rev. C have some bugs in the silicon which + require workarounds for Linux (and most other OSes) to work. If you + get a BUG() very early in boot, this might fix the problem. For + more details read the document entitled "MPC860 Family Device Errata + Reference" on Freescale's website. This option also incurs a + performance hit. + + If in doubt, say N here. + +config 8xx_CPU15 + bool "CPU15 Silicon Errata" + default y + help + This enables a workaround for erratum CPU15 on MPC8xx chips. + This bug can cause incorrect code execution under certain + circumstances. This workaround adds some overhead (a TLB miss + every time execution crosses a page boundary), and you may wish + to disable it if you have worked around the bug in the compiler + (by not placing conditional branches or branches to LR or CTR + in the last word of a page, with a target of the last cache + line in the next page), or if you have used some other + workaround. + + If in doubt, say Y here. + +choice + prompt "Microcode patch selection" + default NO_UCODE_PATCH + help + Help not implemented yet, coming soon. + +config NO_UCODE_PATCH + bool "None" + +config USB_SOF_UCODE_PATCH + bool "USB SOF patch" + help + Help not implemented yet, coming soon. + +config I2C_SPI_UCODE_PATCH + bool "I2C/SPI relocation patch" + help + Help not implemented yet, coming soon. + +config I2C_SPI_SMC1_UCODE_PATCH + bool "I2C/SPI/SMC1 relocation patch" + help + Help not implemented yet, coming soon.
+ +endchoice + +config UCODE_PATCH + bool + default y + depends on !NO_UCODE_PATCH + +endmenu diff --git a/arch/powerpc/platforms/8xx/Makefile b/arch/powerpc/platforms/8xx/Makefile new file mode 100644 index 000000000..76a81c335 --- /dev/null +++ b/arch/powerpc/platforms/8xx/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for the PowerPC 8xx linux kernel. +# +obj-$(CONFIG_PPC_8xx) += m8xx_setup.o +obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o +obj-$(CONFIG_MPC86XADS) += mpc86xads_setup.o +obj-$(CONFIG_PPC_EP88XC) += ep88xc.o +obj-$(CONFIG_PPC_ADDER875) += adder875.o +obj-$(CONFIG_TQM8XX) += tqm8xx_setup.o diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c new file mode 100644 index 000000000..61cae4c1e --- /dev/null +++ b/arch/powerpc/platforms/8xx/adder875.c @@ -0,0 +1,118 @@ +/* Analogue & Micro Adder MPC875 board support + * + * Author: Scott Wood <scottwood@freescale.com> + * + * Copyright (c) 2007 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/fs_enet_pd.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/cpm1.h> +#include <asm/fs_pd.h> +#include <asm/udbg.h> +#include <asm/prom.h> + +#include "mpc8xx.h" + +struct cpm_pin { + int port, pin, flags; +}; + +static __initdata struct cpm_pin adder875_pins[] = { + /* SMC1 */ + {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* MII1 */ + {CPM_PORTA, 0, CPM_PIN_INPUT}, + {CPM_PORTA, 1, CPM_PIN_INPUT}, + {CPM_PORTA, 2, CPM_PIN_INPUT}, + {CPM_PORTA, 3, CPM_PIN_INPUT}, + {CPM_PORTA, 4, CPM_PIN_OUTPUT}, + {CPM_PORTA, 10, CPM_PIN_OUTPUT}, + {CPM_PORTA, 11, CPM_PIN_OUTPUT}, + {CPM_PORTB, 19, CPM_PIN_INPUT}, + {CPM_PORTB, 31, CPM_PIN_INPUT}, + {CPM_PORTC, 12, CPM_PIN_INPUT}, + {CPM_PORTC, 13, CPM_PIN_INPUT}, + {CPM_PORTE, 30, CPM_PIN_OUTPUT}, + {CPM_PORTE, 31, CPM_PIN_OUTPUT}, + + /* MII2 */ + {CPM_PORTE, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 16, CPM_PIN_OUTPUT}, + {CPM_PORTE, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 21, CPM_PIN_OUTPUT}, + {CPM_PORTE, 22, CPM_PIN_OUTPUT}, + {CPM_PORTE, 23, CPM_PIN_OUTPUT}, + {CPM_PORTE, 24, CPM_PIN_OUTPUT}, + {CPM_PORTE, 25, CPM_PIN_OUTPUT}, + {CPM_PORTE, 26, CPM_PIN_OUTPUT}, + {CPM_PORTE, 27, CPM_PIN_OUTPUT}, + {CPM_PORTE, 28, CPM_PIN_OUTPUT}, + {CPM_PORTE, 29, CPM_PIN_OUTPUT}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(adder875_pins); i++) { + const struct cpm_pin *pin = &adder875_pins[i]; + cpm1_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX); + + /* Set FEC1 and FEC2 to MII mode */ + clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180); +} + +static void __init adder875_setup(void) +{ + cpm_reset(); + init_ioports(); +} + +static int __init adder875_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + return of_flat_dt_is_compatible(root, "analogue-and-micro,adder875"); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + {}, +}; + +static int 
__init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + return 0; +} +machine_device_initcall(adder875, declare_of_platform_devices); + +define_machine(adder875) { + .name = "Adder MPC875", + .probe = adder875_probe, + .setup_arch = adder875_setup, + .init_IRQ = mpc8xx_pics_init, + .get_irq = mpc8xx_get_irq, + .restart = mpc8xx_restart, + .calibrate_decr = generic_calibrate_decr, + .set_rtc_time = mpc8xx_set_rtc_time, + .get_rtc_time = mpc8xx_get_rtc_time, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c new file mode 100644 index 000000000..2bedeb7d5 --- /dev/null +++ b/arch/powerpc/platforms/8xx/ep88xc.c @@ -0,0 +1,177 @@ +/* + * Platform setup for the Embedded Planet EP88xC board + * + * Author: Scott Wood <scottwood@freescale.com> + * Copyright 2007 Freescale Semiconductor, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/init.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> + +#include <asm/machdep.h> +#include <asm/io.h> +#include <asm/udbg.h> +#include <asm/cpm1.h> + +#include "mpc8xx.h" + +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin ep88xc_pins[] = { + /* SMC1 */ + {1, 24, CPM_PIN_INPUT}, /* RX */ + {1, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* SCC2 */ + {0, 12, CPM_PIN_INPUT}, /* TX */ + {0, 13, CPM_PIN_INPUT}, /* RX */ + {2, 8, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CD */ + {2, 9, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CTS */ + {2, 14, CPM_PIN_INPUT}, /* RTS */ + + /* MII1 */ + {0, 0, CPM_PIN_INPUT}, + {0, 1, CPM_PIN_INPUT}, + {0, 2, CPM_PIN_INPUT}, + {0, 3, CPM_PIN_INPUT}, + {0, 4, CPM_PIN_OUTPUT}, + {0, 10, CPM_PIN_OUTPUT}, + {0, 11, CPM_PIN_OUTPUT}, + {1, 19, CPM_PIN_INPUT}, + {1, 31, CPM_PIN_INPUT}, + {2, 12, CPM_PIN_INPUT}, + {2, 13, CPM_PIN_INPUT}, + {3, 8, CPM_PIN_INPUT}, + {4, 30, CPM_PIN_OUTPUT}, + {4, 31, CPM_PIN_OUTPUT}, + + /* MII2 */ + {4, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 16, CPM_PIN_OUTPUT}, + {4, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 21, CPM_PIN_OUTPUT}, + {4, 22, CPM_PIN_OUTPUT}, + {4, 23, CPM_PIN_OUTPUT}, + {4, 24, CPM_PIN_OUTPUT}, + {4, 25, CPM_PIN_OUTPUT}, + {4, 26, CPM_PIN_OUTPUT}, + {4, 27, CPM_PIN_OUTPUT}, + {4, 28, CPM_PIN_OUTPUT}, + {4, 29, CPM_PIN_OUTPUT}, + + /* USB */ + {0, 6, CPM_PIN_INPUT}, /* CLK2 */ + {0, 14, CPM_PIN_INPUT}, /* USBOE */ + {0, 15, CPM_PIN_INPUT}, /* USBRXD */ + {2, 6, CPM_PIN_OUTPUT}, /* USBTXN */ + {2, 7, CPM_PIN_OUTPUT}, /* USBTXP */ + {2, 10, CPM_PIN_INPUT}, /* USBRXN */ + {2, 11, CPM_PIN_INPUT}, /* USBRXP */ + + /* Misc */ + {1, 26, CPM_PIN_INPUT}, /* BRGO2 */ + {1, 27, CPM_PIN_INPUT}, /* BRGO1 */ +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ep88xc_pins); i++) { + struct cpm_pin *pin = &ep88xc_pins[i]; + cpm1_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX); + cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_TX); /* USB */ + cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_RX); + cpm1_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX); + 
cpm1_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX); +} + +static u8 __iomem *ep88xc_bcsr; + +#define BCSR7_SCC2_ENABLE 0x10 + +#define BCSR8_PHY1_ENABLE 0x80 +#define BCSR8_PHY1_POWER 0x40 +#define BCSR8_PHY2_ENABLE 0x20 +#define BCSR8_PHY2_POWER 0x10 + +#define BCSR9_USB_ENABLE 0x80 +#define BCSR9_USB_POWER 0x40 +#define BCSR9_USB_HOST 0x20 +#define BCSR9_USB_FULL_SPEED_TARGET 0x10 + +static void __init ep88xc_setup_arch(void) +{ + struct device_node *np; + + cpm_reset(); + init_ioports(); + + np = of_find_compatible_node(NULL, NULL, "fsl,ep88xc-bcsr"); + if (!np) { + printk(KERN_CRIT "Could not find fsl,ep88xc-bcsr node\n"); + return; + } + + ep88xc_bcsr = of_iomap(np, 0); + of_node_put(np); + + if (!ep88xc_bcsr) { + printk(KERN_CRIT "Could not remap BCSR\n"); + return; + } + + setbits8(&ep88xc_bcsr[7], BCSR7_SCC2_ENABLE); + setbits8(&ep88xc_bcsr[8], BCSR8_PHY1_ENABLE | BCSR8_PHY1_POWER | + BCSR8_PHY2_ENABLE | BCSR8_PHY2_POWER); +} + +static int __init ep88xc_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + return of_flat_dt_is_compatible(root, "fsl,ep88xc"); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .name = "cpm", }, + { .name = "localbus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + /* Publish the QE devices */ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(ep88xc, declare_of_platform_devices); + +define_machine(ep88xc) { + .name = "Embedded Planet EP88xC", + .probe = ep88xc_probe, + .setup_arch = ep88xc_setup_arch, + .init_IRQ = mpc8xx_pics_init, + .get_irq = mpc8xx_get_irq, + .restart = mpc8xx_restart, + .calibrate_decr = mpc8xx_calibrate_decr, + .set_rtc_time = mpc8xx_set_rtc_time, + .get_rtc_time = mpc8xx_get_rtc_time, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c new file mode 100644 index 000000000..d30377470 --- /dev/null +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -0,0 +1,246 @@ +/* + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified for MBX using prep/chrp/pmac functions by Dan (dmalek@jlc.net) + * Further modified for generic 8xx by Dan. + */ + +/* + * bootup setup stuff.. + */ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/time.h> +#include <linux/rtc.h> +#include <linux/fsl_devices.h> + +#include <asm/io.h> +#include <asm/8xx_immap.h> +#include <asm/prom.h> +#include <asm/fs_pd.h> +#include <mm/mmu_decl.h> + +#include <sysdev/mpc8xx_pic.h> + +#include "mpc8xx.h" + +extern int cpm_pic_init(void); +extern int cpm_get_irq(void); + +/* A place holder for time base interrupts, if they are ever enabled. */ +static irqreturn_t timebase_interrupt(int irq, void *dev) +{ + printk ("timebase_interrupt()\n"); + + return IRQ_HANDLED; +} + +static struct irqaction tbint_irqaction = { + .handler = timebase_interrupt, + .flags = IRQF_NO_THREAD, + .name = "tbint", +}; + +/* per-board overridable init_internal_rtc() function. */ +void __init __attribute__ ((weak)) +init_internal_rtc(void) +{ + sit8xx_t __iomem *sys_tmr = immr_map(im_sit); + + /* Disable the RTC one second and alarm interrupts. 
*/ + clrbits16(&sys_tmr->sit_rtcsc, (RTCSC_SIE | RTCSC_ALE)); + + /* Enable the RTC */ + setbits16(&sys_tmr->sit_rtcsc, (RTCSC_RTF | RTCSC_RTE)); + immr_unmap(sys_tmr); +} + +static int __init get_freq(char *name, unsigned long *val) +{ + struct device_node *cpu; + const unsigned int *fp; + int found = 0; + + /* The cpu node should have timebase and clock frequency properties */ + cpu = of_find_node_by_type(NULL, "cpu"); + + if (cpu) { + fp = of_get_property(cpu, name, NULL); + if (fp) { + found = 1; + *val = *fp; + } + + of_node_put(cpu); + } + + return found; +} + +/* The decrementer counts at the system (internal) clock frequency divided by + * sixteen, or external oscillator divided by four. We force the processor + * to use system clock divided by sixteen. + */ +void __init mpc8xx_calibrate_decr(void) +{ + struct device_node *cpu; + cark8xx_t __iomem *clk_r1; + car8xx_t __iomem *clk_r2; + sitk8xx_t __iomem *sys_tmr1; + sit8xx_t __iomem *sys_tmr2; + int irq, virq; + + clk_r1 = immr_map(im_clkrstk); + + /* Unlock the SCCR. */ + out_be32(&clk_r1->cark_sccrk, ~KAPWR_KEY); + out_be32(&clk_r1->cark_sccrk, KAPWR_KEY); + immr_unmap(clk_r1); + + /* Force all 8xx processors to use divide by 16 processor clock. */ + clk_r2 = immr_map(im_clkrst); + setbits32(&clk_r2->car_sccr, 0x02000000); + immr_unmap(clk_r2); + + /* Processor frequency is MHz. + */ + ppc_proc_freq = 50000000; + if (!get_freq("clock-frequency", &ppc_proc_freq)) + printk(KERN_ERR "WARNING: Estimating processor frequency " + "(not found)\n"); + + ppc_tb_freq = ppc_proc_freq / 16; + printk("Decrementer Frequency = 0x%lx\n", ppc_tb_freq); + + /* Perform some more timer/timebase initialization. This used + * to be done elsewhere, but other changes caused it to get + * called more than once....that is a bad thing. + * + * First, unlock all of the registers we are going to modify. + * To protect them from corruption during power down, registers + * that are maintained by keep alive power are "locked". To + * modify these registers we have to write the key value to + * the key location associated with the register. + * Some boards power up with these unlocked, while others + * are locked. Writing anything (including the unlock code?) + * to the unlocked registers will lock them again. So, here + * we guarantee the registers are locked, then we unlock them + * for our use. + */ + sys_tmr1 = immr_map(im_sitk); + out_be32(&sys_tmr1->sitk_tbscrk, ~KAPWR_KEY); + out_be32(&sys_tmr1->sitk_rtcsck, ~KAPWR_KEY); + out_be32(&sys_tmr1->sitk_tbk, ~KAPWR_KEY); + out_be32(&sys_tmr1->sitk_tbscrk, KAPWR_KEY); + out_be32(&sys_tmr1->sitk_rtcsck, KAPWR_KEY); + out_be32(&sys_tmr1->sitk_tbk, KAPWR_KEY); + immr_unmap(sys_tmr1); + + init_internal_rtc(); + + /* Enabling the decrementer also enables the timebase interrupts + * (or from the other point of view, to get decrementer interrupts + * we have to enable the timebase). The decrementer interrupt + * is wired into the vector table, nothing to do here for that. + */ + cpu = of_find_node_by_type(NULL, "cpu"); + virq= irq_of_parse_and_map(cpu, 0); + irq = virq_to_hw(virq); + + sys_tmr2 = immr_map(im_sit); + out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) | + (TBSCR_TBF | TBSCR_TBE)); + immr_unmap(sys_tmr2); + + if (setup_irq(virq, &tbint_irqaction)) + panic("Could not allocate timer IRQ!"); +} + +/* The RTC on the MPC8xx is an internal register. + * We want to protect this during power down, so we need to unlock, + * modify, and re-lock. 
+ */ + +int mpc8xx_set_rtc_time(struct rtc_time *tm) +{ + sitk8xx_t __iomem *sys_tmr1; + sit8xx_t __iomem *sys_tmr2; + int time; + + sys_tmr1 = immr_map(im_sitk); + sys_tmr2 = immr_map(im_sit); + time = mktime(tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec); + + out_be32(&sys_tmr1->sitk_rtck, KAPWR_KEY); + out_be32(&sys_tmr2->sit_rtc, time); + out_be32(&sys_tmr1->sitk_rtck, ~KAPWR_KEY); + + immr_unmap(sys_tmr2); + immr_unmap(sys_tmr1); + return 0; +} + +void mpc8xx_get_rtc_time(struct rtc_time *tm) +{ + unsigned long data; + sit8xx_t __iomem *sys_tmr = immr_map(im_sit); + + /* Get time from the RTC. */ + data = in_be32(&sys_tmr->sit_rtc); + to_tm(data, tm); + tm->tm_year -= 1900; + tm->tm_mon -= 1; + immr_unmap(sys_tmr); + return; +} + +void mpc8xx_restart(char *cmd) +{ + car8xx_t __iomem *clk_r = immr_map(im_clkrst); + + + local_irq_disable(); + + setbits32(&clk_r->car_plprcr, 0x00000080); + /* Clear the ME bit in MSR to cause checkstop on machine check + */ + mtmsr(mfmsr() & ~0x1000); + + in_8(&clk_r->res[0]); + panic("Restart failed\n"); +} + +static void cpm_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + int cascade_irq = cpm_get_irq(); + + if (cascade_irq >= 0) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + +/* Initialize the internal interrupt controllers. The number of + * interrupts supported can vary with the processor type, and the + * 82xx family can have up to 64. + * External interrupts can be either edge or level triggered, and + * need to be initialized by the appropriate driver. + */ +void __init mpc8xx_pics_init(void) +{ + int irq; + + if (mpc8xx_pic_init()) { + printk(KERN_ERR "Failed interrupt 8xx controller initialization\n"); + return; + } + + irq = cpm_pic_init(); + if (irq != NO_IRQ) + irq_set_chained_handler(irq, cpm_cascade); +} diff --git a/arch/powerpc/platforms/8xx/mpc86xads.h b/arch/powerpc/platforms/8xx/mpc86xads.h new file mode 100644 index 000000000..17b1fe75e --- /dev/null +++ b/arch/powerpc/platforms/8xx/mpc86xads.h @@ -0,0 +1,47 @@ +/* + * A collection of structures, addresses, and values associated with + * the Freescale MPC86xADS board. + * Copied from the FADS stuff. + * + * Author: MontaVista Software, Inc. + * source@mvista.com + * + * 2005 (c) MontaVista Software, Inc. This file is licensed under the + * terms of the GNU General Public License version 2. This program is licensed + * "as is" without any warranty of any kind, whether express or implied. + */ + +#ifdef __KERNEL__ +#ifndef __ASM_MPC86XADS_H__ +#define __ASM_MPC86XADS_H__ + +/* Bits of interest in the BCSRs. 
+ */ +#define BCSR1_ETHEN ((uint)0x20000000) +#define BCSR1_IRDAEN ((uint)0x10000000) +#define BCSR1_RS232EN_1 ((uint)0x01000000) +#define BCSR1_PCCEN ((uint)0x00800000) +#define BCSR1_PCCVCC0 ((uint)0x00400000) +#define BCSR1_PCCVPP0 ((uint)0x00200000) +#define BCSR1_PCCVPP1 ((uint)0x00100000) +#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1) +#define BCSR1_RS232EN_2 ((uint)0x00040000) +#define BCSR1_PCCVCC1 ((uint)0x00010000) +#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1) + +#define BCSR4_ETH10_RST ((uint)0x80000000) /* 10Base-T PHY reset*/ +#define BCSR4_USB_LO_SPD ((uint)0x04000000) +#define BCSR4_USB_VCC ((uint)0x02000000) +#define BCSR4_USB_FULL_SPD ((uint)0x00040000) +#define BCSR4_USB_EN ((uint)0x00020000) + +#define BCSR5_MII2_EN 0x40 +#define BCSR5_MII2_RST 0x20 +#define BCSR5_T1_RST 0x10 +#define BCSR5_ATM155_RST 0x08 +#define BCSR5_ATM25_RST 0x04 +#define BCSR5_MII1_EN 0x02 +#define BCSR5_MII1_RST 0x01 + +#endif /* __ASM_MPC86XADS_H__ */ +#endif /* __KERNEL__ */ diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c new file mode 100644 index 000000000..78180c5e7 --- /dev/null +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c @@ -0,0 +1,151 @@ +/*arch/powerpc/platforms/8xx/mpc86xads_setup.c + * + * Platform setup for the Freescale mpc86xads board + * + * Vitaly Bordug <vbordug@ru.mvista.com> + * + * Copyright 2005 MontaVista Software Inc. + * + * Heavily modified by Scott Wood <scottwood@freescale.com> + * Copyright 2007 Freescale Semiconductor, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/init.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> + +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/8xx_immap.h> +#include <asm/cpm1.h> +#include <asm/fs_pd.h> +#include <asm/udbg.h> + +#include "mpc86xads.h" +#include "mpc8xx.h" + +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin mpc866ads_pins[] = { + /* SMC1 */ + {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* SMC2 */ + {CPM_PORTB, 21, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 20, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* SCC1 */ + {CPM_PORTA, 6, CPM_PIN_INPUT}, /* CLK1 */ + {CPM_PORTA, 7, CPM_PIN_INPUT}, /* CLK2 */ + {CPM_PORTA, 14, CPM_PIN_INPUT}, /* TX */ + {CPM_PORTA, 15, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 19, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */ + {CPM_PORTC, 10, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* RENA */ + {CPM_PORTC, 11, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CLSN */ + + /* MII */ + {CPM_PORTD, 3, CPM_PIN_OUTPUT}, + {CPM_PORTD, 4, CPM_PIN_OUTPUT}, + {CPM_PORTD, 5, CPM_PIN_OUTPUT}, + {CPM_PORTD, 6, CPM_PIN_OUTPUT}, + {CPM_PORTD, 7, CPM_PIN_OUTPUT}, + {CPM_PORTD, 8, CPM_PIN_OUTPUT}, + {CPM_PORTD, 9, CPM_PIN_OUTPUT}, + {CPM_PORTD, 10, CPM_PIN_OUTPUT}, + {CPM_PORTD, 11, CPM_PIN_OUTPUT}, + {CPM_PORTD, 12, CPM_PIN_OUTPUT}, + {CPM_PORTD, 13, CPM_PIN_OUTPUT}, + {CPM_PORTD, 14, CPM_PIN_OUTPUT}, + {CPM_PORTD, 15, CPM_PIN_OUTPUT}, + + /* I2C */ + {CPM_PORTB, 26, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN}, + {CPM_PORTB, 27, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mpc866ads_pins); i++) { + struct cpm_pin 
*pin = &mpc866ads_pins[i]; + cpm1_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX); + cpm1_clk_setup(CPM_CLK_SMC2, CPM_BRG2, CPM_CLK_RTX); + cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK1, CPM_CLK_TX); + cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_RX); + + /* Set FEC1 and FEC2 to MII mode */ + clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180); +} + +static void __init mpc86xads_setup_arch(void) +{ + struct device_node *np; + u32 __iomem *bcsr_io; + + cpm_reset(); + init_ioports(); + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc866ads-bcsr"); + if (!np) { + printk(KERN_CRIT "Could not find fsl,mpc866ads-bcsr node\n"); + return; + } + + bcsr_io = of_iomap(np, 0); + of_node_put(np); + + if (bcsr_io == NULL) { + printk(KERN_CRIT "Could not remap BCSR\n"); + return; + } + + clrbits32(bcsr_io, BCSR1_RS232EN_1 | BCSR1_RS232EN_2 | BCSR1_ETHEN); + iounmap(bcsr_io); +} + +static int __init mpc86xads_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + return of_flat_dt_is_compatible(root, "fsl,mpc866ads"); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .name = "cpm", }, + { .name = "localbus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(mpc86x_ads, declare_of_platform_devices); + +define_machine(mpc86x_ads) { + .name = "MPC86x ADS", + .probe = mpc86xads_probe, + .setup_arch = mpc86xads_setup_arch, + .init_IRQ = mpc8xx_pics_init, + .get_irq = mpc8xx_get_irq, + .restart = mpc8xx_restart, + .calibrate_decr = mpc8xx_calibrate_decr, + .set_rtc_time = mpc8xx_set_rtc_time, + .get_rtc_time = mpc8xx_get_rtc_time, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/8xx/mpc885ads.h b/arch/powerpc/platforms/8xx/mpc885ads.h new file mode 100644 index 000000000..19412f76f --- /dev/null +++ b/arch/powerpc/platforms/8xx/mpc885ads.h @@ -0,0 +1,49 @@ +/* + * A collection of structures, addresses, and values associated with + * the Freescale MPC885ADS board. + * Copied from the FADS stuff. + * + * Author: MontaVista Software, Inc. + * source@mvista.com + * + * 2005 (c) MontaVista Software, Inc. This file is licensed under the + * terms of the GNU General Public License version 2. This program is licensed + * "as is" without any warranty of any kind, whether express or implied. + */ + +#ifdef __KERNEL__ +#ifndef __ASM_MPC885ADS_H__ +#define __ASM_MPC885ADS_H__ + +#include <sysdev/fsl_soc.h> + +/* Bits of interest in the BCSRs. 
+ */ +#define BCSR1_ETHEN ((uint)0x20000000) +#define BCSR1_IRDAEN ((uint)0x10000000) +#define BCSR1_RS232EN_1 ((uint)0x01000000) +#define BCSR1_PCCEN ((uint)0x00800000) +#define BCSR1_PCCVCC0 ((uint)0x00400000) +#define BCSR1_PCCVPP0 ((uint)0x00200000) +#define BCSR1_PCCVPP1 ((uint)0x00100000) +#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1) +#define BCSR1_RS232EN_2 ((uint)0x00040000) +#define BCSR1_PCCVCC1 ((uint)0x00010000) +#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1) + +#define BCSR4_ETH10_RST ((uint)0x80000000) /* 10Base-T PHY reset*/ +#define BCSR4_USB_LO_SPD ((uint)0x04000000) +#define BCSR4_USB_VCC ((uint)0x02000000) +#define BCSR4_USB_FULL_SPD ((uint)0x00040000) +#define BCSR4_USB_EN ((uint)0x00020000) + +#define BCSR5_MII2_EN 0x40 +#define BCSR5_MII2_RST 0x20 +#define BCSR5_T1_RST 0x10 +#define BCSR5_ATM155_RST 0x08 +#define BCSR5_ATM25_RST 0x04 +#define BCSR5_MII1_EN 0x02 +#define BCSR5_MII1_RST 0x01 + +#endif /* __ASM_MPC885ADS_H__ */ +#endif /* __KERNEL__ */ diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c new file mode 100644 index 000000000..4d62bf9dc --- /dev/null +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -0,0 +1,227 @@ +/* + * Platform setup for the Freescale mpc885ads board + * + * Vitaly Bordug <vbordug@ru.mvista.com> + * + * Copyright 2005 MontaVista Software Inc. + * + * Heavily modified by Scott Wood <scottwood@freescale.com> + * Copyright 2007 Freescale Semiconductor, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/ioport.h> +#include <linux/device.h> +#include <linux/delay.h> + +#include <linux/fs_enet_pd.h> +#include <linux/fs_uart_pd.h> +#include <linux/fsl_devices.h> +#include <linux/mii.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> + +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/time.h> +#include <asm/8xx_immap.h> +#include <asm/cpm1.h> +#include <asm/fs_pd.h> +#include <asm/udbg.h> + +#include "mpc885ads.h" +#include "mpc8xx.h" + +static u32 __iomem *bcsr, *bcsr5; + +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin mpc885ads_pins[] = { + /* SMC1 */ + {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* SMC2 */ +#ifndef CONFIG_MPC8xx_SECOND_ETH_FEC2 + {CPM_PORTE, 21, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTE, 20, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ +#endif + + /* SCC3 */ + {CPM_PORTA, 9, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTA, 8, CPM_PIN_INPUT}, /* TX */ + {CPM_PORTC, 4, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* RENA */ + {CPM_PORTC, 5, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CLSN */ + {CPM_PORTE, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */ + {CPM_PORTE, 17, CPM_PIN_INPUT}, /* CLK5 */ + {CPM_PORTE, 16, CPM_PIN_INPUT}, /* CLK6 */ + + /* MII1 */ + {CPM_PORTA, 0, CPM_PIN_INPUT}, + {CPM_PORTA, 1, CPM_PIN_INPUT}, + {CPM_PORTA, 2, CPM_PIN_INPUT}, + {CPM_PORTA, 3, CPM_PIN_INPUT}, + {CPM_PORTA, 4, CPM_PIN_OUTPUT}, + {CPM_PORTA, 10, CPM_PIN_OUTPUT}, + {CPM_PORTA, 11, CPM_PIN_OUTPUT}, + {CPM_PORTB, 19, CPM_PIN_INPUT}, + {CPM_PORTB, 31, CPM_PIN_INPUT}, + 
{CPM_PORTC, 12, CPM_PIN_INPUT}, + {CPM_PORTC, 13, CPM_PIN_INPUT}, + {CPM_PORTE, 30, CPM_PIN_OUTPUT}, + {CPM_PORTE, 31, CPM_PIN_OUTPUT}, + + /* MII2 */ +#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2 + {CPM_PORTE, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 16, CPM_PIN_OUTPUT}, + {CPM_PORTE, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 21, CPM_PIN_OUTPUT}, + {CPM_PORTE, 22, CPM_PIN_OUTPUT}, + {CPM_PORTE, 23, CPM_PIN_OUTPUT}, + {CPM_PORTE, 24, CPM_PIN_OUTPUT}, + {CPM_PORTE, 25, CPM_PIN_OUTPUT}, + {CPM_PORTE, 26, CPM_PIN_OUTPUT}, + {CPM_PORTE, 27, CPM_PIN_OUTPUT}, + {CPM_PORTE, 28, CPM_PIN_OUTPUT}, + {CPM_PORTE, 29, CPM_PIN_OUTPUT}, +#endif + /* I2C */ + {CPM_PORTB, 26, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN}, + {CPM_PORTB, 27, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mpc885ads_pins); i++) { + struct cpm_pin *pin = &mpc885ads_pins[i]; + cpm1_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX); + cpm1_clk_setup(CPM_CLK_SMC2, CPM_BRG2, CPM_CLK_RTX); + cpm1_clk_setup(CPM_CLK_SCC3, CPM_CLK5, CPM_CLK_TX); + cpm1_clk_setup(CPM_CLK_SCC3, CPM_CLK6, CPM_CLK_RX); + + /* Set FEC1 and FEC2 to MII mode */ + clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180); +} + +static void __init mpc885ads_setup_arch(void) +{ + struct device_node *np; + + cpm_reset(); + init_ioports(); + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc885ads-bcsr"); + if (!np) { + printk(KERN_CRIT "Could not find fsl,mpc885ads-bcsr node\n"); + return; + } + + bcsr = of_iomap(np, 0); + bcsr5 = of_iomap(np, 1); + of_node_put(np); + + if (!bcsr || !bcsr5) { + printk(KERN_CRIT "Could not remap BCSR\n"); + return; + } + + clrbits32(&bcsr[1], BCSR1_RS232EN_1); +#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2 + setbits32(&bcsr[1], BCSR1_RS232EN_2); +#else + clrbits32(&bcsr[1], BCSR1_RS232EN_2); +#endif + + clrbits32(bcsr5, BCSR5_MII1_EN); + setbits32(bcsr5, BCSR5_MII1_RST); + udelay(1000); + clrbits32(bcsr5, BCSR5_MII1_RST); + +#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2 + clrbits32(bcsr5, BCSR5_MII2_EN); + setbits32(bcsr5, BCSR5_MII2_RST); + udelay(1000); + clrbits32(bcsr5, BCSR5_MII2_RST); +#else + setbits32(bcsr5, BCSR5_MII2_EN); +#endif + +#ifdef CONFIG_MPC8xx_SECOND_ETH_SCC3 + clrbits32(&bcsr[4], BCSR4_ETH10_RST); + udelay(1000); + setbits32(&bcsr[4], BCSR4_ETH10_RST); + + setbits32(&bcsr[1], BCSR1_ETHEN); + + np = of_find_node_by_path("/soc@ff000000/cpm@9c0/serial@a80"); +#else + np = of_find_node_by_path("/soc@ff000000/cpm@9c0/ethernet@a40"); +#endif + + /* The SCC3 enet registers overlap the SMC1 registers, so + * one of the two must be removed from the device tree. 
+ */ + + if (np) { + of_detach_node(np); + of_node_put(np); + } +} + +static int __init mpc885ads_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + return of_flat_dt_is_compatible(root, "fsl,mpc885ads"); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .name = "cpm", }, + { .name = "localbus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + /* Publish the QE devices */ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(mpc885_ads, declare_of_platform_devices); + +define_machine(mpc885_ads) { + .name = "Freescale MPC885 ADS", + .probe = mpc885ads_probe, + .setup_arch = mpc885ads_setup_arch, + .init_IRQ = mpc8xx_pics_init, + .get_irq = mpc8xx_get_irq, + .restart = mpc8xx_restart, + .calibrate_decr = mpc8xx_calibrate_decr, + .set_rtc_time = mpc8xx_set_rtc_time, + .get_rtc_time = mpc8xx_get_rtc_time, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/8xx/mpc8xx.h b/arch/powerpc/platforms/8xx/mpc8xx.h new file mode 100644 index 000000000..239a243a6 --- /dev/null +++ b/arch/powerpc/platforms/8xx/mpc8xx.h @@ -0,0 +1,21 @@ +/* + * Prototypes, etc. for the Freescale MPC8xx embedded cpu chips + * May need to be cleaned as the port goes on ... + * + * Copyright (C) 2008 Jochen Friedrich <jochen@scram.de> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ +#ifndef __MPC8xx_H +#define __MPC8xx_H + +extern void mpc8xx_restart(char *cmd); +extern void mpc8xx_calibrate_decr(void); +extern int mpc8xx_set_rtc_time(struct rtc_time *tm); +extern void mpc8xx_get_rtc_time(struct rtc_time *tm); +extern void mpc8xx_pics_init(void); +extern unsigned int mpc8xx_get_irq(void); + +#endif /* __MPC8xx_H */ diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c new file mode 100644 index 000000000..bee47a2b2 --- /dev/null +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -0,0 +1,154 @@ +/* + * Platform setup for the MPC8xx based boards from TQM. + * + * Heiko Schocher <hs@denx.de> + * Copyright 2010 DENX Software Engineering GmbH + * + * based on: + * Vitaly Bordug <vbordug@ru.mvista.com> + * + * Copyright 2005 MontaVista Software Inc. + * + * Heavily modified by Scott Wood <scottwood@freescale.com> + * Copyright 2007 Freescale Semiconductor, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +#include <linux/init.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/ioport.h> +#include <linux/device.h> +#include <linux/delay.h> + +#include <linux/fs_enet_pd.h> +#include <linux/fs_uart_pd.h> +#include <linux/fsl_devices.h> +#include <linux/mii.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> + +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/time.h> +#include <asm/8xx_immap.h> +#include <asm/cpm1.h> +#include <asm/fs_pd.h> +#include <asm/udbg.h> + +#include "mpc8xx.h" + +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin tqm8xx_pins[] __initdata = { + /* SMC1 */ + {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* SCC1 */ + {CPM_PORTA, 5, CPM_PIN_INPUT}, /* CLK1 */ + {CPM_PORTA, 7, CPM_PIN_INPUT}, /* CLK2 */ + {CPM_PORTA, 14, CPM_PIN_INPUT}, /* TX */ + {CPM_PORTA, 15, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTC, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */ + {CPM_PORTC, 10, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, + {CPM_PORTC, 11, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, +}; + +static struct cpm_pin tqm8xx_fec_pins[] __initdata = { + /* MII */ + {CPM_PORTD, 3, CPM_PIN_OUTPUT}, + {CPM_PORTD, 4, CPM_PIN_OUTPUT}, + {CPM_PORTD, 5, CPM_PIN_OUTPUT}, + {CPM_PORTD, 6, CPM_PIN_OUTPUT}, + {CPM_PORTD, 7, CPM_PIN_OUTPUT}, + {CPM_PORTD, 8, CPM_PIN_OUTPUT}, + {CPM_PORTD, 9, CPM_PIN_OUTPUT}, + {CPM_PORTD, 10, CPM_PIN_OUTPUT}, + {CPM_PORTD, 11, CPM_PIN_OUTPUT}, + {CPM_PORTD, 12, CPM_PIN_OUTPUT}, + {CPM_PORTD, 13, CPM_PIN_OUTPUT}, + {CPM_PORTD, 14, CPM_PIN_OUTPUT}, + {CPM_PORTD, 15, CPM_PIN_OUTPUT}, +}; + +static void __init init_pins(int n, struct cpm_pin *pin) +{ + int i; + + for (i = 0; i < n; i++) { + cpm1_set_pin(pin->port, pin->pin, pin->flags); + pin++; + } +} + +static void __init init_ioports(void) +{ + struct device_node *dnode; + struct property *prop; + int len; + + init_pins(ARRAY_SIZE(tqm8xx_pins), &tqm8xx_pins[0]); + + cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX); + + dnode = of_find_node_by_name(NULL, "aliases"); + if (dnode == NULL) + return; + prop = of_find_property(dnode, "ethernet1", &len); + if (prop == NULL) + return; + + /* init FEC pins */ + init_pins(ARRAY_SIZE(tqm8xx_fec_pins), &tqm8xx_fec_pins[0]); +} + +static void __init tqm8xx_setup_arch(void) +{ + cpm_reset(); + init_ioports(); +} + +static int __init tqm8xx_probe(void) +{ + unsigned long node = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(node, "tqc,tqm8xx"); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .name = "cpm", }, + { .name = "localbus", }, + { .compatible = "simple-bus" }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(tqm8xx, declare_of_platform_devices); + +define_machine(tqm8xx) { + .name = "TQM8xx", + .probe = tqm8xx_probe, + .setup_arch = tqm8xx_setup_arch, + .init_IRQ = mpc8xx_pics_init, + .get_irq = mpc8xx_get_irq, + .restart = mpc8xx_restart, + .calibrate_decr = mpc8xx_calibrate_decr, + .set_rtc_time = mpc8xx_set_rtc_time, + .get_rtc_time = mpc8xx_get_rtc_time, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig new file mode 100644 index 000000000..b7f9c408b --- /dev/null +++ b/arch/powerpc/platforms/Kconfig @@ -0,0 +1,359 @@ +menu 
"Platform support" + +source "arch/powerpc/platforms/powernv/Kconfig" +source "arch/powerpc/platforms/pseries/Kconfig" +source "arch/powerpc/platforms/chrp/Kconfig" +source "arch/powerpc/platforms/512x/Kconfig" +source "arch/powerpc/platforms/52xx/Kconfig" +source "arch/powerpc/platforms/powermac/Kconfig" +source "arch/powerpc/platforms/maple/Kconfig" +source "arch/powerpc/platforms/pasemi/Kconfig" +source "arch/powerpc/platforms/ps3/Kconfig" +source "arch/powerpc/platforms/cell/Kconfig" +source "arch/powerpc/platforms/8xx/Kconfig" +source "arch/powerpc/platforms/82xx/Kconfig" +source "arch/powerpc/platforms/83xx/Kconfig" +source "arch/powerpc/platforms/85xx/Kconfig" +source "arch/powerpc/platforms/86xx/Kconfig" +source "arch/powerpc/platforms/embedded6xx/Kconfig" +source "arch/powerpc/platforms/44x/Kconfig" +source "arch/powerpc/platforms/40x/Kconfig" +source "arch/powerpc/platforms/amigaone/Kconfig" + +config KVM_GUEST + bool "KVM Guest support" + default n + select EPAPR_PARAVIRT + ---help--- + This option enables various optimizations for running under the KVM + hypervisor. Overhead for the kernel when not running inside KVM should + be minimal. + + In case of doubt, say Y + +config EPAPR_PARAVIRT + bool "ePAPR para-virtualization support" + default n + help + Enables ePAPR para-virtualization support for guests. + + In case of doubt, say Y + +config PPC_NATIVE + bool + depends on 6xx || PPC64 + help + Support for running natively on the hardware, i.e. without + a hypervisor. This option is not user-selectable but should + be selected by all platforms that need it. + +config PPC_OF_BOOT_TRAMPOLINE + bool "Support booting from Open Firmware or yaboot" + depends on 6xx || PPC64 + default y + help + Support from booting from Open Firmware or yaboot using an + Open Firmware client interface. This enables the kernel to + communicate with open firmware to retrieve system information + such as the device tree. + + In case of doubt, say Y + +config UDBG_RTAS_CONSOLE + bool "RTAS based debug console" + depends on PPC_RTAS + default n + +config PPC_SMP_MUXED_IPI + bool + help + Select this opton if your platform supports SMP and your + interrupt controller provides less than 4 interrupts to each + cpu. This will enable the generic code to multiplex the 4 + messages on to one ipi. + +config IPIC + bool + default n + +config MPIC + bool + default n + +config MPIC_TIMER + bool "MPIC Global Timer" + depends on MPIC && FSL_SOC + default n + help + The MPIC global timer is a hardware timer inside the + Freescale PIC complying with OpenPIC standard. When the + specified interval times out, the hardware timer generates + an interrupt. The driver currently is only tested on fsl + chip, but it can potentially support other global timers + complying with the OpenPIC standard. + +config FSL_MPIC_TIMER_WAKEUP + tristate "Freescale MPIC global timer wakeup driver" + depends on FSL_SOC && MPIC_TIMER && PM + default n + help + The driver provides a way to wake up the system by MPIC + timer. + e.g. "echo 5 > /sys/devices/system/mpic/timer_wakeup" + +config PPC_EPAPR_HV_PIC + bool + default n + select EPAPR_PARAVIRT + +config MPIC_WEIRD + bool + default n + +config MPIC_MSGR + bool "MPIC message register support" + depends on MPIC + default n + help + Enables support for the MPIC message registers. These + registers are used for inter-processor communication. 
+ +config PPC_I8259 + bool + default n + +config U3_DART + bool + depends on PPC64 + default n + +config PPC_RTAS + bool + default n + +config RTAS_ERROR_LOGGING + bool + depends on PPC_RTAS + default n + +config PPC_RTAS_DAEMON + bool + depends on PPC_RTAS + default n + +config RTAS_PROC + bool "Proc interface to RTAS" + depends on PPC_RTAS && PROC_FS + default y + +config RTAS_FLASH + tristate "Firmware flash interface" + depends on PPC64 && RTAS_PROC + +config MMIO_NVRAM + bool + default n + +config MPIC_U3_HT_IRQS + bool + default n + +config MPIC_BROKEN_REGREAD + bool + depends on MPIC + help + This option enables an MPIC driver workaround for some chips + that have a bug that causes some interrupt source information + to not read back properly. It is safe to use on other chips as + well, but enabling it uses about 8KB of memory to keep copies + of the register contents in software. + +config IBMVIO + depends on PPC_PSERIES + bool + default y + +config IBMEBUS + depends on PPC_PSERIES + bool "Support for GX bus based adapters" + help + Bus device driver for GX bus based adapters. + +config EEH + bool + depends on (PPC_POWERNV || PPC_PSERIES) && PCI + default y + +config PPC_MPC106 + bool + default n + +config PPC_970_NAP + bool + default n + +config PPC_P7_NAP + bool + default n + +config PPC_INDIRECT_PIO + bool + select GENERIC_IOMAP + +config PPC_INDIRECT_MMIO + bool + +config PPC_IO_WORKAROUNDS + bool + +source "drivers/cpufreq/Kconfig" + +menu "CPUIdle driver" + +source "drivers/cpuidle/Kconfig" + +endmenu + +config PPC601_SYNC_FIX + bool "Workarounds for PPC601 bugs" + depends on 6xx && PPC_PMAC + help + Some versions of the PPC601 (the first PowerPC chip) have bugs which + mean that extra synchronization instructions are required near + certain instructions, typically those that make major changes to the + CPU state. These extra instructions reduce performance slightly. + If you say N here, these extra instructions will not be included, + resulting in a kernel which will run faster but may not run at all + on some systems with the PPC601 chip. + + If in doubt, say Y here. + +config TAU + bool "On-chip CPU temperature sensor support" + depends on 6xx + help + G3 and G4 processors have an on-chip temperature sensor called the + 'Thermal Assist Unit (TAU)', which, in theory, can measure the on-die + temperature within 2-4 degrees Celsius. This option shows the current + on-die temperature in /proc/cpuinfo if the cpu supports it. + + Unfortunately, on some chip revisions, this sensor is very inaccurate + and in many cases, does not work at all, so don't assume the cpu + temp is actually what /proc/cpuinfo says it is. + +config TAU_INT + bool "Interrupt driven TAU driver (DANGEROUS)" + depends on TAU + ---help--- + The TAU supports an interrupt driven mode which causes an interrupt + whenever the temperature goes out of range. This is the fastest way + to get notified that the temp has exceeded a range. With this option + off, a timer is used to re-check the temperature periodically. + + However, on some cpus it appears that the TAU interrupt hardware + is buggy and can cause a situation which would lead to unexplained + hard lockups. + + Unless you are extending the TAU driver, or enjoy kernel/hardware + debugging, leave this option off. + +config TAU_AVERAGE + bool "Average high and low temp" + depends on TAU + ---help--- + The TAU hardware can compare the temperature to an upper and lower + bound. The default behavior is to show both the upper and lower + bound in /proc/cpuinfo.
If the range is large, the temperature is + either changing a lot, or the TAU hardware is broken (likely on some + G4's). If the range is small (around 4 degrees), the temperature is + relatively stable. If you say Y here, a single temperature value, + halfway between the upper and lower bounds, will be reported in + /proc/cpuinfo. + + If in doubt, say N here. + +config QUICC_ENGINE + bool "Freescale QUICC Engine (QE) Support" + depends on FSL_SOC && PPC32 + select PPC_LIB_RHEAP + select CRC32 + help + The QUICC Engine (QE) is a new generation of communications + coprocessors on Freescale embedded CPUs (akin to CPM in older chips). + Selecting this option means that you wish to build a kernel + for a machine with a QE coprocessor. + +config QE_GPIO + bool "QE GPIO support" + depends on QUICC_ENGINE + select ARCH_REQUIRE_GPIOLIB + help + Say Y here if you're going to use hardware that connects to the + QE GPIOs. + +config CPM2 + bool "Enable support for the CPM2 (Communications Processor Module)" + depends on (FSL_SOC_BOOKE && PPC32) || 8260 + select CPM + select PPC_LIB_RHEAP + select PPC_PCI_CHOICE + select ARCH_REQUIRE_GPIOLIB + help + The CPM2 (Communications Processor Module) is a coprocessor on + embedded CPUs made by Freescale. Selecting this option means that + you wish to build a kernel for a machine with a CPM2 coprocessor + on it (826x, 827x, 8560). + +config AXON_RAM + tristate "Axon DDR2 memory device driver" + depends on PPC_IBM_CELL_BLADE && BLOCK + default m + help + This option registers one block device per Axon DDR2 memory bank + found in the system. Block devices are called axonram?; their major + and minor numbers are available in /proc/devices, /proc/partitions or + in /sys/block/axonram?/dev. + +config FSL_ULI1575 + bool + default n + select GENERIC_ISA_DMA + help + Support for the ULI1575 PCIe south bridge that exists on some + Freescale reference boards. The boards all use the ULI in pretty + much the same way. + +config CPM + bool + +config OF_RTC + bool + help + Uses information from the OF or flattened device tree to instantiate + platform devices for direct mapped RTC chips like the DS1742 or DS1743. + +config SIMPLE_GPIO + bool "Support for simple, memory-mapped GPIO controllers" + depends on PPC + select ARCH_REQUIRE_GPIOLIB + help + Say Y here to support simple, memory-mapped GPIO controllers. + These are usually BCSRs used to control a board's switches, LEDs, + chip-selects, Ethernet/USB PHY's power and various other small + on-board peripherals. + +config MCU_MPC8349EMITX + bool "MPC8349E-mITX MCU driver" + depends on I2C=y && PPC_83xx + select ARCH_REQUIRE_GPIOLIB + help + Say Y here to enable soft power-off functionality on the Freescale + boards with the MPC8349E-mITX-compatible MCU chips. This driver will + also register MCU GPIOs with the generic GPIO API, so you'll be able + to use MCU pins as GPIOs. + +config XILINX_PCI + bool "Xilinx PCI host bridge support" + depends on PCI && XILINX_VIRTEX + +endmenu diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype new file mode 100644 index 000000000..7264e9119 --- /dev/null +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -0,0 +1,436 @@ +config PPC64 + bool "64-bit kernel" + default n + select HAVE_VIRT_CPU_ACCOUNTING + select ZLIB_DEFLATE + help + This option selects whether a 32-bit or a 64-bit kernel + will be built. + +menu "Processor support" +choice + prompt "Processor Type" + depends on PPC32 + help + There are five families of 32 bit PowerPC chips supported.
+ The most common ones are the desktop and server CPUs (601, 603, + 604, 740, 750, 74xx) from Freescale and IBM, with their + embedded 512x/52xx/82xx/83xx/86xx counterparts. + The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500 + (85xx), each form a family of their own that is not compatible + with the others. + + If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx. + +config PPC_BOOK3S_32 + bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" + select PPC_FPU + +config PPC_85xx + bool "Freescale 85xx" + select E500 + +config PPC_8xx + bool "Freescale 8xx" + select FSL_SOC + select 8xx + select PPC_LIB_RHEAP + +config 40x + bool "AMCC 40x" + select PPC_DCR_NATIVE + select PPC_UDBG_16550 + select 4xx_SOC + select PPC_PCI_CHOICE + +config 44x + bool "AMCC 44x, 46x or 47x" + select PPC_DCR_NATIVE + select PPC_UDBG_16550 + select 4xx_SOC + select PPC_PCI_CHOICE + select PHYS_64BIT + +config E200 + bool "Freescale e200" + +endchoice + +choice + prompt "Processor Type" + depends on PPC64 + help + There are two families of 64 bit PowerPC chips supported. + The most common ones are the desktop and server CPUs + (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...). + + The others are the "embedded" processors compliant with the + "Book 3E" variant of the architecture. + +config PPC_BOOK3S_64 + bool "Server processors" + select PPC_FPU + select PPC_HAVE_PMU_SUPPORT + select SYS_SUPPORTS_HUGETLBFS + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES + select ARCH_SUPPORTS_NUMA_BALANCING + select IRQ_WORK + +config PPC_BOOK3E_64 + bool "Embedded processors" + select PPC_FPU # Make it a choice ? + select PPC_SMP_MUXED_IPI + select PPC_DOORBELL + +endchoice + +choice + prompt "CPU selection" + depends on PPC64 + default GENERIC_CPU + help + This will create a kernel which is optimised for a particular CPU. + The resulting kernel may not run on other CPUs, so use this with care. + + If unsure, select Generic. + +config GENERIC_CPU + bool "Generic" + depends on !CPU_LITTLE_ENDIAN + +config CELL_CPU + bool "Cell Broadband Engine" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + +config POWER4_CPU + bool "POWER4" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + +config POWER5_CPU + bool "POWER5" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + +config POWER6_CPU + bool "POWER6" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + +config POWER7_CPU + bool "POWER7" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER + +config POWER8_CPU + bool "POWER8" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER + +config E5500_CPU + bool "Freescale e5500" + depends on E500 + +config E6500_CPU + bool "Freescale e6500" + depends on E500 + +endchoice + +config PPC_BOOK3S + def_bool y + depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 + +config PPC_BOOK3E + def_bool y + depends on PPC_BOOK3E_64 + +config 6xx + def_bool y + depends on PPC32 && PPC_BOOK3S + select PPC_HAVE_PMU_SUPPORT + +config TUNE_CELL + bool "Optimize for Cell Broadband Engine" + depends on PPC64 && PPC_BOOK3S + help + Cause the compiler to optimize for the PPE of the Cell Broadband + Engine. This will make the code run considerably faster on Cell + but somewhat slower on other machines. This option only changes + the scheduling of instructions, not the selection of instructions + itself, so the resulting kernel will keep running on all other + machines.
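
The board files earlier in this import report the SoC by reading the System Version Register with mfspr(SPRN_SVR). The core families selected above can be told apart the same way through the architected Processor Version Register (PVR, SPR 287), whose upper halfword identifies the core family and lower halfword the revision. A minimal sketch, assuming a powerpc compiler target; the helper name is ours, not the kernel's:

/* Read the architected Processor Version Register; powerpc-only inline asm. */
static inline unsigned int read_pvr(void)
{
        unsigned int pvr;

        asm volatile("mfspr %0, 287" : "=r" (pvr));     /* SPR 287 is the PVR */
        return pvr;
}

For example, an upper halfword of 0x0008 identifies a 750 ("G3") core; the SVR read in the 86xx show_cpuinfo hooks above is the SoC-level counterpart of the same scheme.
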
+ +# this is temp to handle compat with arch=ppc +config 8xx + bool + +config E500 + select FSL_EMB_PERFMON + select PPC_FSL_BOOK3E + bool + +config PPC_E500MC + bool "e500mc Support" + select PPC_FPU + select COMMON_CLK + depends on E500 + help + This must be enabled for running on e500mc (and derivatives + such as e5500/e6500), and must be disabled for running on + e500v1 or e500v2. + +config PPC_FPU + bool + default y if PPC64 + +config FSL_EMB_PERFMON + bool "Freescale Embedded Perfmon" + depends on E500 || PPC_83xx + help + This is the Performance Monitor support found on the e500 core + and some e300 cores (c3 and c4). Select this only if your + core supports the Embedded Performance Monitor APU. + +config FSL_EMB_PERF_EVENT + bool + depends on FSL_EMB_PERFMON && PERF_EVENTS && !PPC_PERF_CTRS + default y + +config FSL_EMB_PERF_EVENT_E500 + bool + depends on FSL_EMB_PERF_EVENT && E500 + default y + +config 4xx + bool + depends on 40x || 44x + default y + +config BOOKE + bool + depends on E200 || E500 || 44x || PPC_BOOK3E + default y + +config FSL_BOOKE + bool + depends on (E200 || E500) && PPC32 + default y + +# this is for common code between PPC32 & PPC64 FSL BOOKE +config PPC_FSL_BOOK3E + bool + select FSL_EMB_PERFMON + select PPC_SMP_MUXED_IPI + select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 + select PPC_DOORBELL + default y if FSL_BOOKE + +config PTE_64BIT + bool + depends on 44x || E500 || PPC_86xx + default y if PHYS_64BIT + +config PHYS_64BIT + bool 'Large physical address support' if E500 || PPC_86xx + depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx + ---help--- + This option enables kernel support for larger than 32-bit physical + addresses. This feature may not be available on all cores. + + If you have more than 3.5GB of RAM or so, you also need to enable + SWIOTLB under Kernel Options for this to work. The actual number + is platform-dependent. + + If in doubt, say N here. + +config ALTIVEC + bool "AltiVec Support" + depends on 6xx || PPC_BOOK3S_64 || (PPC_E500MC && PPC64) + ---help--- + This option enables kernel support for the AltiVec extensions to the + PowerPC processor. The kernel currently supports saving and restoring + AltiVec registers, and turning on the 'altivec enable' bit so user + processes can execute AltiVec instructions. + + This option is only useful if you have a processor that supports + AltiVec (G4, otherwise known as the 74xx series), but it does not + have any effect on a non-AltiVec cpu (it does, however, add code to + the kernel). + + If in doubt, say Y here. + +config VSX + bool "VSX Support" + depends on PPC_BOOK3S_64 && ALTIVEC && PPC_FPU + ---help--- + + This option enables kernel support for the Vector Scalar extensions + to the PowerPC processor. The kernel currently supports saving and + restoring VSX registers, and turning on the 'VSX enable' bit so user + processes can execute VSX instructions. + + This option is only useful if you have a processor that supports + VSX (P7 and above), but it does not have any effect on non-VSX + CPUs (it does, however, add code to the kernel). + + If in doubt, say Y here. + +config PPC_ICSWX + bool "Support for PowerPC icswx coprocessor instruction" + depends on PPC_BOOK3S_64 + default n + ---help--- + + This option enables kernel support for the PowerPC Initiate + Coprocessor Store Word (icswx) coprocessor instruction on POWER7 + or newer processors. + + This option is only useful if you have a processor that supports + the icswx coprocessor instruction.
It does not have any effect
+	  on processors without the icswx coprocessor instruction.
+
+	  This option slightly increases kernel memory usage.
+
+	  If in doubt, say N here.
+
+config PPC_ICSWX_PID
+	bool "icswx requires direct PID management"
+	depends on PPC_ICSWX
+	default y
+	---help---
+	  On server processors the PID register is used explicitly for
+	  ICSWX. In embedded systems PID management is done by the
+	  system.
+
+config PPC_ICSWX_USE_SIGILL
+	bool "Should a bad CT cause a SIGILL?"
+	depends on PPC_ICSWX
+	default n
+	---help---
+	  Should a bad CT used for "non-record form ICSWX" cause an
+	  illegal instruction signal, or should it be silent as
+	  architected?
+
+	  If in doubt, say N here.
+
+config SPE_POSSIBLE
+	def_bool y
+	depends on E200 || (E500 && !PPC_E500MC)
+
+config SPE
+	bool "SPE Support"
+	depends on SPE_POSSIBLE
+	default y
+	---help---
+	  This option enables kernel support for the Signal Processing
+	  Extensions (SPE) to the PowerPC processor. The kernel currently
+	  supports saving and restoring SPE registers, and turning on the
+	  'spe enable' bit so user processes can execute SPE instructions.
+
+	  This option is only useful if you have a processor that supports
+	  SPE (e500, otherwise known as 85xx series), but does not have any
+	  effect on a non-SPE CPU (it does, however, add code to the kernel).
+
+	  If in doubt, say Y here.
+
+config PPC_STD_MMU
+	def_bool y
+	depends on PPC_BOOK3S
+
+config PPC_STD_MMU_32
+	def_bool y
+	depends on PPC_STD_MMU && PPC32
+
+config PPC_STD_MMU_64
+	def_bool y
+	depends on PPC_STD_MMU && PPC64
+
+config PPC_MMU_NOHASH
+	def_bool y
+	depends on !PPC_STD_MMU
+
+config PPC_BOOK3E_MMU
+	def_bool y
+	depends on FSL_BOOKE || PPC_BOOK3E
+
+config PPC_MM_SLICES
+	bool
+	default y if (!PPC_FSL_BOOK3E && PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES)
+	default n
+
+config PPC_HAVE_PMU_SUPPORT
+	bool
+
+config PPC_PERF_CTRS
+	def_bool y
+	depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
+	help
+	  This enables the powerpc-specific perf_event back-end.
+
+config SMP
+	depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x
+	bool "Symmetric multi-processing support"
+	---help---
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, say N. If you have a system with more
+	  than one CPU, say Y. Note that the kernel does not currently
+	  support SMP machines with 603/603e/603ev or PPC750 ("G3") processors
+	  since they have inadequate hardware support for multiprocessor
+	  operation.
+
+	  If you say N here, the kernel will run on single and multiprocessor
+	  machines, but will use only one CPU of a multiprocessor machine. If
+	  you say Y here, the kernel will also run on single-processor
+	  machines, but it will run faster there if you say N.
+
+	  If you don't know what to do here, say N.
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-8192)"
+	range 2 8192
+	depends on SMP
+	default "32" if PPC64
+	default "4"
+
+config NOT_COHERENT_CACHE
+	bool
+	depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
+	default n if PPC_47x
+	default y
+
+config CHECK_CACHE_COHERENCY
+	bool
+
+config PPC_DOORBELL
+	bool
+	default n
+
+endmenu
+
+choice
+	prompt "Endianness selection"
+	default CPU_BIG_ENDIAN
+	help
+	  This option selects whether a big endian or little endian kernel will
+	  be built.
+
+config CPU_BIG_ENDIAN
+	bool "Build big endian kernel"
+	help
+	  Build a big endian kernel.
+
+	  If unsure, select this option.
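+# For illustration: this endianness choice resolves to exactly one
+# symbol in the generated .config, e.g. with the big endian default:
+#
+#	CONFIG_CPU_BIG_ENDIAN=y
+#	# CONFIG_CPU_LITTLE_ENDIAN is not set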
+ +config CPU_LITTLE_ENDIAN + bool "Build little endian kernel" + select PPC64_BOOT_WRAPPER + help + Build a little endian kernel. + + Note that if cross compiling a little endian kernel, + CROSS_COMPILE must point to a toolchain capable of targeting + little endian powerpc. + +endchoice + +config PPC64_BOOT_WRAPPER + def_bool n + depends on CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile new file mode 100644 index 000000000..469ef170d --- /dev/null +++ b/arch/powerpc/platforms/Makefile @@ -0,0 +1,24 @@ + +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror + +obj-$(CONFIG_FSL_ULI1575) += fsl_uli1575.o + +obj-$(CONFIG_PPC_PMAC) += powermac/ +obj-$(CONFIG_PPC_CHRP) += chrp/ +obj-$(CONFIG_40x) += 40x/ +obj-$(CONFIG_44x) += 44x/ +obj-$(CONFIG_PPC_MPC512x) += 512x/ +obj-$(CONFIG_PPC_MPC52xx) += 52xx/ +obj-$(CONFIG_PPC_8xx) += 8xx/ +obj-$(CONFIG_PPC_82xx) += 82xx/ +obj-$(CONFIG_PPC_83xx) += 83xx/ +obj-$(CONFIG_FSL_SOC_BOOKE) += 85xx/ +obj-$(CONFIG_PPC_86xx) += 86xx/ +obj-$(CONFIG_PPC_POWERNV) += powernv/ +obj-$(CONFIG_PPC_PSERIES) += pseries/ +obj-$(CONFIG_PPC_MAPLE) += maple/ +obj-$(CONFIG_PPC_PASEMI) += pasemi/ +obj-$(CONFIG_PPC_CELL) += cell/ +obj-$(CONFIG_PPC_PS3) += ps3/ +obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ +obj-$(CONFIG_AMIGAONE) += amigaone/ diff --git a/arch/powerpc/platforms/amigaone/Kconfig b/arch/powerpc/platforms/amigaone/Kconfig new file mode 100644 index 000000000..128de25cc --- /dev/null +++ b/arch/powerpc/platforms/amigaone/Kconfig @@ -0,0 +1,18 @@ +config AMIGAONE + bool "Eyetech AmigaOne/MAI Teron" + depends on 6xx && BROKEN_ON_SMP + select PPC_I8259 + select PPC_INDIRECT_PCI + select PPC_UDBG_16550 + select PCI + select NOT_COHERENT_CACHE + select CHECK_CACHE_COHERENCY + select DEFAULT_UIMAGE + select HAVE_PCSPKR_PLATFORM + help + Select AmigaOne for the following machines: + - AmigaOne SE/Teron CX (G3 only) + - AmigaOne XE/Teron PX + - uA1/Teron mini + More information is available at: + <http://amigaone-linux.sourceforge.net/>. diff --git a/arch/powerpc/platforms/amigaone/Makefile b/arch/powerpc/platforms/amigaone/Makefile new file mode 100644 index 000000000..e6885b3b2 --- /dev/null +++ b/arch/powerpc/platforms/amigaone/Makefile @@ -0,0 +1 @@ +obj-y += setup.o diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c new file mode 100644 index 000000000..2fe120462 --- /dev/null +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -0,0 +1,174 @@ +/* + * AmigaOne platform setup + * + * Copyright 2008 Gerhard Pircher (gerhard_pircher@gmx.net) + * + * Based on original amigaone_setup.c source code + * Copyright 2003 by Hans-Joerg Frieden and Thomas Frieden + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/seq_file.h> +#include <generated/utsrelease.h> + +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/pci-bridge.h> +#include <asm/i8259.h> +#include <asm/time.h> +#include <asm/udbg.h> +#include <asm/dma.h> + +extern void __flush_disable_L1(void); + +void amigaone_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "vendor\t\t: Eyetech Ltd.\n"); +} + +static int __init amigaone_add_bridge(struct device_node *dev) +{ + const u32 *cfg_addr, *cfg_data; + int len; + const int *bus_range; + struct pci_controller *hose; + + printk(KERN_INFO "Adding PCI host bridge %s\n", dev->full_name); + + cfg_addr = of_get_address(dev, 0, NULL, NULL); + cfg_data = of_get_address(dev, 1, NULL, NULL); + if ((cfg_addr == NULL) || (cfg_data == NULL)) + return -ENODEV; + + bus_range = of_get_property(dev, "bus-range", &len); + if ((bus_range == NULL) || (len < 2 * sizeof(int))) + printk(KERN_WARNING "Can't get bus-range for %s, assume" + " bus 0\n", dev->full_name); + + hose = pcibios_alloc_controller(dev); + if (hose == NULL) + return -ENOMEM; + + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + setup_indirect_pci(hose, cfg_addr[0], cfg_data[0], 0); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, 1); + + return 0; +} + +void __init amigaone_setup_arch(void) +{ + struct device_node *np; + int phb = -ENODEV; + + /* Lookup PCI host bridges. */ + for_each_compatible_node(np, "pci", "mai-logic,articia-s") + phb = amigaone_add_bridge(np); + + BUG_ON(phb != 0); + + if (ppc_md.progress) + ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0); +} + +void __init amigaone_init_IRQ(void) +{ + struct device_node *pic, *np = NULL; + const unsigned long *prop = NULL; + unsigned long int_ack = 0; + + /* Search for ISA interrupt controller. */ + pic = of_find_compatible_node(NULL, "interrupt-controller", + "pnpPNP,000"); + BUG_ON(pic == NULL); + + /* Look for interrupt acknowledge address in the PCI root node. */ + np = of_find_compatible_node(NULL, "pci", "mai-logic,articia-s"); + if (np) { + prop = of_get_property(np, "8259-interrupt-acknowledge", NULL); + if (prop) + int_ack = prop[0]; + of_node_put(np); + } + + if (int_ack == 0) + printk(KERN_WARNING "Cannot find PCI interrupt acknowledge" + " address, polling\n"); + + i8259_init(pic, int_ack); + ppc_md.get_irq = i8259_irq; + irq_set_default_host(i8259_get_host()); +} + +static int __init request_isa_regions(void) +{ + request_region(0x00, 0x20, "dma1"); + request_region(0x40, 0x20, "timer"); + request_region(0x80, 0x10, "dma page reg"); + request_region(0xc0, 0x20, "dma2"); + + return 0; +} +machine_device_initcall(amigaone, request_isa_regions); + +void amigaone_restart(char *cmd) +{ + local_irq_disable(); + + /* Flush and disable caches. */ + __flush_disable_L1(); + + /* Set SRR0 to the reset vector and turn on MSR_IP. */ + mtspr(SPRN_SRR0, 0xfff00100); + mtspr(SPRN_SRR1, MSR_IP); + + /* Do an rfi to jump back to firmware. */ + __asm__ __volatile__("rfi" : : : "memory"); + + /* Not reached. */ + while (1); +} + +static int __init amigaone_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "eyetech,amigaone")) { + /* + * Coherent memory access cause complete system lockup! Thus + * disable this CPU feature, even if the CPU needs it. 
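+		 * (Hence CPU_FTR_NEED_COHERENT is masked out just below:
+		 * coherency the board cannot provide is traded for a
+		 * system that boots.)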
+		 */
+		cur_cpu_spec->cpu_features &= ~CPU_FTR_NEED_COHERENT;
+
+		ISA_DMA_THRESHOLD = 0x00ffffff;
+		DMA_MODE_READ = 0x44;
+		DMA_MODE_WRITE = 0x48;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+define_machine(amigaone) {
+	.name			= "AmigaOne",
+	.probe			= amigaone_probe,
+	.setup_arch		= amigaone_setup_arch,
+	.show_cpuinfo		= amigaone_show_cpuinfo,
+	.init_IRQ		= amigaone_init_IRQ,
+	.restart		= amigaone_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
new file mode 100644
index 000000000..2f23133ab
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -0,0 +1,129 @@
+config PPC_CELL
+	bool
+	default n
+
+config PPC_CELL_COMMON
+	bool
+	select PPC_CELL
+	select PPC_DCR_MMIO
+	select PPC_INDIRECT_PIO
+	select PPC_INDIRECT_MMIO
+	select PPC_NATIVE
+	select PPC_RTAS
+	select IRQ_EDGE_EOI_HANDLER
+
+config PPC_CELL_NATIVE
+	bool
+	select PPC_CELL_COMMON
+	select MPIC
+	select PPC_IO_WORKAROUNDS
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII #test only
+	select IBM_EMAC_TAH #test only
+	default n
+
+config PPC_IBM_CELL_BLADE
+	bool "IBM Cell Blade"
+	depends on PPC64 && PPC_BOOK3S
+	select PPC_CELL_NATIVE
+	select PPC_OF_PLATFORM_PCI
+	select PCI
+	select MMIO_NVRAM
+	select PPC_UDBG_16550
+	select UDBG_RTAS_CONSOLE
+
+config PPC_CELL_QPACE
+	bool "IBM Cell - QPACE"
+	depends on PPC64 && PPC_BOOK3S
+	select PPC_CELL_COMMON
+
+config AXON_MSI
+	bool
+	depends on PPC_IBM_CELL_BLADE && PCI_MSI
+	default y
+
+menu "Cell Broadband Engine options"
+	depends on PPC_CELL
+
+config SPU_FS
+	tristate "SPU file system"
+	default m
+	depends on PPC_CELL
+	select SPU_BASE
+	select MEMORY_HOTPLUG
+	help
+	  The SPU file system is used to access Synergistic Processing
+	  Units on machines implementing the Broadband Processor
+	  Architecture.
+
+config SPU_FS_64K_LS
+	bool "Use 64K pages to map SPE local store"
+	# we depend on PPC_MM_SLICES for now rather than selecting
+	# it because we depend on hugetlbfs hooks being present. We
+	# will fix that when the generic code has been improved to
+	# not require hijacking hugetlbfs hooks.
+	depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES
+	default y
+	select PPC_HAS_HASH_64K
+	help
+	  This option causes SPE local stores to be mapped in process
+	  address spaces using 64K pages while the rest of the kernel
+	  uses 4K pages. This can improve the performance of applications
+	  using multiple SPEs by lowering the TLB pressure on them.
+
+config SPU_BASE
+	bool
+	default n
+	select PPC_COPRO_BASE
+
+config CBE_RAS
+	bool "RAS features for bare metal Cell BE"
+	depends on PPC_CELL_NATIVE
+	default y
+
+config PPC_IBM_CELL_RESETBUTTON
+	bool "IBM Cell Blade Pinhole reset button"
+	depends on CBE_RAS && PPC_IBM_CELL_BLADE
+	default y
+	help
+	  Support the pinhole reset button on IBM Cell blades.
+	  This adds a method to trigger system reset via the front panel
+	  pinhole button.
+
+config PPC_IBM_CELL_POWERBUTTON
+	tristate "IBM Cell Blade power button"
+	depends on PPC_IBM_CELL_BLADE && INPUT_EVDEV
+	default y
+	help
+	  Support the power button on IBM Cell blades.
+	  This will enable the power button as an input device.
+
+config CBE_THERM
+	tristate "CBE thermal support"
+	default m
+	depends on CBE_RAS && SPU_BASE
+
+config PPC_PMI
+	tristate
+	default y
+	depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON
+	help
+	  PMI (Platform Management Interrupt) is a way to
+	  communicate with the BMC (Baseboard Management Controller).
+ It is used in some IBM Cell blades. + +config CBE_CPUFREQ_SPU_GOVERNOR + tristate "CBE frequency scaling based on SPU usage" + depends on SPU_FS && CPU_FREQ + default m + help + This governor checks for spu usage to adjust the cpu frequency. + If no spu is running on a given cpu, that cpu will be throttled to + the minimal possible frequency. + +endmenu + +config OPROFILE_CELL + def_bool y + depends on PPC_CELL_NATIVE && (OPROFILE = m || OPROFILE = y) && SPU_BASE + diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile new file mode 100644 index 000000000..34699bddf --- /dev/null +++ b/arch/powerpc/platforms/cell/Makefile @@ -0,0 +1,31 @@ +obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o + +obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \ + pmu.o spider-pci.o +obj-$(CONFIG_CBE_RAS) += ras.o + +obj-$(CONFIG_CBE_THERM) += cbe_thermal.o +obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o + +obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o + +ifeq ($(CONFIG_SMP),y) +obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o +obj-$(CONFIG_PPC_CELL_QPACE) += smp.o +endif + +# needed only when building loadable spufs.ko +spu-priv1-$(CONFIG_PPC_CELL_COMMON) += spu_priv1_mmio.o +spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o + +obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ + spu_notify.o \ + spu_syscalls.o \ + $(spu-priv1-y) \ + $(spu-manage-y) \ + spufs/ + +obj-$(CONFIG_AXON_MSI) += axon_msi.o + +# qpace setup +obj-$(CONFIG_PPC_CELL_QPACE) += qpace_setup.o diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c new file mode 100644 index 000000000..623bd9614 --- /dev/null +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -0,0 +1,492 @@ +/* + * Copyright 2007, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <linux/export.h> +#include <linux/of_platform.h> +#include <linux/debugfs.h> +#include <linux/slab.h> + +#include <asm/dcr.h> +#include <asm/machdep.h> +#include <asm/prom.h> + + +/* + * MSIC registers, specified as offsets from dcr_base + */ +#define MSIC_CTRL_REG 0x0 + +/* Base Address registers specify FIFO location in BE memory */ +#define MSIC_BASE_ADDR_HI_REG 0x3 +#define MSIC_BASE_ADDR_LO_REG 0x4 + +/* Hold the read/write offsets into the FIFO */ +#define MSIC_READ_OFFSET_REG 0x5 +#define MSIC_WRITE_OFFSET_REG 0x6 + + +/* MSIC control register flags */ +#define MSIC_CTRL_ENABLE 0x0001 +#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002 +#define MSIC_CTRL_IRQ_ENABLE 0x0008 +#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010 + +/* + * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB. + * Currently we're using a 64KB FIFO size. + */ +#define MSIC_FIFO_SIZE_SHIFT 16 +#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT) + +/* + * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits + * 8-9 of the MSIC control reg. + */ +#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300) + +/* + * We need to mask the read/write offsets to make sure they stay within + * the bounds of the FIFO. Also they should always be 16-byte aligned. 
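+ * As a worked example for the 64KB FIFO configured above:
+ *	MSIC_FIFO_SIZE_MASK = (0x10000 - 1) & ~0xFu = 0xFFF0
+ * so offsets wrap at 64KB and always stay 16-byte aligned.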
+ */ +#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu) + +/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */ +#define MSIC_FIFO_ENTRY_SIZE 0x10 + + +struct axon_msic { + struct irq_domain *irq_domain; + __le32 *fifo_virt; + dma_addr_t fifo_phys; + dcr_host_t dcr_host; + u32 read_offset; +#ifdef DEBUG + u32 __iomem *trigger; +#endif +}; + +#ifdef DEBUG +void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic); +#else +static inline void axon_msi_debug_setup(struct device_node *dn, + struct axon_msic *msic) { } +#endif + + +static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) +{ + pr_devel("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n); + + dcr_write(msic->dcr_host, dcr_n, val); +} + +static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct axon_msic *msic = irq_get_handler_data(irq); + u32 write_offset, msi; + int idx; + int retry = 0; + + write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG); + pr_devel("axon_msi: original write_offset 0x%x\n", write_offset); + + /* write_offset doesn't wrap properly, so we have to mask it */ + write_offset &= MSIC_FIFO_SIZE_MASK; + + while (msic->read_offset != write_offset && retry < 100) { + idx = msic->read_offset / sizeof(__le32); + msi = le32_to_cpu(msic->fifo_virt[idx]); + msi &= 0xFFFF; + + pr_devel("axon_msi: woff %x roff %x msi %x\n", + write_offset, msic->read_offset, msi); + + if (msi < nr_irqs && irq_get_chip_data(msi) == msic) { + generic_handle_irq(msi); + msic->fifo_virt[idx] = cpu_to_le32(0xffffffff); + } else { + /* + * Reading the MSIC_WRITE_OFFSET_REG does not + * reliably flush the outstanding DMA to the + * FIFO buffer. Here we were reading stale + * data, so we need to retry. 
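+			 * The retry counter in the loop condition above
+			 * bounds this at 100 attempts, so a wedged FIFO
+			 * cannot hang the cascade handler forever.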
+ */ + udelay(1); + retry++; + pr_devel("axon_msi: invalid irq 0x%x!\n", msi); + continue; + } + + if (retry) { + pr_devel("axon_msi: late irq 0x%x, retry %d\n", + msi, retry); + retry = 0; + } + + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; + msic->read_offset &= MSIC_FIFO_SIZE_MASK; + } + + if (retry) { + printk(KERN_WARNING "axon_msi: irq timed out\n"); + + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; + msic->read_offset &= MSIC_FIFO_SIZE_MASK; + } + + chip->irq_eoi(&desc->irq_data); +} + +static struct axon_msic *find_msi_translator(struct pci_dev *dev) +{ + struct irq_domain *irq_domain; + struct device_node *dn, *tmp; + const phandle *ph; + struct axon_msic *msic = NULL; + + dn = of_node_get(pci_device_to_OF_node(dev)); + if (!dn) { + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); + return NULL; + } + + for (; dn; dn = of_get_next_parent(dn)) { + ph = of_get_property(dn, "msi-translator", NULL); + if (ph) + break; + } + + if (!ph) { + dev_dbg(&dev->dev, + "axon_msi: no msi-translator property found\n"); + goto out_error; + } + + tmp = dn; + dn = of_find_node_by_phandle(*ph); + of_node_put(tmp); + if (!dn) { + dev_dbg(&dev->dev, + "axon_msi: msi-translator doesn't point to a node\n"); + goto out_error; + } + + irq_domain = irq_find_host(dn); + if (!irq_domain) { + dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %s\n", + dn->full_name); + goto out_error; + } + + msic = irq_domain->host_data; + +out_error: + of_node_put(dn); + + return msic; +} + +static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) +{ + struct device_node *dn; + struct msi_desc *entry; + int len; + const u32 *prop; + + dn = of_node_get(pci_device_to_OF_node(dev)); + if (!dn) { + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); + return -ENODEV; + } + + entry = list_first_entry(&dev->msi_list, struct msi_desc, list); + + for (; dn; dn = of_get_next_parent(dn)) { + if (entry->msi_attrib.is_64) { + prop = of_get_property(dn, "msi-address-64", &len); + if (prop) + break; + } + + prop = of_get_property(dn, "msi-address-32", &len); + if (prop) + break; + } + + if (!prop) { + dev_dbg(&dev->dev, + "axon_msi: no msi-address-(32|64) properties found\n"); + return -ENOENT; + } + + switch (len) { + case 8: + msg->address_hi = prop[0]; + msg->address_lo = prop[1]; + break; + case 4: + msg->address_hi = 0; + msg->address_lo = prop[0]; + break; + default: + dev_dbg(&dev->dev, + "axon_msi: malformed msi-address-(32|64) property\n"); + of_node_put(dn); + return -EINVAL; + } + + of_node_put(dn); + + return 0; +} + +static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + unsigned int virq, rc; + struct msi_desc *entry; + struct msi_msg msg; + struct axon_msic *msic; + + msic = find_msi_translator(dev); + if (!msic) + return -ENODEV; + + rc = setup_msi_msg_address(dev, &msg); + if (rc) + return rc; + + list_for_each_entry(entry, &dev->msi_list, list) { + virq = irq_create_direct_mapping(msic->irq_domain); + if (virq == NO_IRQ) { + dev_warn(&dev->dev, + "axon_msi: virq allocation failed!\n"); + return -1; + } + dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq); + + irq_set_msi_desc(virq, entry); + msg.data = virq; + pci_write_msi_msg(virq, &msg); + } + + return 0; +} + +static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *entry; + + dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n"); + + list_for_each_entry(entry, &dev->msi_list, list) { + if (entry->irq == NO_IRQ) + continue; + + irq_set_msi_desc(entry->irq, NULL); + 
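+		/* release the virq handed out by irq_create_direct_mapping()
+		 * in axon_msi_setup_msi_irqs() above */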
irq_dispose_mapping(entry->irq); + } +} + +static struct irq_chip msic_irq_chip = { + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, + .irq_shutdown = pci_msi_mask_irq, + .name = "AXON-MSI", +}; + +static int msic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq); + + return 0; +} + +static const struct irq_domain_ops msic_host_ops = { + .map = msic_host_map, +}; + +static void axon_msi_shutdown(struct platform_device *device) +{ + struct axon_msic *msic = dev_get_drvdata(&device->dev); + u32 tmp; + + pr_devel("axon_msi: disabling %s\n", + msic->irq_domain->of_node->full_name); + tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG); + tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; + msic_dcr_write(msic, MSIC_CTRL_REG, tmp); +} + +static int axon_msi_probe(struct platform_device *device) +{ + struct device_node *dn = device->dev.of_node; + struct axon_msic *msic; + unsigned int virq; + int dcr_base, dcr_len; + + pr_devel("axon_msi: setting up dn %s\n", dn->full_name); + + msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL); + if (!msic) { + printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n", + dn->full_name); + goto out; + } + + dcr_base = dcr_resource_start(dn, 0); + dcr_len = dcr_resource_len(dn, 0); + + if (dcr_base == 0 || dcr_len == 0) { + printk(KERN_ERR + "axon_msi: couldn't parse dcr properties on %s\n", + dn->full_name); + goto out_free_msic; + } + + msic->dcr_host = dcr_map(dn, dcr_base, dcr_len); + if (!DCR_MAP_OK(msic->dcr_host)) { + printk(KERN_ERR "axon_msi: dcr_map failed for %s\n", + dn->full_name); + goto out_free_msic; + } + + msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, + &msic->fifo_phys, GFP_KERNEL); + if (!msic->fifo_virt) { + printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n", + dn->full_name); + goto out_free_msic; + } + + virq = irq_of_parse_and_map(dn, 0); + if (virq == NO_IRQ) { + printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n", + dn->full_name); + goto out_free_fifo; + } + memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES); + + /* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */ + msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic); + if (!msic->irq_domain) { + printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %s\n", + dn->full_name); + goto out_free_fifo; + } + + irq_set_handler_data(virq, msic); + irq_set_chained_handler(virq, axon_msi_cascade); + pr_devel("axon_msi: irq 0x%x setup for axon_msi\n", virq); + + /* Enable the MSIC hardware */ + msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32); + msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG, + msic->fifo_phys & 0xFFFFFFFF); + msic_dcr_write(msic, MSIC_CTRL_REG, + MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE | + MSIC_CTRL_FIFO_SIZE); + + msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG) + & MSIC_FIFO_SIZE_MASK; + + dev_set_drvdata(&device->dev, msic); + + ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; + ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; + + axon_msi_debug_setup(dn, msic); + + printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name); + + return 0; + +out_free_fifo: + dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, msic->fifo_virt, + msic->fifo_phys); +out_free_msic: + kfree(msic); +out: + + return -1; +} + +static const struct of_device_id axon_msi_device_id[] = { + { + .compatible = 
"ibm,axon-msic" + }, + {} +}; + +static struct platform_driver axon_msi_driver = { + .probe = axon_msi_probe, + .shutdown = axon_msi_shutdown, + .driver = { + .name = "axon-msi", + .of_match_table = axon_msi_device_id, + }, +}; + +static int __init axon_msi_init(void) +{ + return platform_driver_register(&axon_msi_driver); +} +subsys_initcall(axon_msi_init); + + +#ifdef DEBUG +static int msic_set(void *data, u64 val) +{ + struct axon_msic *msic = data; + out_le32(msic->trigger, val); + return 0; +} + +static int msic_get(void *data, u64 *val) +{ + *val = 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n"); + +void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic) +{ + char name[8]; + u64 addr; + + addr = of_translate_address(dn, of_get_property(dn, "reg", NULL)); + if (addr == OF_BAD_ADDR) { + pr_devel("axon_msi: couldn't translate reg property\n"); + return; + } + + msic->trigger = ioremap(addr, 0x4); + if (!msic->trigger) { + pr_devel("axon_msi: ioremap failed\n"); + return; + } + + snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn)); + + if (!debugfs_create_file(name, 0600, powerpc_debugfs_root, + msic, &fops_msic)) { + pr_devel("axon_msi: debugfs_create_file failed!\n"); + return; + } +} +#endif /* DEBUG */ diff --git a/arch/powerpc/platforms/cell/cbe_powerbutton.c b/arch/powerpc/platforms/cell/cbe_powerbutton.c new file mode 100644 index 000000000..2bb803130 --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_powerbutton.c @@ -0,0 +1,118 @@ +/* + * driver for powerbutton on IBM cell blades + * + * (C) Copyright IBM Corp. 2005-2008 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/input.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <asm/pmi.h> +#include <asm/prom.h> + +static struct input_dev *button_dev; +static struct platform_device *button_pdev; + +static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg) +{ + BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON); + + input_report_key(button_dev, KEY_POWER, 1); + input_sync(button_dev); + input_report_key(button_dev, KEY_POWER, 0); + input_sync(button_dev); +} + +static struct pmi_handler cbe_pmi_handler = { + .type = PMI_TYPE_POWER_BUTTON, + .handle_pmi_message = cbe_powerbutton_handle_pmi, +}; + +static int __init cbe_powerbutton_init(void) +{ + int ret = 0; + struct input_dev *dev; + + if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) { + printk(KERN_ERR "%s: Not a cell blade.\n", __func__); + ret = -ENODEV; + goto out; + } + + dev = input_allocate_device(); + if (!dev) { + ret = -ENOMEM; + printk(KERN_ERR "%s: Not enough memory.\n", __func__); + goto out; + } + + set_bit(EV_KEY, dev->evbit); + set_bit(KEY_POWER, dev->keybit); + + dev->name = "Power Button"; + dev->id.bustype = BUS_HOST; + + /* this makes the button look like an acpi power button + * no clue whether anyone relies on that though */ + dev->id.product = 0x02; + dev->phys = "LNXPWRBN/button/input0"; + + button_pdev = platform_device_register_simple("power_button", 0, NULL, 0); + if (IS_ERR(button_pdev)) { + ret = PTR_ERR(button_pdev); + goto out_free_input; + } + + dev->dev.parent = &button_pdev->dev; + ret = input_register_device(dev); + if (ret) { + printk(KERN_ERR "%s: Failed to register device\n", __func__); + goto out_free_pdev; + } + + button_dev = dev; + + ret = pmi_register_handler(&cbe_pmi_handler); + if (ret) { + printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__); + goto out_free_pdev; + } + + goto out; + +out_free_pdev: + platform_device_unregister(button_pdev); +out_free_input: + input_free_device(dev); +out: + return ret; +} + +static void __exit cbe_powerbutton_exit(void) +{ + pmi_unregister_handler(&cbe_pmi_handler); + platform_device_unregister(button_pdev); + input_free_device(button_dev); +} + +module_init(cbe_powerbutton_init); +module_exit(cbe_powerbutton_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c new file mode 100644 index 000000000..1428d583c --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -0,0 +1,281 @@ +/* + * cbe_regs.c + * + * Accessor routines for the various MMIO register blocks of the CBE + * + * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp. + */ + +#include <linux/percpu.h> +#include <linux/types.h> +#include <linux/export.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/ptrace.h> +#include <asm/cell-regs.h> + +/* + * Current implementation uses "cpu" nodes. We build our own mapping + * array of cpu numbers to cpu nodes locally for now to allow interrupt + * time code to have a fast path rather than call of_get_cpu_node(). 
If
+ * we implement cpu hotplug, we'll have to install an appropriate notifier
+ * in order to release references to the cpu going away.
+ */
+static struct cbe_regs_map
+{
+	struct device_node *cpu_node;
+	struct device_node *be_node;
+	struct cbe_pmd_regs __iomem *pmd_regs;
+	struct cbe_iic_regs __iomem *iic_regs;
+	struct cbe_mic_tm_regs __iomem *mic_tm_regs;
+	struct cbe_pmd_shadow_regs pmd_shadow_regs;
+} cbe_regs_maps[MAX_CBE];
+static int cbe_regs_map_count;
+
+static struct cbe_thread_map
+{
+	struct device_node *cpu_node;
+	struct device_node *be_node;
+	struct cbe_regs_map *regs;
+	unsigned int thread_id;
+	unsigned int cbe_id;
+} cbe_thread_map[NR_CPUS];
+
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
+static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
+
+static struct cbe_regs_map *cbe_find_map(struct device_node *np)
+{
+	int i;
+	struct device_node *tmp_np;
+
+	if (strcasecmp(np->type, "spe")) {
+		for (i = 0; i < cbe_regs_map_count; i++)
+			if (cbe_regs_maps[i].cpu_node == np ||
+			    cbe_regs_maps[i].be_node == np)
+				return &cbe_regs_maps[i];
+		return NULL;
+	}
+
+	if (np->data)
+		return np->data;
+
+	/* walk up the path until a cpu or be node is found */
+	tmp_np = np;
+	do {
+		tmp_np = tmp_np->parent;
+		/* on a correct devicetree we won't get up to root */
+		BUG_ON(!tmp_np);
+	} while (strcasecmp(tmp_np->type, "cpu") &&
+		 strcasecmp(tmp_np->type, "be"));
+
+	np->data = cbe_find_map(tmp_np);
+
+	return np->data;
+}
+
+struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return map->pmd_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_pmd_regs);
+
+struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return map->pmd_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs);
+
+struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return &map->pmd_shadow_regs;
+}
+
+struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return &map->pmd_shadow_regs;
+}
+
+struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return map->iic_regs;
+}
+
+struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return map->iic_regs;
+}
+
+struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return map->mic_tm_regs;
+}
+
+struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return map->mic_tm_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
+
+u32 cbe_get_hw_thread_id(int cpu)
+{
+	return cbe_thread_map[cpu].thread_id;
+}
+EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id);
+
+u32 cbe_cpu_to_node(int cpu)
+{
+	return cbe_thread_map[cpu].cbe_id;
+}
+EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
+
+u32 cbe_node_to_cpu(int node)
+{
+	return cpumask_first(&cbe_local_mask[node]);
+}
+EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
+
+static struct device_node *cbe_get_be_node(int cpu_id)
+{
+	struct
device_node *np; + + for_each_node_by_type (np, "be") { + int len,i; + const phandle *cpu_handle; + + cpu_handle = of_get_property(np, "cpus", &len); + + /* + * the CAB SLOF tree is non compliant, so we just assume + * there is only one node + */ + if (WARN_ON_ONCE(!cpu_handle)) + return np; + + for (i=0; i<len; i++) + if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL)) + return np; + } + + return NULL; +} + +void __init cbe_fill_regs_map(struct cbe_regs_map *map) +{ + if(map->be_node) { + struct device_node *be, *np; + + be = map->be_node; + + for_each_node_by_type(np, "pervasive") + if (of_get_parent(np) == be) + map->pmd_regs = of_iomap(np, 0); + + for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller") + if (of_get_parent(np) == be) + map->iic_regs = of_iomap(np, 2); + + for_each_node_by_type(np, "mic-tm") + if (of_get_parent(np) == be) + map->mic_tm_regs = of_iomap(np, 0); + } else { + struct device_node *cpu; + /* That hack must die die die ! */ + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + + cpu = map->cpu_node; + + prop = of_get_property(cpu, "pervasive", NULL); + if (prop != NULL) + map->pmd_regs = ioremap(prop->address, prop->len); + + prop = of_get_property(cpu, "iic", NULL); + if (prop != NULL) + map->iic_regs = ioremap(prop->address, prop->len); + + prop = of_get_property(cpu, "mic-tm", NULL); + if (prop != NULL) + map->mic_tm_regs = ioremap(prop->address, prop->len); + } +} + + +void __init cbe_regs_init(void) +{ + int i; + unsigned int thread_id; + struct device_node *cpu; + + /* Build local fast map of CPUs */ + for_each_possible_cpu(i) { + cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id); + cbe_thread_map[i].be_node = cbe_get_be_node(i); + cbe_thread_map[i].thread_id = thread_id; + } + + /* Find maps for each device tree CPU */ + for_each_node_by_type(cpu, "cpu") { + struct cbe_regs_map *map; + unsigned int cbe_id; + + cbe_id = cbe_regs_map_count++; + map = &cbe_regs_maps[cbe_id]; + + if (cbe_regs_map_count > MAX_CBE) { + printk(KERN_ERR "cbe_regs: More BE chips than supported" + "!\n"); + cbe_regs_map_count--; + of_node_put(cpu); + return; + } + map->cpu_node = cpu; + + for_each_possible_cpu(i) { + struct cbe_thread_map *thread = &cbe_thread_map[i]; + + if (thread->cpu_node == cpu) { + thread->regs = map; + thread->cbe_id = cbe_id; + map->be_node = thread->be_node; + cpumask_set_cpu(i, &cbe_local_mask[cbe_id]); + if(thread->thread_id == 0) + cpumask_set_cpu(i, &cbe_first_online_cpu); + } + } + + cbe_fill_regs_map(map); + } +} + diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c new file mode 100644 index 000000000..2c15ff094 --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -0,0 +1,399 @@ +/* + * thermal support for the cell processor + * + * This module adds some sysfs attributes to cpu and spu nodes. + * Base for measurements are the digital thermal sensors (DTS) + * located on the chip. + * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius + * The attributes can be found under + * /sys/devices/system/cpu/cpuX/thermal + * /sys/devices/system/spu/spuX/thermal + * + * The following attributes are added for each node: + * temperature: + * contains the current temperature measured by the DTS + * throttle_begin: + * throttling begins when temperature is greater or equal to + * throttle_begin. Setting this value to 125 prevents throttling. 
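+ *	(e.g. writing 85 to this attribute arms throttling at 85 degrees)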
+ * throttle_end:
+ *	throttling ceases if the temperature is lower than
+ *	throttle_end. Due to the delay between applying throttling and
+ *	a reduced temperature, this value should be less than throttle_begin.
+ *	A value equal to throttle_begin provides only very little hysteresis.
+ * throttle_full_stop:
+ *	If the temperature is greater than or equal to throttle_full_stop,
+ *	full throttling is applied to the cpu or spu. This value should be
+ *	greater than throttle_begin and throttle_end. Setting this value to
+ *	65 prevents the unit from running code at all.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <asm/spu.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/cell-regs.h>
+
+#include "spu_priv1_mmio.h"
+
+#define TEMP_MIN 65
+#define TEMP_MAX 125
+
+#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode)			\
+struct device_attribute attr_ ## _prefix ## _ ## _name = {	\
+	.attr = { .name = __stringify(_name), .mode = _mode },	\
+	.show	= _prefix ## _show_ ## _name,			\
+	.store	= _prefix ## _store_ ## _name,			\
+};
+
+static inline u8 reg_to_temp(u8 reg_value)
+{
+	return ((reg_value & 0x3f) << 1) + TEMP_MIN;
+}
+
+static inline u8 temp_to_reg(u8 temp)
+{
+	return ((temp - TEMP_MIN) >> 1) & 0x3f;
+}
+
+static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev)
+{
+	struct spu *spu;
+
+	spu = container_of(dev, struct spu, dev);
+
+	return cbe_get_pmd_regs(spu_devnode(spu));
+}
+
+/* returns the value for a given spu in a given register */
+static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg)
+{
+	union spe_reg value;
+	struct spu *spu;
+
+	spu = container_of(dev, struct spu, dev);
+	value.val = in_be64(&reg->val);
+
+	return value.spe[spu->spe_id];
+}
+
+static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	u8 value;
+	struct cbe_pmd_regs __iomem *pmd_regs;
+
+	pmd_regs = get_pmd_regs(dev);
+
+	value = spu_read_register_value(dev, &pmd_regs->ts_ctsr1);
+
+	return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos)
+{
+	u64 value;
+
+	value = in_be64(&pmd_regs->tm_tpr.val);
+	/* access the corresponding byte */
+	value >>= pos;
+	value &= 0x3F;
+
+	return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
+{
+	u64 reg_value;
+	unsigned int temp;
+	u64 new_value;
+	int ret;
+
+	ret = sscanf(buf, "%u", &temp);
+
+	if (ret != 1 || temp < TEMP_MIN || temp > TEMP_MAX)
+		return -EINVAL;
+
+	new_value = temp_to_reg(temp);
+
+	reg_value =
in_be64(&pmd_regs->tm_tpr.val); + + /* zero out bits for new value */ + reg_value &= ~(0xffull << pos); + /* set bits to new value */ + reg_value |= new_value << pos; + + out_be64(&pmd_regs->tm_tpr.val, reg_value); + return size; +} + +static ssize_t spu_show_throttle_end(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 0); +} + +static ssize_t spu_show_throttle_begin(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 8); +} + +static ssize_t spu_show_throttle_full_stop(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 16); +} + +static ssize_t spu_store_throttle_end(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 0); +} + +static ssize_t spu_store_throttle_begin(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 8); +} + +static ssize_t spu_store_throttle_full_stop(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 16); +} + +static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos) +{ + struct cbe_pmd_regs __iomem *pmd_regs; + u64 value; + + pmd_regs = cbe_get_cpu_pmd_regs(dev->id); + value = in_be64(&pmd_regs->ts_ctsr2); + + value = (value >> pos) & 0x3f; + + return sprintf(buf, "%d\n", reg_to_temp(value)); +} + + +/* shows the temperature of the DTS on the PPE, + * located near the linear thermal sensor */ +static ssize_t ppe_show_temp0(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return ppe_show_temp(dev, buf, 32); +} + +/* shows the temperature of the second DTS on the PPE */ +static ssize_t ppe_show_temp1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return ppe_show_temp(dev, buf, 0); +} + +static ssize_t ppe_show_throttle_end(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 32); +} + +static ssize_t ppe_show_throttle_begin(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 40); +} + +static ssize_t ppe_show_throttle_full_stop(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 48); +} + +static ssize_t ppe_store_throttle_end(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 32); +} + +static ssize_t ppe_store_throttle_begin(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 40); +} + +static ssize_t ppe_store_throttle_full_stop(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 48); +} + + +static struct device_attribute attr_spu_temperature = { + .attr = {.name = "temperature", .mode = 0400 }, + .show = spu_show_temp, +}; + +static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600); + + +static struct attribute *spu_attributes[] = { + 
&attr_spu_temperature.attr, + &attr_spu_throttle_end.attr, + &attr_spu_throttle_begin.attr, + &attr_spu_throttle_full_stop.attr, + NULL, +}; + +static struct attribute_group spu_attribute_group = { + .name = "thermal", + .attrs = spu_attributes, +}; + +static struct device_attribute attr_ppe_temperature0 = { + .attr = {.name = "temperature0", .mode = 0400 }, + .show = ppe_show_temp0, +}; + +static struct device_attribute attr_ppe_temperature1 = { + .attr = {.name = "temperature1", .mode = 0400 }, + .show = ppe_show_temp1, +}; + +static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600); + +static struct attribute *ppe_attributes[] = { + &attr_ppe_temperature0.attr, + &attr_ppe_temperature1.attr, + &attr_ppe_throttle_end.attr, + &attr_ppe_throttle_begin.attr, + &attr_ppe_throttle_full_stop.attr, + NULL, +}; + +static struct attribute_group ppe_attribute_group = { + .name = "thermal", + .attrs = ppe_attributes, +}; + +/* + * initialize throttling with default values + */ +static int __init init_default_values(void) +{ + int cpu; + struct cbe_pmd_regs __iomem *pmd_regs; + struct device *dev; + union ppe_spe_reg tpr; + union spe_reg str1; + u64 str2; + union spe_reg cr1; + u64 cr2; + + /* TPR defaults */ + /* ppe + * 1F - no full stop + * 08 - dynamic throttling starts if over 80 degrees + * 03 - dynamic throttling ceases if below 70 degrees */ + tpr.ppe = 0x1F0803; + /* spe + * 10 - full stopped when over 96 degrees + * 08 - dynamic throttling starts if over 80 degrees + * 03 - dynamic throttling ceases if below 70 degrees + */ + tpr.spe = 0x100803; + + /* STR defaults */ + /* str1 + * 10 - stop 16 of 32 cycles + */ + str1.val = 0x1010101010101010ull; + /* str2 + * 10 - stop 16 of 32 cycles + */ + str2 = 0x10; + + /* CR defaults */ + /* cr1 + * 4 - normal operation + */ + cr1.val = 0x0404040404040404ull; + /* cr2 + * 4 - normal operation + */ + cr2 = 0x04; + + for_each_possible_cpu (cpu) { + pr_debug("processing cpu %d\n", cpu); + dev = get_cpu_device(cpu); + + if (!dev) { + pr_info("invalid dev pointer for cbe_thermal\n"); + return -EINVAL; + } + + pmd_regs = cbe_get_cpu_pmd_regs(dev->id); + + if (!pmd_regs) { + pr_info("invalid CBE regs pointer for cbe_thermal\n"); + return -EINVAL; + } + + out_be64(&pmd_regs->tm_str2, str2); + out_be64(&pmd_regs->tm_str1.val, str1.val); + out_be64(&pmd_regs->tm_tpr.val, tpr.val); + out_be64(&pmd_regs->tm_cr1.val, cr1.val); + out_be64(&pmd_regs->tm_cr2, cr2); + } + + return 0; +} + + +static int __init thermal_init(void) +{ + int rc = init_default_values(); + + if (rc == 0) { + spu_add_dev_attr_group(&spu_attribute_group); + cpu_add_dev_attr_group(&ppe_attribute_group); + } + + return rc; +} +module_init(thermal_init); + +static void __exit thermal_exit(void) +{ + spu_remove_dev_attr_group(&spu_attribute_group); + cpu_remove_dev_attr_group(&ppe_attribute_group); +} +module_exit(thermal_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); + diff --git a/arch/powerpc/platforms/cell/cell.h b/arch/powerpc/platforms/cell/cell.h new file mode 100644 index 000000000..ef143dfee --- /dev/null +++ b/arch/powerpc/platforms/cell/cell.h @@ -0,0 +1,24 @@ +/* + * Cell Platform common data structures + * + * Copyright 2015, Daniel Axtens, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software 
Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef CELL_H +#define CELL_H + +#include <asm/pci-bridge.h> + +extern struct pci_controller_ops cell_pci_controller_ops; + +#endif diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c new file mode 100644 index 000000000..82607d621 --- /dev/null +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -0,0 +1,171 @@ +/* + * spu aware cpufreq governor for the cell processor + * + * © Copyright IBM Corporation 2006-2008 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/cpufreq.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/workqueue.h> +#include <linux/atomic.h> +#include <asm/machdep.h> +#include <asm/spu.h> + +#define POLL_TIME 100000 /* in µs */ +#define EXP 753 /* exp(-1) in fixed-point */ + +struct spu_gov_info_struct { + unsigned long busy_spus; /* fixed-point */ + struct cpufreq_policy *policy; + struct delayed_work work; + unsigned int poll_int; /* µs */ +}; +static DEFINE_PER_CPU(struct spu_gov_info_struct, spu_gov_info); + +static int calc_freq(struct spu_gov_info_struct *info) +{ + int cpu; + int busy_spus; + + cpu = info->policy->cpu; + busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus); + + CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1); + pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n", + cpu, busy_spus, info->busy_spus); + + return info->policy->max * info->busy_spus / FIXED_1; +} + +static void spu_gov_work(struct work_struct *work) +{ + struct spu_gov_info_struct *info; + int delay; + unsigned long target_freq; + + info = container_of(work, struct spu_gov_info_struct, work.work); + + /* after cancel_delayed_work_sync we unset info->policy */ + BUG_ON(info->policy == NULL); + + target_freq = calc_freq(info); + __cpufreq_driver_target(info->policy, target_freq, CPUFREQ_RELATION_H); + + delay = usecs_to_jiffies(info->poll_int); + schedule_delayed_work_on(info->policy->cpu, &info->work, delay); +} + +static void spu_gov_init_work(struct spu_gov_info_struct *info) +{ + int delay = usecs_to_jiffies(info->poll_int); + INIT_DEFERRABLE_WORK(&info->work, spu_gov_work); + schedule_delayed_work_on(info->policy->cpu, &info->work, delay); +} + +static void spu_gov_cancel_work(struct spu_gov_info_struct *info) +{ + cancel_delayed_work_sync(&info->work); +} + +static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct spu_gov_info_struct *info, 
*affected_info;
+	int i;
+	int ret = 0;
+
+	info = &per_cpu(spu_gov_info, cpu);
+
+	switch (event) {
+	case CPUFREQ_GOV_START:
+		if (!cpu_online(cpu)) {
+			printk(KERN_ERR "cpu %d is not online\n", cpu);
+			ret = -EINVAL;
+			break;
+		}
+
+		if (!policy->cur) {
+			printk(KERN_ERR "no cpu specified in policy\n");
+			ret = -EINVAL;
+			break;
+		}
+
+		/* initialize spu_gov_info for all affected cpus */
+		for_each_cpu(i, policy->cpus) {
+			affected_info = &per_cpu(spu_gov_info, i);
+			affected_info->policy = policy;
+		}
+
+		info->poll_int = POLL_TIME;
+
+		/* setup timer */
+		spu_gov_init_work(info);
+
+		break;
+
+	case CPUFREQ_GOV_STOP:
+		/* cancel timer */
+		spu_gov_cancel_work(info);
+
+		/* clean spu_gov_info for all affected cpus */
+		for_each_cpu (i, policy->cpus) {
+			info = &per_cpu(spu_gov_info, i);
+			info->policy = NULL;
+		}
+
+		break;
+	}
+
+	return ret;
+}
+
+static struct cpufreq_governor spu_governor = {
+	.name = "spudemand",
+	.governor = spu_gov_govern,
+	.owner = THIS_MODULE,
+};
+
+/*
+ * module init and destroy
+ */
+
+static int __init spu_gov_init(void)
+{
+	int ret;
+
+	ret = cpufreq_register_governor(&spu_governor);
+	if (ret)
+		printk(KERN_ERR "registration of governor failed\n");
+	return ret;
+}
+
+static void __exit spu_gov_exit(void)
+{
+	cpufreq_unregister_governor(&spu_governor);
+}
+
+
+module_init(spu_gov_init);
+module_exit(spu_gov_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
new file mode 100644
index 000000000..3af8324c1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -0,0 +1,411 @@
+/*
+ * Cell Internal Interrupt Controller
+ *
+ * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *                    IBM, Corp.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * TODO:
+ * - Fix various assumptions related to HW CPU numbers vs.
linux CPU numbers + * vs node numbers in the setup code + * - Implement proper handling of maxcpus=1/2 (that is, routing of irqs from + * a non-active node to the active node) + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/export.h> +#include <linux/percpu.h> +#include <linux/types.h> +#include <linux/ioport.h> +#include <linux/kernel_stat.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/ptrace.h> +#include <asm/machdep.h> +#include <asm/cell-regs.h> + +#include "interrupt.h" + +struct iic { + struct cbe_iic_thread_regs __iomem *regs; + u8 target_id; + u8 eoi_stack[16]; + int eoi_ptr; + struct device_node *node; +}; + +static DEFINE_PER_CPU(struct iic, cpu_iic); +#define IIC_NODE_COUNT 2 +static struct irq_domain *iic_host; + +/* Convert between "pending" bits and hw irq number */ +static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits) +{ + unsigned char unit = bits.source & 0xf; + unsigned char node = bits.source >> 4; + unsigned char class = bits.class & 3; + + /* Decode IPIs */ + if (bits.flags & CBE_IIC_IRQ_IPI) + return IIC_IRQ_TYPE_IPI | (bits.prio >> 4); + else + return (node << IIC_IRQ_NODE_SHIFT) | (class << 4) | unit; +} + +static void iic_mask(struct irq_data *d) +{ +} + +static void iic_unmask(struct irq_data *d) +{ +} + +static void iic_eoi(struct irq_data *d) +{ + struct iic *iic = this_cpu_ptr(&cpu_iic); + out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); + BUG_ON(iic->eoi_ptr < 0); +} + +static struct irq_chip iic_chip = { + .name = "CELL-IIC", + .irq_mask = iic_mask, + .irq_unmask = iic_unmask, + .irq_eoi = iic_eoi, +}; + + +static void iic_ioexc_eoi(struct irq_data *d) +{ +} + +static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct cbe_iic_regs __iomem *node_iic = + (void __iomem *)irq_desc_get_handler_data(desc); + unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC; + unsigned long bits, ack; + int cascade; + + for (;;) { + bits = in_be64(&node_iic->iic_is); + if (bits == 0) + break; + /* pre-ack edge interrupts */ + ack = bits & IIC_ISR_EDGE_MASK; + if (ack) + out_be64(&node_iic->iic_is, ack); + /* handle them */ + for (cascade = 63; cascade >= 0; cascade--) + if (bits & (0x8000000000000000UL >> cascade)) { + unsigned int cirq = + irq_linear_revmap(iic_host, + base | cascade); + if (cirq != NO_IRQ) + generic_handle_irq(cirq); + } + /* post-ack level interrupts */ + ack = bits & ~IIC_ISR_EDGE_MASK; + if (ack) + out_be64(&node_iic->iic_is, ack); + } + chip->irq_eoi(&desc->irq_data); +} + + +static struct irq_chip iic_ioexc_chip = { + .name = "CELL-IOEX", + .irq_mask = iic_mask, + .irq_unmask = iic_unmask, + .irq_eoi = iic_ioexc_eoi, +}; + +/* Get an IRQ number from the pending state register of the IIC */ +static unsigned int iic_get_irq(void) +{ + struct cbe_iic_pending_bits pending; + struct iic *iic; + unsigned int virq; + + iic = this_cpu_ptr(&cpu_iic); + *(unsigned long *) &pending = + in_be64((u64 __iomem *) &iic->regs->pending_destr); + if (!(pending.flags & CBE_IIC_IRQ_VALID)) + return NO_IRQ; + virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending)); + if (virq == NO_IRQ) + return NO_IRQ; + iic->eoi_stack[++iic->eoi_ptr] = pending.prio; + BUG_ON(iic->eoi_ptr > 15); + return virq; +} + +void iic_setup_cpu(void) +{ + out_be64(&this_cpu_ptr(&cpu_iic)->regs->prio, 0xff); +} + +u8 iic_get_target_id(int cpu) +{ + return per_cpu(cpu_iic, cpu).target_id; +} + 
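+/* also used by other platform code (e.g. spider-pic) to route device
+ * interrupts at a particular HW thread */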
+EXPORT_SYMBOL_GPL(iic_get_target_id); + +#ifdef CONFIG_SMP + +/* Use the highest interrupt priorities for IPI */ +static inline int iic_msg_to_irq(int msg) +{ + return IIC_IRQ_TYPE_IPI + 0xf - msg; +} + +void iic_message_pass(int cpu, int msg) +{ + out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4); +} + +struct irq_domain *iic_get_irq_host(int node) +{ + return iic_host; +} +EXPORT_SYMBOL_GPL(iic_get_irq_host); + +static void iic_request_ipi(int msg) +{ + int virq; + + virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg)); + if (virq == NO_IRQ) { + printk(KERN_ERR + "iic: failed to map IPI %s\n", smp_ipi_name[msg]); + return; + } + + /* + * If smp_request_message_ipi encounters an error it will notify + * the error. If a message is not needed it will return non-zero. + */ + if (smp_request_message_ipi(virq, msg)) + irq_dispose_mapping(virq); +} + +void iic_request_IPIs(void) +{ + iic_request_ipi(PPC_MSG_CALL_FUNCTION); + iic_request_ipi(PPC_MSG_RESCHEDULE); + iic_request_ipi(PPC_MSG_TICK_BROADCAST); + iic_request_ipi(PPC_MSG_DEBUGGER_BREAK); +} + +#endif /* CONFIG_SMP */ + + +static int iic_host_match(struct irq_domain *h, struct device_node *node) +{ + return of_device_is_compatible(node, + "IBM,CBEA-Internal-Interrupt-Controller"); +} + +static int iic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + switch (hw & IIC_IRQ_TYPE_MASK) { + case IIC_IRQ_TYPE_IPI: + irq_set_chip_and_handler(virq, &iic_chip, handle_percpu_irq); + break; + case IIC_IRQ_TYPE_IOEXC: + irq_set_chip_and_handler(virq, &iic_ioexc_chip, + handle_edge_eoi_irq); + break; + default: + irq_set_chip_and_handler(virq, &iic_chip, handle_edge_eoi_irq); + } + return 0; +} + +static int iic_host_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + unsigned int node, ext, unit, class; + const u32 *val; + + if (!of_device_is_compatible(ct, + "IBM,CBEA-Internal-Interrupt-Controller")) + return -ENODEV; + if (intsize != 1) + return -ENODEV; + val = of_get_property(ct, "#interrupt-cells", NULL); + if (val == NULL || *val != 1) + return -ENODEV; + + node = intspec[0] >> 24; + ext = (intspec[0] >> 16) & 0xff; + class = (intspec[0] >> 8) & 0xff; + unit = intspec[0] & 0xff; + + /* Check if node is in supported range */ + if (node > 1) + return -EINVAL; + + /* Build up interrupt number, special case for IO exceptions */ + *out_hwirq = (node << IIC_IRQ_NODE_SHIFT); + if (unit == IIC_UNIT_IIC && class == 1) + *out_hwirq |= IIC_IRQ_TYPE_IOEXC | ext; + else + *out_hwirq |= IIC_IRQ_TYPE_NORMAL | + (class << IIC_IRQ_CLASS_SHIFT) | unit; + + /* Dummy flags, ignored by iic code */ + *out_flags = IRQ_TYPE_EDGE_RISING; + + return 0; +} + +static const struct irq_domain_ops iic_host_ops = { + .match = iic_host_match, + .map = iic_host_map, + .xlate = iic_host_xlate, +}; + +static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr, + struct device_node *node) +{ + /* XXX FIXME: should locate the linux CPU number from the HW cpu + * number properly. We are lucky for now + */ + struct iic *iic = &per_cpu(cpu_iic, hw_cpu); + + iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs)); + BUG_ON(iic->regs == NULL); + + iic->target_id = ((hw_cpu & 2) << 3) | ((hw_cpu & 1) ? 
0xf : 0xe); + iic->eoi_stack[0] = 0xff; + iic->node = of_node_get(node); + out_be64(&iic->regs->prio, 0); + + printk(KERN_INFO "IIC for CPU %d target id 0x%x : %s\n", + hw_cpu, iic->target_id, node->full_name); +} + +static int __init setup_iic(void) +{ + struct device_node *dn; + struct resource r0, r1; + unsigned int node, cascade, found = 0; + struct cbe_iic_regs __iomem *node_iic; + const u32 *np; + + for (dn = NULL; + (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) { + if (!of_device_is_compatible(dn, + "IBM,CBEA-Internal-Interrupt-Controller")) + continue; + np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL); + if (np == NULL) { + printk(KERN_WARNING "IIC: CPU association not found\n"); + of_node_put(dn); + return -ENODEV; + } + if (of_address_to_resource(dn, 0, &r0) || + of_address_to_resource(dn, 1, &r1)) { + printk(KERN_WARNING "IIC: Can't resolve addresses\n"); + of_node_put(dn); + return -ENODEV; + } + found++; + init_one_iic(np[0], r0.start, dn); + init_one_iic(np[1], r1.start, dn); + + /* Setup cascade for IO exceptions. XXX cleanup tricks to get + * node vs CPU etc... + * Note that we configure the IIC_IRR here with a hard coded + * priority of 1. We might want to improve that later. + */ + node = np[0] >> 1; + node_iic = cbe_get_cpu_iic_regs(np[0]); + cascade = node << IIC_IRQ_NODE_SHIFT; + cascade |= 1 << IIC_IRQ_CLASS_SHIFT; + cascade |= IIC_UNIT_IIC; + cascade = irq_create_mapping(iic_host, cascade); + if (cascade == NO_IRQ) + continue; + /* + * irq_data is a generic pointer that gets passed back + * to us later, so the forced cast is fine. + */ + irq_set_handler_data(cascade, (void __force *)node_iic); + irq_set_chained_handler(cascade, iic_ioexc_cascade); + out_be64(&node_iic->iic_ir, + (1 << 12) /* priority */ | + (node << 4) /* dest node */ | + IIC_UNIT_THREAD_0 /* route them to thread 0 */); + /* Flush pending (make sure it triggers if there is + * anything pending + */ + out_be64(&node_iic->iic_is, 0xfffffffffffffffful); + } + + if (found) + return 0; + else + return -ENODEV; +} + +void __init iic_init_IRQ(void) +{ + /* Setup an irq host data structure */ + iic_host = irq_domain_add_linear(NULL, IIC_SOURCE_COUNT, &iic_host_ops, + NULL); + BUG_ON(iic_host == NULL); + irq_set_default_host(iic_host); + + /* Discover and initialize iics */ + if (setup_iic() < 0) + panic("IIC: Failed to initialize !\n"); + + /* Set master interrupt handling function */ + ppc_md.get_irq = iic_get_irq; + + /* Enable on current CPU */ + iic_setup_cpu(); +} + +void iic_set_interrupt_routing(int cpu, int thread, int priority) +{ + struct cbe_iic_regs __iomem *iic_regs = cbe_get_cpu_iic_regs(cpu); + u64 iic_ir = 0; + int node = cpu >> 1; + + /* Set which node and thread will handle the next interrupt */ + iic_ir |= CBE_IIC_IR_PRIO(priority) | + CBE_IIC_IR_DEST_NODE(node); + if (thread == 0) + iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0); + else + iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1); + out_be64(&iic_regs->iic_ir, iic_ir); +} diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h new file mode 100644 index 000000000..4f60ae6ca --- /dev/null +++ b/arch/powerpc/platforms/cell/interrupt.h @@ -0,0 +1,89 @@ +#ifndef ASM_CELL_PIC_H +#define ASM_CELL_PIC_H +#ifdef __KERNEL__ +/* + * Mapping of IIC pending bits into per-node interrupt numbers. + * + * Interrupt numbers are in the range 0...0x1ff where the top bit + * (0x100) represent the source node. 
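+ * For example, hwirq 0x12b is the IOIF1 external interrupt (class 2,
+ * unit 0xb) on node 1, while the same source on node 0 is plain 0x02b
+ * (see IIC_IRQ_EXT_IOIF1 below).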
Only 2 nodes are supported with + * the current code though it's trivial to extend that if necessary using + * higher level bits + * + * The bottom 8 bits are split into 2 type bits and 6 data bits that + * depend on the type: + * + * 00 (0x00 | data) : normal interrupt. data is (class << 4) | source + * 01 (0x40 | data) : IO exception. data is the exception number as + * defined by bit numbers in IIC_SR + * 10 (0x80 | data) : IPI. data is the IPI number (obtained from the priority) + * and node is always 0 (IPIs are per-cpu, their source is + * not relevant) + * 11 (0xc0 | data) : reserved + * + * In addition, interrupt number 0x80000000 is defined as always invalid + * (that is the node field is expected to never extend to move than 23 bits) + * + */ + +enum { + IIC_IRQ_INVALID = 0x80000000u, + IIC_IRQ_NODE_MASK = 0x100, + IIC_IRQ_NODE_SHIFT = 8, + IIC_IRQ_MAX = 0x1ff, + IIC_IRQ_TYPE_MASK = 0xc0, + IIC_IRQ_TYPE_NORMAL = 0x00, + IIC_IRQ_TYPE_IOEXC = 0x40, + IIC_IRQ_TYPE_IPI = 0x80, + IIC_IRQ_CLASS_SHIFT = 4, + IIC_IRQ_CLASS_0 = 0x00, + IIC_IRQ_CLASS_1 = 0x10, + IIC_IRQ_CLASS_2 = 0x20, + IIC_SOURCE_COUNT = 0x200, + + /* Here are defined the various source/dest units. Avoid using those + * definitions if you can, they are mostly here for reference + */ + IIC_UNIT_SPU_0 = 0x4, + IIC_UNIT_SPU_1 = 0x7, + IIC_UNIT_SPU_2 = 0x3, + IIC_UNIT_SPU_3 = 0x8, + IIC_UNIT_SPU_4 = 0x2, + IIC_UNIT_SPU_5 = 0x9, + IIC_UNIT_SPU_6 = 0x1, + IIC_UNIT_SPU_7 = 0xa, + IIC_UNIT_IOC_0 = 0x0, + IIC_UNIT_IOC_1 = 0xb, + IIC_UNIT_THREAD_0 = 0xe, /* target only */ + IIC_UNIT_THREAD_1 = 0xf, /* target only */ + IIC_UNIT_IIC = 0xe, /* source only (IO exceptions) */ + + /* Base numbers for the external interrupts */ + IIC_IRQ_EXT_IOIF0 = + IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_0, + IIC_IRQ_EXT_IOIF1 = + IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_1, + + /* Base numbers for the IIC_ISR interrupts */ + IIC_IRQ_IOEX_TMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 63, + IIC_IRQ_IOEX_PMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 62, + IIC_IRQ_IOEX_ATI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 61, + IIC_IRQ_IOEX_MATBFI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 60, + IIC_IRQ_IOEX_ELDI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 59, + + /* Which bits in IIC_ISR are edge sensitive */ + IIC_ISR_EDGE_MASK = 0x4ul, +}; + +extern void iic_init_IRQ(void); +extern void iic_message_pass(int cpu, int msg); +extern void iic_request_IPIs(void); +extern void iic_setup_cpu(void); + +extern u8 iic_get_target_id(int cpu); + +extern void spider_init_IRQ(void); + +extern void iic_set_interrupt_routing(int cpu, int thread, int priority); + +#endif +#endif /* ASM_CELL_PIC_H */ diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c new file mode 100644 index 000000000..21b502398 --- /dev/null +++ b/arch/powerpc/platforms/cell/iommu.c @@ -0,0 +1,1237 @@ +/* + * IOMMU implementation for Cell Broadband Processor Architecture + * + * (C) Copyright IBM Corporation 2006-2008 + * + * Author: Jeremy Kerr <jk@ozlabs.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/memblock.h> + +#include <asm/prom.h> +#include <asm/iommu.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/udbg.h> +#include <asm/firmware.h> +#include <asm/cell-regs.h> + +#include "cell.h" +#include "interrupt.h" + +/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages + * instead of leaving them mapped to some dummy page. This can be + * enabled once the appropriate workarounds for spider bugs have + * been enabled + */ +#define CELL_IOMMU_REAL_UNMAP + +/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of + * IO PTEs based on the transfer direction. That can be enabled + * once spider-net has been fixed to pass the correct direction + * to the DMA mapping functions + */ +#define CELL_IOMMU_STRICT_PROTECTION + + +#define NR_IOMMUS 2 + +/* IOC mmap registers */ +#define IOC_Reg_Size 0x2000 + +#define IOC_IOPT_CacheInvd 0x908 +#define IOC_IOPT_CacheInvd_NE_Mask 0xffe0000000000000ul +#define IOC_IOPT_CacheInvd_IOPTE_Mask 0x000003fffffffff8ul +#define IOC_IOPT_CacheInvd_Busy 0x0000000000000001ul + +#define IOC_IOST_Origin 0x918 +#define IOC_IOST_Origin_E 0x8000000000000000ul +#define IOC_IOST_Origin_HW 0x0000000000000800ul +#define IOC_IOST_Origin_HL 0x0000000000000400ul + +#define IOC_IO_ExcpStat 0x920 +#define IOC_IO_ExcpStat_V 0x8000000000000000ul +#define IOC_IO_ExcpStat_SPF_Mask 0x6000000000000000ul +#define IOC_IO_ExcpStat_SPF_S 0x6000000000000000ul +#define IOC_IO_ExcpStat_SPF_P 0x2000000000000000ul +#define IOC_IO_ExcpStat_ADDR_Mask 0x00000007fffff000ul +#define IOC_IO_ExcpStat_RW_Mask 0x0000000000000800ul +#define IOC_IO_ExcpStat_IOID_Mask 0x00000000000007fful + +#define IOC_IO_ExcpMask 0x928 +#define IOC_IO_ExcpMask_SFE 0x4000000000000000ul +#define IOC_IO_ExcpMask_PFE 0x2000000000000000ul + +#define IOC_IOCmd_Offset 0x1000 + +#define IOC_IOCmd_Cfg 0xc00 +#define IOC_IOCmd_Cfg_TE 0x0000800000000000ul + + +/* Segment table entries */ +#define IOSTE_V 0x8000000000000000ul /* valid */ +#define IOSTE_H 0x4000000000000000ul /* cache hint */ +#define IOSTE_PT_Base_RPN_Mask 0x3ffffffffffff000ul /* base RPN of IOPT */ +#define IOSTE_NPPT_Mask 0x0000000000000fe0ul /* no. 
pages in IOPT */
+#define IOSTE_PS_Mask		0x0000000000000007ul /* page size */
+#define IOSTE_PS_4K		0x0000000000000001ul /*   - 4kB  */
+#define IOSTE_PS_64K		0x0000000000000003ul /*   - 64kB */
+#define IOSTE_PS_1M		0x0000000000000005ul /*   - 1MB  */
+#define IOSTE_PS_16M		0x0000000000000007ul /*   - 16MB */
+
+
+/* IOMMU sizing */
+#define IO_SEGMENT_SHIFT	28
+#define IO_PAGENO_BITS(shift)	(IO_SEGMENT_SHIFT - (shift))
+
+/* The high bit needs to be set on every DMA address */
+#define SPIDER_DMA_OFFSET	0x80000000ul
+
+struct iommu_window {
+	struct list_head list;
+	struct cbe_iommu *iommu;
+	unsigned long offset;
+	unsigned long size;
+	unsigned int ioid;
+	struct iommu_table table;
+};
+
+#define NAMESIZE 8
+struct cbe_iommu {
+	int nid;
+	char name[NAMESIZE];
+	void __iomem *xlate_regs;
+	void __iomem *cmd_regs;
+	unsigned long *stab;
+	unsigned long *ptab;
+	void *pad_page;
+	struct list_head windows;
+};
+
+/* Static array of iommus, one per node
+ *   each contains a list of windows, keyed from dma_window property
+ *   - on bus setup, look for a matching window, or create one
+ *   - on dev setup, assign iommu_table ptr
+ */
+static struct cbe_iommu iommus[NR_IOMMUS];
+static int cbe_nr_iommus;
+
+static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
+		long n_ptes)
+{
+	u64 __iomem *reg;
+	u64 val;
+	long n;
+
+	reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
+
+	while (n_ptes > 0) {
+		/* we can invalidate up to 1 << 11 PTEs at once */
+		n = min(n_ptes, 1l << 11);
+		val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask)
+			| (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask)
+			| IOC_IOPT_CacheInvd_Busy;
+
+		out_be64(reg, val);
+		while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy)
+			;
+
+		n_ptes -= n;
+		pte += n;
+	}
+}
+
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		struct dma_attrs *attrs)
+{
+	int i;
+	unsigned long *io_pte, base_pte;
+	struct iommu_window *window =
+		container_of(tbl, struct iommu_window, table);
+
+	/* implementing proper protection causes problems with the spidernet
+	 * driver - check mapping directions later, but allow read & write by
+	 * default for now. */
+#ifdef CELL_IOMMU_STRICT_PROTECTION
+	/* to avoid referencing a global, we use a trick here to set up the
+	 * protection bits. "prot" is set up to be 3 fields of 4 bits appended
+	 * together for each of the 3 supported direction values. It is then
+	 * shifted left so that the fields matching the desired direction
+	 * land on the appropriate bits, and other bits are masked out.
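+	 *
+	 * Worked through (illustrative; this assumes the generic DMA API
+	 * direction values 0 = DMA_BIDIRECTIONAL, 1 = DMA_TO_DEVICE,
+	 * 2 = DMA_FROM_DEVICE, and that CBE_IOPTE_PP_W / CBE_IOPTE_PP_R
+	 * are the two top bits of the IOPTE, which the shift amounts
+	 * imply): prot = 0xc48 packs the nibbles 0xc, 0x4 and 0x8.
+	 * Direction 0 shifts the 0xc nibble to the top, so both
+	 * permission bits survive the mask; direction 1 puts 0x4 there,
+	 * leaving only the read bit (the device only reads memory); and
+	 * direction 2 puts 0x8 there, leaving only the write bit.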
+ */ + const unsigned long prot = 0xc48; + base_pte = + ((prot << (52 + 4 * direction)) & + (CBE_IOPTE_PP_W | CBE_IOPTE_PP_R)) | + CBE_IOPTE_M | CBE_IOPTE_SO_RW | + (window->ioid & CBE_IOPTE_IOID_Mask); +#else + base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M | + CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask); +#endif + if (unlikely(dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))) + base_pte &= ~CBE_IOPTE_SO_RW; + + io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); + + for (i = 0; i < npages; i++, uaddr += (1 << tbl->it_page_shift)) + io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask); + + mb(); + + invalidate_tce_cache(window->iommu, io_pte, npages); + + pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", + index, npages, direction, base_pte); + return 0; +} + +static void tce_free_cell(struct iommu_table *tbl, long index, long npages) +{ + + int i; + unsigned long *io_pte, pte; + struct iommu_window *window = + container_of(tbl, struct iommu_window, table); + + pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages); + +#ifdef CELL_IOMMU_REAL_UNMAP + pte = 0; +#else + /* spider bridge does PCI reads after freeing - insert a mapping + * to a scratch page instead of an invalid entry */ + pte = CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW | + __pa(window->iommu->pad_page) | + (window->ioid & CBE_IOPTE_IOID_Mask); +#endif + + io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); + + for (i = 0; i < npages; i++) + io_pte[i] = pte; + + mb(); + + invalidate_tce_cache(window->iommu, io_pte, npages); +} + +static irqreturn_t ioc_interrupt(int irq, void *data) +{ + unsigned long stat, spf; + struct cbe_iommu *iommu = data; + + stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); + spf = stat & IOC_IO_ExcpStat_SPF_Mask; + + /* Might want to rate limit it */ + printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat); + printk(KERN_ERR " V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n", + !!(stat & IOC_IO_ExcpStat_V), + (spf == IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ', + (spf == IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ', + (stat & IOC_IO_ExcpStat_RW_Mask) ? 
"Read" : "Write", + (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask)); + printk(KERN_ERR " page=0x%016lx\n", + stat & IOC_IO_ExcpStat_ADDR_Mask); + + /* clear interrupt */ + stat &= ~IOC_IO_ExcpStat_V; + out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat); + + return IRQ_HANDLED; +} + +static int cell_iommu_find_ioc(int nid, unsigned long *base) +{ + struct device_node *np; + struct resource r; + + *base = 0; + + /* First look for new style /be nodes */ + for_each_node_by_name(np, "ioc") { + if (of_node_to_nid(np) != nid) + continue; + if (of_address_to_resource(np, 0, &r)) { + printk(KERN_ERR "iommu: can't get address for %s\n", + np->full_name); + continue; + } + *base = r.start; + of_node_put(np); + return 0; + } + + /* Ok, let's try the old way */ + for_each_node_by_type(np, "cpu") { + const unsigned int *nidp; + const unsigned long *tmp; + + nidp = of_get_property(np, "node-id", NULL); + if (nidp && *nidp == nid) { + tmp = of_get_property(np, "ioc-translation", NULL); + if (tmp) { + *base = *tmp; + of_node_put(np); + return 0; + } + } + } + + return -ENODEV; +} + +static void cell_iommu_setup_stab(struct cbe_iommu *iommu, + unsigned long dbase, unsigned long dsize, + unsigned long fbase, unsigned long fsize) +{ + struct page *page; + unsigned long segments, stab_size; + + segments = max(dbase + dsize, fbase + fsize) >> IO_SEGMENT_SHIFT; + + pr_debug("%s: iommu[%d]: segments: %lu\n", + __func__, iommu->nid, segments); + + /* set up the segment table */ + stab_size = segments * sizeof(unsigned long); + page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(stab_size)); + BUG_ON(!page); + iommu->stab = page_address(page); + memset(iommu->stab, 0, stab_size); +} + +static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu, + unsigned long base, unsigned long size, unsigned long gap_base, + unsigned long gap_size, unsigned long page_shift) +{ + struct page *page; + int i; + unsigned long reg, segments, pages_per_segment, ptab_size, + n_pte_pages, start_seg, *ptab; + + start_seg = base >> IO_SEGMENT_SHIFT; + segments = size >> IO_SEGMENT_SHIFT; + pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift); + /* PTEs for each segment must start on a 4K bounday */ + pages_per_segment = max(pages_per_segment, + (1 << 12) / sizeof(unsigned long)); + + ptab_size = segments * pages_per_segment * sizeof(unsigned long); + pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__, + iommu->nid, ptab_size, get_order(ptab_size)); + page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size)); + BUG_ON(!page); + + ptab = page_address(page); + memset(ptab, 0, ptab_size); + + /* number of 4K pages needed for a page table */ + n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12; + + pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n", + __func__, iommu->nid, iommu->stab, ptab, + n_pte_pages); + + /* initialise the STEs */ + reg = IOSTE_V | ((n_pte_pages - 1) << 5); + + switch (page_shift) { + case 12: reg |= IOSTE_PS_4K; break; + case 16: reg |= IOSTE_PS_64K; break; + case 20: reg |= IOSTE_PS_1M; break; + case 24: reg |= IOSTE_PS_16M; break; + default: BUG(); + } + + gap_base = gap_base >> IO_SEGMENT_SHIFT; + gap_size = gap_size >> IO_SEGMENT_SHIFT; + + pr_debug("Setting up IOMMU stab:\n"); + for (i = start_seg; i < (start_seg + segments); i++) { + if (i >= gap_base && i < (gap_base + gap_size)) { + pr_debug("\toverlap at %d, skipping\n", i); + continue; + } + iommu->stab[i] = reg | (__pa(ptab) + (n_pte_pages << 12) * + (i - start_seg)); + 
pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]); + } + + return ptab; +} + +static void cell_iommu_enable_hardware(struct cbe_iommu *iommu) +{ + int ret; + unsigned long reg, xlate_base; + unsigned int virq; + + if (cell_iommu_find_ioc(iommu->nid, &xlate_base)) + panic("%s: missing IOC register mappings for node %d\n", + __func__, iommu->nid); + + iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size); + iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset; + + /* ensure that the STEs have updated */ + mb(); + + /* setup interrupts for the iommu. */ + reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); + out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, + reg & ~IOC_IO_ExcpStat_V); + out_be64(iommu->xlate_regs + IOC_IO_ExcpMask, + IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE); + + virq = irq_create_mapping(NULL, + IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT)); + BUG_ON(virq == NO_IRQ); + + ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu); + BUG_ON(ret); + + /* set the IOC segment table origin register (and turn on the iommu) */ + reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW; + out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg); + in_be64(iommu->xlate_regs + IOC_IOST_Origin); + + /* turn on IO translation */ + reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE; + out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg); +} + +static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, + unsigned long base, unsigned long size) +{ + cell_iommu_setup_stab(iommu, base, size, 0, 0); + iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0, + IOMMU_PAGE_SHIFT_4K); + cell_iommu_enable_hardware(iommu); +} + +#if 0/* Unused for now */ +static struct iommu_window *find_window(struct cbe_iommu *iommu, + unsigned long offset, unsigned long size) +{ + struct iommu_window *window; + + /* todo: check for overlapping (but not equal) windows) */ + + list_for_each_entry(window, &(iommu->windows), list) { + if (window->offset == offset && window->size == size) + return window; + } + + return NULL; +} +#endif + +static inline u32 cell_iommu_get_ioid(struct device_node *np) +{ + const u32 *ioid; + + ioid = of_get_property(np, "ioid", NULL); + if (ioid == NULL) { + printk(KERN_WARNING "iommu: missing ioid for %s using 0\n", + np->full_name); + return 0; + } + + return *ioid; +} + +static struct iommu_window * __init +cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, + unsigned long offset, unsigned long size, + unsigned long pte_offset) +{ + struct iommu_window *window; + struct page *page; + u32 ioid; + + ioid = cell_iommu_get_ioid(np); + + window = kzalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid); + BUG_ON(window == NULL); + + window->offset = offset; + window->size = size; + window->ioid = ioid; + window->iommu = iommu; + + window->table.it_blocksize = 16; + window->table.it_base = (unsigned long)iommu->ptab; + window->table.it_index = iommu->nid; + window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; + window->table.it_offset = + (offset >> window->table.it_page_shift) + pte_offset; + window->table.it_size = size >> window->table.it_page_shift; + + iommu_init_table(&window->table, iommu->nid); + + pr_debug("\tioid %d\n", window->ioid); + pr_debug("\tblocksize %ld\n", window->table.it_blocksize); + pr_debug("\tbase 0x%016lx\n", window->table.it_base); + pr_debug("\toffset 0x%lx\n", window->table.it_offset); + pr_debug("\tsize %ld\n", window->table.it_size); + + list_add(&window->list, &iommu->windows); + + if (offset != 0) + return 
window; + + /* We need to map and reserve the first IOMMU page since it's used + * by the spider workaround. In theory, we only need to do that when + * running on spider but it doesn't really matter. + * + * This code also assumes that we have a window that starts at 0, + * which is the case on all spider based blades. + */ + page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0); + BUG_ON(!page); + iommu->pad_page = page_address(page); + clear_page(iommu->pad_page); + + __set_bit(0, window->table.it_map); + tce_build_cell(&window->table, window->table.it_offset, 1, + (unsigned long)iommu->pad_page, DMA_TO_DEVICE, NULL); + + return window; +} + +static struct cbe_iommu *cell_iommu_for_node(int nid) +{ + int i; + + for (i = 0; i < cbe_nr_iommus; i++) + if (iommus[i].nid == nid) + return &iommus[i]; + return NULL; +} + +static unsigned long cell_dma_direct_offset; + +static unsigned long dma_iommu_fixed_base; + +/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */ +static int iommu_fixed_is_weak; + +static struct iommu_table *cell_get_iommu_table(struct device *dev) +{ + struct iommu_window *window; + struct cbe_iommu *iommu; + + /* Current implementation uses the first window available in that + * node's iommu. We -might- do something smarter later though it may + * never be necessary + */ + iommu = cell_iommu_for_node(dev_to_node(dev)); + if (iommu == NULL || list_empty(&iommu->windows)) { + dev_err(dev, "iommu: missing iommu for %s (node %d)\n", + of_node_full_name(dev->of_node), dev_to_node(dev)); + return NULL; + } + window = list_entry(iommu->windows.next, struct iommu_window, list); + + return &window->table; +} + +/* A coherent allocation implies strong ordering */ + +static void *dma_fixed_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak) + return iommu_alloc_coherent(dev, cell_get_iommu_table(dev), + size, dma_handle, + device_to_mask(dev), flag, + dev_to_node(dev)); + else + return dma_direct_ops.alloc(dev, size, dma_handle, flag, + attrs); +} + +static void dma_fixed_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak) + iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr, + dma_handle); + else + dma_direct_ops.free(dev, size, vaddr, dma_handle, attrs); +} + +static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + return dma_direct_ops.map_page(dev, page, offset, size, + direction, attrs); + else + return iommu_map_page(dev, cell_get_iommu_table(dev), page, + offset, size, device_to_mask(dev), + direction, attrs); +} + +static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + dma_direct_ops.unmap_page(dev, dma_addr, size, direction, + attrs); + else + iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size, + direction, attrs); +} + +static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + return dma_direct_ops.map_sg(dev, sg, nents, direction, 
attrs); + else + return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg, + nents, device_to_mask(dev), + direction, attrs); +} + +static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs); + else + ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, + direction, attrs); +} + +static int dma_fixed_dma_supported(struct device *dev, u64 mask) +{ + return mask == DMA_BIT_MASK(64); +} + +static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask); + +struct dma_map_ops dma_iommu_fixed_ops = { + .alloc = dma_fixed_alloc_coherent, + .free = dma_fixed_free_coherent, + .map_sg = dma_fixed_map_sg, + .unmap_sg = dma_fixed_unmap_sg, + .dma_supported = dma_fixed_dma_supported, + .set_dma_mask = dma_set_mask_and_switch, + .map_page = dma_fixed_map_page, + .unmap_page = dma_fixed_unmap_page, +}; + +static void cell_dma_dev_setup_fixed(struct device *dev); + +static void cell_dma_dev_setup(struct device *dev) +{ + /* Order is important here, these are not mutually exclusive */ + if (get_dma_ops(dev) == &dma_iommu_fixed_ops) + cell_dma_dev_setup_fixed(dev); + else if (get_pci_dma_ops() == &dma_iommu_ops) + set_iommu_table_base(dev, cell_get_iommu_table(dev)); + else if (get_pci_dma_ops() == &dma_direct_ops) + set_dma_offset(dev, cell_dma_direct_offset); + else + BUG(); +} + +static void cell_pci_dma_dev_setup(struct pci_dev *dev) +{ + cell_dma_dev_setup(&dev->dev); +} + +static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct device *dev = data; + + /* We are only intereted in device addition */ + if (action != BUS_NOTIFY_ADD_DEVICE) + return 0; + + /* We use the PCI DMA ops */ + dev->archdata.dma_ops = get_pci_dma_ops(); + + cell_dma_dev_setup(dev); + + return 0; +} + +static struct notifier_block cell_of_bus_notifier = { + .notifier_call = cell_of_bus_notify +}; + +static int __init cell_iommu_get_window(struct device_node *np, + unsigned long *base, + unsigned long *size) +{ + const __be32 *dma_window; + unsigned long index; + + /* Use ibm,dma-window if available, else, hard code ! */ + dma_window = of_get_property(np, "ibm,dma-window", NULL); + if (dma_window == NULL) { + *base = 0; + *size = 0x80000000u; + return -ENODEV; + } + + of_parse_dma_window(np, dma_window, &index, base, size); + return 0; +} + +static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np) +{ + struct cbe_iommu *iommu; + int nid, i; + + /* Get node ID */ + nid = of_node_to_nid(np); + if (nid < 0) { + printk(KERN_ERR "iommu: failed to get node for %s\n", + np->full_name); + return NULL; + } + pr_debug("iommu: setting up iommu for node %d (%s)\n", + nid, np->full_name); + + /* XXX todo: If we can have multiple windows on the same IOMMU, which + * isn't the case today, we probably want here to check whether the + * iommu for that node is already setup. + * However, there might be issue with getting the size right so let's + * ignore that for now. We might want to completely get rid of the + * multiple window support since the cell iommu supports per-page ioids + */ + + if (cbe_nr_iommus >= NR_IOMMUS) { + printk(KERN_ERR "iommu: too many IOMMUs detected ! 
(%s)\n", + np->full_name); + return NULL; + } + + /* Init base fields */ + i = cbe_nr_iommus++; + iommu = &iommus[i]; + iommu->stab = NULL; + iommu->nid = nid; + snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i); + INIT_LIST_HEAD(&iommu->windows); + + return iommu; +} + +static void __init cell_iommu_init_one(struct device_node *np, + unsigned long offset) +{ + struct cbe_iommu *iommu; + unsigned long base, size; + + iommu = cell_iommu_alloc(np); + if (!iommu) + return; + + /* Obtain a window for it */ + cell_iommu_get_window(np, &base, &size); + + pr_debug("\ttranslating window 0x%lx...0x%lx\n", + base, base + size - 1); + + /* Initialize the hardware */ + cell_iommu_setup_hardware(iommu, base, size); + + /* Setup the iommu_table */ + cell_iommu_setup_window(iommu, np, base, size, + offset >> IOMMU_PAGE_SHIFT_4K); +} + +static void __init cell_disable_iommus(void) +{ + int node; + unsigned long base, val; + void __iomem *xregs, *cregs; + + /* Make sure IOC translation is disabled on all nodes */ + for_each_online_node(node) { + if (cell_iommu_find_ioc(node, &base)) + continue; + xregs = ioremap(base, IOC_Reg_Size); + if (xregs == NULL) + continue; + cregs = xregs + IOC_IOCmd_Offset; + + pr_debug("iommu: cleaning up iommu on node %d\n", node); + + out_be64(xregs + IOC_IOST_Origin, 0); + (void)in_be64(xregs + IOC_IOST_Origin); + val = in_be64(cregs + IOC_IOCmd_Cfg); + val &= ~IOC_IOCmd_Cfg_TE; + out_be64(cregs + IOC_IOCmd_Cfg, val); + (void)in_be64(cregs + IOC_IOCmd_Cfg); + + iounmap(xregs); + } +} + +static int __init cell_iommu_init_disabled(void) +{ + struct device_node *np = NULL; + unsigned long base = 0, size; + + /* When no iommu is present, we use direct DMA ops */ + set_pci_dma_ops(&dma_direct_ops); + + /* First make sure all IOC translation is turned off */ + cell_disable_iommus(); + + /* If we have no Axon, we set up the spider DMA magic offset */ + if (of_find_node_by_name(NULL, "axon") == NULL) + cell_dma_direct_offset = SPIDER_DMA_OFFSET; + + /* Now we need to check to see where the memory is mapped + * in PCI space. We assume that all busses use the same dma + * window which is always the case so far on Cell, thus we + * pick up the first pci-internal node we can find and check + * the DMA window from there. + */ + for_each_node_by_name(np, "axon") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + if (cell_iommu_get_window(np, &base, &size) == 0) + break; + } + if (np == NULL) { + for_each_node_by_name(np, "pci-internal") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + if (cell_iommu_get_window(np, &base, &size) == 0) + break; + } + } + of_node_put(np); + + /* If we found a DMA window, we check if it's big enough to enclose + * all of physical memory. If not, we force enable IOMMU + */ + if (np && size < memblock_end_of_DRAM()) { + printk(KERN_WARNING "iommu: force-enabled, dma window" + " (%ldMB) smaller than total memory (%lldMB)\n", + size >> 20, memblock_end_of_DRAM() >> 20); + return -ENODEV; + } + + cell_dma_direct_offset += base; + + if (cell_dma_direct_offset != 0) + cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; + + printk("iommu: disabled, direct DMA offset is 0x%lx\n", + cell_dma_direct_offset); + + return 0; +} + +/* + * Fixed IOMMU mapping support + * + * This code adds support for setting up a fixed IOMMU mapping on certain + * cell machines. For 64-bit devices this avoids the performance overhead of + * mapping and unmapping pages at runtime. 
32-bit devices are unable to use + * the fixed mapping. + * + * The fixed mapping is established at boot, and maps all of physical memory + * 1:1 into device space at some offset. On machines with < 30 GB of memory + * we setup the fixed mapping immediately above the normal IOMMU window. + * + * For example a machine with 4GB of memory would end up with the normal + * IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In + * this case a 64-bit device wishing to DMA to 1GB would be told to DMA to + * 3GB, plus any offset required by firmware. The firmware offset is encoded + * in the "dma-ranges" property. + * + * On machines with 30GB or more of memory, we are unable to place the fixed + * mapping above the normal IOMMU window as we would run out of address space. + * Instead we move the normal IOMMU window to coincide with the hash page + * table, this region does not need to be part of the fixed mapping as no + * device should ever be DMA'ing to it. We then setup the fixed mapping + * from 0 to 32GB. + */ + +static u64 cell_iommu_get_fixed_address(struct device *dev) +{ + u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR; + struct device_node *np; + const u32 *ranges = NULL; + int i, len, best, naddr, nsize, pna, range_size; + + np = of_node_get(dev->of_node); + while (1) { + naddr = of_n_addr_cells(np); + nsize = of_n_size_cells(np); + np = of_get_next_parent(np); + if (!np) + break; + + ranges = of_get_property(np, "dma-ranges", &len); + + /* Ignore empty ranges, they imply no translation required */ + if (ranges && len > 0) + break; + } + + if (!ranges) { + dev_dbg(dev, "iommu: no dma-ranges found\n"); + goto out; + } + + len /= sizeof(u32); + + pna = of_n_addr_cells(np); + range_size = naddr + nsize + pna; + + /* dma-ranges format: + * child addr : naddr cells + * parent addr : pna cells + * size : nsize cells + */ + for (i = 0, best = -1, best_size = 0; i < len; i += range_size) { + cpu_addr = of_translate_dma_address(np, ranges + i + naddr); + size = of_read_number(ranges + i + naddr + pna, nsize); + + if (cpu_addr == 0 && size > best_size) { + best = i; + best_size = size; + } + } + + if (best >= 0) { + dev_addr = of_read_number(ranges + best, naddr); + } else + dev_dbg(dev, "iommu: no suitable range found!\n"); + +out: + of_node_put(np); + + return dev_addr; +} + +static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + if (dma_mask == DMA_BIT_MASK(64) && + cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) + { + dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + set_dma_ops(dev, &dma_iommu_fixed_ops); + } else { + dev_dbg(dev, "iommu: not 64-bit, using default ops\n"); + set_dma_ops(dev, get_pci_dma_ops()); + } + + cell_dma_dev_setup(dev); + + *dev->dma_mask = dma_mask; + + return 0; +} + +static void cell_dma_dev_setup_fixed(struct device *dev) +{ + u64 addr; + + addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base; + set_dma_offset(dev, addr); + + dev_dbg(dev, "iommu: fixed addr = %llx\n", addr); +} + +static void insert_16M_pte(unsigned long addr, unsigned long *ptab, + unsigned long base_pte) +{ + unsigned long segment, offset; + + segment = addr >> IO_SEGMENT_SHIFT; + offset = (addr >> 24) - (segment << IO_PAGENO_BITS(24)); + ptab = ptab + (segment * (1 << 12) / sizeof(unsigned long)); + + pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n", + addr, ptab, segment, offset); + + ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask); +} + 
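+/*
+ * Illustrative sketch (not part of the original file): the index
+ * arithmetic of insert_16M_pte() above, folded into one hypothetical
+ * helper. The fixed mapping uses 16MB IO pages (page_shift 24), so
+ * IO_PAGENO_BITS(24) = 4 and each 256MB segment holds 16 PTEs, but
+ * cell_iommu_alloc_ptab() pads every segment's PTE group to a full
+ * 4K page, i.e. 512 entries. E.g. addr 0x58000000 gives segment 5,
+ * offset 8, entry 5 * 512 + 8 = 2568.
+ */
+#if 0	/* example only */
+static unsigned long example_fixed_ptab_index(unsigned long addr)
+{
+	unsigned long segment = addr >> IO_SEGMENT_SHIFT;
+	unsigned long offset = (addr >> 24) & 0xf;	/* 16MB page in segment */
+
+	/* each segment's PTE group is padded to one 4K page = 512 entries */
+	return segment * ((1 << 12) / sizeof(unsigned long)) + offset;
+}
+#endif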
+static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu,
+	struct device_node *np, unsigned long dbase, unsigned long dsize,
+	unsigned long fbase, unsigned long fsize)
+{
+	unsigned long base_pte, uaddr, ioaddr, *ptab;
+
+	ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize, 24);
+
+	dma_iommu_fixed_base = fbase;
+
+	pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase);
+
+	base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
+		(cell_iommu_get_ioid(np) & CBE_IOPTE_IOID_Mask);
+
+	if (iommu_fixed_is_weak)
+		pr_info("IOMMU: Using weak ordering for fixed mapping\n");
+	else {
+		pr_info("IOMMU: Using strong ordering for fixed mapping\n");
+		base_pte |= CBE_IOPTE_SO_RW;
+	}
+
+	for (uaddr = 0; uaddr < fsize; uaddr += (1 << 24)) {
+		/* Don't touch the dynamic region */
+		ioaddr = uaddr + fbase;
+		if (ioaddr >= dbase && ioaddr < (dbase + dsize)) {
+			pr_debug("iommu: fixed/dynamic overlap, skipping\n");
+			continue;
+		}
+
+		insert_16M_pte(uaddr, ptab, base_pte);
+	}
+
+	mb();
+}
+
+static int __init cell_iommu_fixed_mapping_init(void)
+{
+	unsigned long dbase, dsize, fbase, fsize, hbase, hend;
+	struct cbe_iommu *iommu;
+	struct device_node *np;
+
+	/* The fixed mapping is only supported on axon machines */
+	np = of_find_node_by_name(NULL, "axon");
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: fixed mapping disabled, no axons found\n");
+		return -1;
+	}
+
+	/* We must have dma-ranges properties for fixed mapping to work */
+	np = of_find_node_with_property(NULL, "dma-ranges");
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
+		return -1;
+	}
+
+	/* The default setup is to have the fixed mapping sit after the
+	 * dynamic region, so find the top of the largest IOMMU window
+	 * on any axon, then add the size of RAM and that's our max value.
+	 * If that is > 32GB we have to do other shenanigans.
+	 */
+	fbase = 0;
+	for_each_node_by_name(np, "axon") {
+		cell_iommu_get_window(np, &dbase, &dsize);
+		fbase = max(fbase, dbase + dsize);
+	}
+
+	fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT);
+	fsize = memblock_phys_mem_size();
+
+	if ((fbase + fsize) <= 0x800000000ul)
+		hbase = 0; /* use the device tree window */
+	else {
+		/* If we're over 32 GB we need to cheat. We can't map all of
+		 * RAM with the fixed mapping, and also fit the dynamic
+		 * region. So try to place the dynamic region where the hash
+		 * table sits; drivers never need to DMA to it, so we don't
+		 * need a fixed mapping for that area.
+		 */
+		if (!htab_address) {
+			pr_debug("iommu: htab is NULL, on LPAR?
Huh?\n"); + return -1; + } + hbase = __pa(htab_address); + hend = hbase + htab_size_bytes; + + /* The window must start and end on a segment boundary */ + if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) || + (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) { + pr_debug("iommu: hash window not segment aligned\n"); + return -1; + } + + /* Check the hash window fits inside the real DMA window */ + for_each_node_by_name(np, "axon") { + cell_iommu_get_window(np, &dbase, &dsize); + + if (hbase < dbase || (hend > (dbase + dsize))) { + pr_debug("iommu: hash window doesn't fit in" + "real DMA window\n"); + return -1; + } + } + + fbase = 0; + } + + /* Setup the dynamic regions */ + for_each_node_by_name(np, "axon") { + iommu = cell_iommu_alloc(np); + BUG_ON(!iommu); + + if (hbase == 0) + cell_iommu_get_window(np, &dbase, &dsize); + else { + dbase = hbase; + dsize = htab_size_bytes; + } + + printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx " + "fixed window 0x%lx-0x%lx\n", iommu->nid, dbase, + dbase + dsize, fbase, fbase + fsize); + + cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize); + iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0, + IOMMU_PAGE_SHIFT_4K); + cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize, + fbase, fsize); + cell_iommu_enable_hardware(iommu); + cell_iommu_setup_window(iommu, np, dbase, dsize, 0); + } + + dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch; + set_pci_dma_ops(&dma_iommu_ops); + + return 0; +} + +static int iommu_fixed_disabled; + +static int __init setup_iommu_fixed(char *str) +{ + struct device_node *pciep; + + if (strcmp(str, "off") == 0) + iommu_fixed_disabled = 1; + + /* If we can find a pcie-endpoint in the device tree assume that + * we're on a triblade or a CAB so by default the fixed mapping + * should be set to be weakly ordered; but only if the boot + * option WASN'T set for strong ordering + */ + pciep = of_find_node_by_type(NULL, "pcie-endpoint"); + + if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0)) + iommu_fixed_is_weak = 1; + + of_node_put(pciep); + + return 1; +} +__setup("iommu_fixed=", setup_iommu_fixed); + +static u64 cell_dma_get_required_mask(struct device *dev) +{ + struct dma_map_ops *dma_ops; + + if (!dev->dma_mask) + return 0; + + if (!iommu_fixed_disabled && + cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) + return DMA_BIT_MASK(64); + + dma_ops = get_dma_ops(dev); + if (dma_ops->get_required_mask) + return dma_ops->get_required_mask(dev); + + WARN_ONCE(1, "no get_required_mask in %p ops", dma_ops); + + return DMA_BIT_MASK(64); +} + +static int __init cell_iommu_init(void) +{ + struct device_node *np; + + /* If IOMMU is disabled or we have little enough RAM to not need + * to enable it, we setup a direct mapping. + * + * Note: should we make sure we have the IOMMU actually disabled ? + */ + if (iommu_is_off || + (!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull)) + if (cell_iommu_init_disabled() == 0) + goto bail; + + /* Setup various callbacks */ + cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; + ppc_md.dma_get_required_mask = cell_dma_get_required_mask; + ppc_md.tce_build = tce_build_cell; + ppc_md.tce_free = tce_free_cell; + + if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0) + goto bail; + + /* Create an iommu for each /axon node. 
*/ + for_each_node_by_name(np, "axon") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + cell_iommu_init_one(np, 0); + } + + /* Create an iommu for each toplevel /pci-internal node for + * old hardware/firmware + */ + for_each_node_by_name(np, "pci-internal") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + cell_iommu_init_one(np, SPIDER_DMA_OFFSET); + } + + /* Setup default PCI iommu ops */ + set_pci_dma_ops(&dma_iommu_ops); + + bail: + /* Register callbacks on OF platform device addition/removal + * to handle linking them to the right DMA operations + */ + bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier); + + return 0; +} +machine_arch_initcall(cell, cell_iommu_init); diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c new file mode 100644 index 000000000..d17e98bc0 --- /dev/null +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -0,0 +1,133 @@ +/* + * CBE Pervasive Monitor and Debug + * + * (C) Copyright IBM Corporation 2005 + * + * Authors: Maximino Aguilar (maguilar@us.ibm.com) + * Michael N. Day (mnday@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/percpu.h> +#include <linux/types.h> +#include <linux/kallsyms.h> + +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/pgtable.h> +#include <asm/reg.h> +#include <asm/cell-regs.h> + +#include "pervasive.h" + +static void cbe_power_save(void) +{ + unsigned long ctrl, thread_switch_control; + + /* Ensure our interrupt state is properly tracked */ + if (!prep_irq_for_idle()) + return; + + ctrl = mfspr(SPRN_CTRLF); + + /* Enable DEC and EE interrupt request */ + thread_switch_control = mfspr(SPRN_TSC_CELL); + thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST; + + switch (ctrl & CTRL_CT) { + case CTRL_CT0: + thread_switch_control |= TSC_CELL_DEC_ENABLE_0; + break; + case CTRL_CT1: + thread_switch_control |= TSC_CELL_DEC_ENABLE_1; + break; + default: + printk(KERN_WARNING "%s: unknown configuration\n", + __func__); + break; + } + mtspr(SPRN_TSC_CELL, thread_switch_control); + + /* + * go into low thread priority, medium priority will be + * restored for us after wake-up. + */ + HMT_low(); + + /* + * atomically disable thread execution and runlatch. 
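+	 * Clearing CTRL_TE is what actually enters the pause state for
+	 * this thread; the TSC_CELL bits set above pick which events
+	 * may wake it.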
+ * External and Decrementer exceptions are still handled when the + * thread is disabled but now enter in cbe_system_reset_exception() + */ + ctrl &= ~(CTRL_RUNLATCH | CTRL_TE); + mtspr(SPRN_CTRLT, ctrl); + + /* Re-enable interrupts in MSR */ + __hard_irq_enable(); +} + +static int cbe_system_reset_exception(struct pt_regs *regs) +{ + switch (regs->msr & SRR1_WAKEMASK) { + case SRR1_WAKEEE: + do_IRQ(regs); + break; + case SRR1_WAKEDEC: + timer_interrupt(regs); + break; + case SRR1_WAKEMT: + return cbe_sysreset_hack(); +#ifdef CONFIG_CBE_RAS + case SRR1_WAKESYSERR: + cbe_system_error_exception(regs); + break; + case SRR1_WAKETHERM: + cbe_thermal_exception(regs); + break; +#endif /* CONFIG_CBE_RAS */ + default: + /* do system reset */ + return 0; + } + /* everything handled */ + return 1; +} + +void __init cbe_pervasive_init(void) +{ + int cpu; + + if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO)) + return; + + for_each_possible_cpu(cpu) { + struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(cpu); + if (!regs) + continue; + + /* Enable Pause(0) control bit */ + out_be64(®s->pmcr, in_be64(®s->pmcr) | + CBE_PMD_PAUSE_ZERO_CONTROL); + } + + ppc_md.power_save = cbe_power_save; + ppc_md.system_reset_exception = cbe_system_reset_exception; +} diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h new file mode 100644 index 000000000..fd4d7b709 --- /dev/null +++ b/arch/powerpc/platforms/cell/pervasive.h @@ -0,0 +1,42 @@ +/* + * Cell Pervasive Monitor and Debug interface and HW structures + * + * (C) Copyright IBM Corporation 2005 + * + * Authors: Maximino Aguilar (maguilar@us.ibm.com) + * David J. Erb (djerb@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#ifndef PERVASIVE_H +#define PERVASIVE_H + +extern void cbe_pervasive_init(void); +extern void cbe_system_error_exception(struct pt_regs *regs); +extern void cbe_maintenance_exception(struct pt_regs *regs); +extern void cbe_thermal_exception(struct pt_regs *regs); + +#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON +extern int cbe_sysreset_hack(void); +#else +static inline int cbe_sysreset_hack(void) +{ + return 1; +} +#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */ + +#endif diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c new file mode 100644 index 000000000..348a27b12 --- /dev/null +++ b/arch/powerpc/platforms/cell/pmu.c @@ -0,0 +1,424 @@ +/* + * Cell Broadband Engine Performance Monitor + * + * (C) Copyright IBM Corporation 2001,2006 + * + * Author: + * David Erb (djerb@us.ibm.com) + * Kevin Corry (kevcorry@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/interrupt.h> +#include <linux/types.h> +#include <linux/export.h> +#include <asm/io.h> +#include <asm/irq_regs.h> +#include <asm/machdep.h> +#include <asm/pmc.h> +#include <asm/reg.h> +#include <asm/spu.h> +#include <asm/cell-regs.h> + +#include "interrupt.h" + +/* + * When writing to write-only mmio addresses, save a shadow copy. All of the + * registers are 32-bit, but stored in the upper-half of a 64-bit field in + * pmd_regs. + */ + +#define WRITE_WO_MMIO(reg, x) \ + do { \ + u32 _x = (x); \ + struct cbe_pmd_regs __iomem *pmd_regs; \ + struct cbe_pmd_shadow_regs *shadow_regs; \ + pmd_regs = cbe_get_cpu_pmd_regs(cpu); \ + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \ + out_be64(&(pmd_regs->reg), (((u64)_x) << 32)); \ + shadow_regs->reg = _x; \ + } while (0) + +#define READ_SHADOW_REG(val, reg) \ + do { \ + struct cbe_pmd_shadow_regs *shadow_regs; \ + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \ + (val) = shadow_regs->reg; \ + } while (0) + +#define READ_MMIO_UPPER32(val, reg) \ + do { \ + struct cbe_pmd_regs __iomem *pmd_regs; \ + pmd_regs = cbe_get_cpu_pmd_regs(cpu); \ + (val) = (u32)(in_be64(&pmd_regs->reg) >> 32); \ + } while (0) + +/* + * Physical counter registers. + * Each physical counter can act as one 32-bit counter or two 16-bit counters. + */ + +u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr) +{ + u32 val_in_latch, val = 0; + + if (phys_ctr < NR_PHYS_CTRS) { + READ_SHADOW_REG(val_in_latch, counter_value_in_latch); + + /* Read the latch or the actual counter, whichever is newer. */ + if (val_in_latch & (1 << phys_ctr)) { + READ_SHADOW_REG(val, pm_ctr[phys_ctr]); + } else { + READ_MMIO_UPPER32(val, pm_ctr[phys_ctr]); + } + } + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_phys_ctr); + +void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val) +{ + struct cbe_pmd_shadow_regs *shadow_regs; + u32 pm_ctrl; + + if (phys_ctr < NR_PHYS_CTRS) { + /* Writing to a counter only writes to a hardware latch. + * The new value is not propagated to the actual counter + * until the performance monitor is enabled. + */ + WRITE_WO_MMIO(pm_ctr[phys_ctr], val); + + pm_ctrl = cbe_read_pm(cpu, pm_control); + if (pm_ctrl & CBE_PM_ENABLE_PERF_MON) { + /* The counters are already active, so we need to + * rewrite the pm_control register to "re-enable" + * the PMU. + */ + cbe_write_pm(cpu, pm_control, pm_ctrl); + } else { + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); + shadow_regs->counter_value_in_latch |= (1 << phys_ctr); + } + } +} +EXPORT_SYMBOL_GPL(cbe_write_phys_ctr); + +/* + * "Logical" counter registers. + * These will read/write 16-bits or 32-bits depending on the + * current size of the counter. Counters 4 - 7 are always 16-bit. + */ + +u32 cbe_read_ctr(u32 cpu, u32 ctr) +{ + u32 val; + u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1); + + val = cbe_read_phys_ctr(cpu, phys_ctr); + + if (cbe_get_ctr_size(cpu, phys_ctr) == 16) + val = (ctr < NR_PHYS_CTRS) ? 
(val >> 16) : (val & 0xffff); + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_ctr); + +void cbe_write_ctr(u32 cpu, u32 ctr, u32 val) +{ + u32 phys_ctr; + u32 phys_val; + + phys_ctr = ctr & (NR_PHYS_CTRS - 1); + + if (cbe_get_ctr_size(cpu, phys_ctr) == 16) { + phys_val = cbe_read_phys_ctr(cpu, phys_ctr); + + if (ctr < NR_PHYS_CTRS) + val = (val << 16) | (phys_val & 0xffff); + else + val = (val & 0xffff) | (phys_val & 0xffff0000); + } + + cbe_write_phys_ctr(cpu, phys_ctr, val); +} +EXPORT_SYMBOL_GPL(cbe_write_ctr); + +/* + * Counter-control registers. + * Each "logical" counter has a corresponding control register. + */ + +u32 cbe_read_pm07_control(u32 cpu, u32 ctr) +{ + u32 pm07_control = 0; + + if (ctr < NR_CTRS) + READ_SHADOW_REG(pm07_control, pm07_control[ctr]); + + return pm07_control; +} +EXPORT_SYMBOL_GPL(cbe_read_pm07_control); + +void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val) +{ + if (ctr < NR_CTRS) + WRITE_WO_MMIO(pm07_control[ctr], val); +} +EXPORT_SYMBOL_GPL(cbe_write_pm07_control); + +/* + * Other PMU control registers. Most of these are write-only. + */ + +u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg) +{ + u32 val = 0; + + switch (reg) { + case group_control: + READ_SHADOW_REG(val, group_control); + break; + + case debug_bus_control: + READ_SHADOW_REG(val, debug_bus_control); + break; + + case trace_address: + READ_MMIO_UPPER32(val, trace_address); + break; + + case ext_tr_timer: + READ_SHADOW_REG(val, ext_tr_timer); + break; + + case pm_status: + READ_MMIO_UPPER32(val, pm_status); + break; + + case pm_control: + READ_SHADOW_REG(val, pm_control); + break; + + case pm_interval: + READ_MMIO_UPPER32(val, pm_interval); + break; + + case pm_start_stop: + READ_SHADOW_REG(val, pm_start_stop); + break; + } + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_pm); + +void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val) +{ + switch (reg) { + case group_control: + WRITE_WO_MMIO(group_control, val); + break; + + case debug_bus_control: + WRITE_WO_MMIO(debug_bus_control, val); + break; + + case trace_address: + WRITE_WO_MMIO(trace_address, val); + break; + + case ext_tr_timer: + WRITE_WO_MMIO(ext_tr_timer, val); + break; + + case pm_status: + WRITE_WO_MMIO(pm_status, val); + break; + + case pm_control: + WRITE_WO_MMIO(pm_control, val); + break; + + case pm_interval: + WRITE_WO_MMIO(pm_interval, val); + break; + + case pm_start_stop: + WRITE_WO_MMIO(pm_start_stop, val); + break; + } +} +EXPORT_SYMBOL_GPL(cbe_write_pm); + +/* + * Get/set the size of a physical counter to either 16 or 32 bits. + */ + +u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr) +{ + u32 pm_ctrl, size = 0; + + if (phys_ctr < NR_PHYS_CTRS) { + pm_ctrl = cbe_read_pm(cpu, pm_control); + size = (pm_ctrl & CBE_PM_16BIT_CTR(phys_ctr)) ? 16 : 32; + } + + return size; +} +EXPORT_SYMBOL_GPL(cbe_get_ctr_size); + +void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size) +{ + u32 pm_ctrl; + + if (phys_ctr < NR_PHYS_CTRS) { + pm_ctrl = cbe_read_pm(cpu, pm_control); + switch (ctr_size) { + case 16: + pm_ctrl |= CBE_PM_16BIT_CTR(phys_ctr); + break; + + case 32: + pm_ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr); + break; + } + cbe_write_pm(cpu, pm_control, pm_ctrl); + } +} +EXPORT_SYMBOL_GPL(cbe_set_ctr_size); + +/* + * Enable/disable the entire performance monitoring unit. + * When we enable the PMU, all pending writes to counters get committed. 
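+ * (cbe_write_phys_ctr() above only loads a hardware latch and notes
+ * it in counter_value_in_latch; setting CBE_PM_ENABLE_PERF_MON below
+ * is what moves the latched values into the live counters, which is
+ * why the shadow latch bits are cleared first.)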
+ */ + +void cbe_enable_pm(u32 cpu) +{ + struct cbe_pmd_shadow_regs *shadow_regs; + u32 pm_ctrl; + + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); + shadow_regs->counter_value_in_latch = 0; + + pm_ctrl = cbe_read_pm(cpu, pm_control) | CBE_PM_ENABLE_PERF_MON; + cbe_write_pm(cpu, pm_control, pm_ctrl); +} +EXPORT_SYMBOL_GPL(cbe_enable_pm); + +void cbe_disable_pm(u32 cpu) +{ + u32 pm_ctrl; + pm_ctrl = cbe_read_pm(cpu, pm_control) & ~CBE_PM_ENABLE_PERF_MON; + cbe_write_pm(cpu, pm_control, pm_ctrl); +} +EXPORT_SYMBOL_GPL(cbe_disable_pm); + +/* + * Reading from the trace_buffer. + * The trace buffer is two 64-bit registers. Reading from + * the second half automatically increments the trace_address. + */ + +void cbe_read_trace_buffer(u32 cpu, u64 *buf) +{ + struct cbe_pmd_regs __iomem *pmd_regs = cbe_get_cpu_pmd_regs(cpu); + + *buf++ = in_be64(&pmd_regs->trace_buffer_0_63); + *buf++ = in_be64(&pmd_regs->trace_buffer_64_127); +} +EXPORT_SYMBOL_GPL(cbe_read_trace_buffer); + +/* + * Enabling/disabling interrupts for the entire performance monitoring unit. + */ + +u32 cbe_get_and_clear_pm_interrupts(u32 cpu) +{ + /* Reading pm_status clears the interrupt bits. */ + return cbe_read_pm(cpu, pm_status); +} +EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts); + +void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask) +{ + /* Set which node and thread will handle the next interrupt. */ + iic_set_interrupt_routing(cpu, thread, 0); + + /* Enable the interrupt bits in the pm_status register. */ + if (mask) + cbe_write_pm(cpu, pm_status, mask); +} +EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts); + +void cbe_disable_pm_interrupts(u32 cpu) +{ + cbe_get_and_clear_pm_interrupts(cpu); + cbe_write_pm(cpu, pm_status, 0); +} +EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts); + +static irqreturn_t cbe_pm_irq(int irq, void *dev_id) +{ + perf_irq(get_irq_regs()); + return IRQ_HANDLED; +} + +static int __init cbe_init_pm_irq(void) +{ + unsigned int irq; + int rc, node; + + for_each_online_node(node) { + irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI | + (node << IIC_IRQ_NODE_SHIFT)); + if (irq == NO_IRQ) { + printk("ERROR: Unable to allocate irq for node %d\n", + node); + return -EINVAL; + } + + rc = request_irq(irq, cbe_pm_irq, + 0, "cbe-pmu-0", NULL); + if (rc) { + printk("ERROR: Request for irq on node %d failed\n", + node); + return rc; + } + } + + return 0; +} +machine_arch_initcall(cell, cbe_init_pm_irq); + +void cbe_sync_irq(int node) +{ + unsigned int irq; + + irq = irq_find_mapping(NULL, + IIC_IRQ_IOEX_PMI + | (node << IIC_IRQ_NODE_SHIFT)); + + if (irq == NO_IRQ) { + printk(KERN_WARNING "ERROR, unable to get existing irq %d " \ + "for node %d\n", irq, node); + return; + } + + synchronize_irq(irq); +} +EXPORT_SYMBOL_GPL(cbe_sync_irq); + diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c new file mode 100644 index 000000000..d328140dc --- /dev/null +++ b/arch/powerpc/platforms/cell/qpace_setup.c @@ -0,0 +1,148 @@ +/* + * linux/arch/powerpc/platforms/cell/qpace_setup.c + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * Modified by Cell Team, IBM Deutschland Entwicklung GmbH + * Modified by Benjamin Krill <ben@codiert.org>, IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/console.h> +#include <linux/of_platform.h> + +#include <asm/mmu.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/kexec.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/cputable.h> +#include <asm/irq.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/udbg.h> +#include <asm/cell-regs.h> + +#include "interrupt.h" +#include "pervasive.h" +#include "ras.h" + +static void qpace_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); +} + +static void qpace_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + +static const struct of_device_id qpace_bus_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .type = "spider", }, + { .type = "axon", }, + { .type = "plb5", }, + { .type = "plb4", }, + { .type = "opb", }, + { .type = "ebc", }, + {}, +}; + +static int __init qpace_publish_devices(void) +{ + int node; + + /* Publish OF platform devices for southbridge IOs */ + of_platform_bus_probe(NULL, qpace_bus_ids, NULL); + + /* There is no device for the MIC memory controller, thus we create + * a platform device for it to attach the EDAC driver to. + */ + for_each_online_node(node) { + if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL) + continue; + platform_device_register_simple("cbe-mic", node, NULL, 0); + } + + return 0; +} +machine_subsys_initcall(qpace, qpace_publish_devices); + +static void __init qpace_setup_arch(void) +{ +#ifdef CONFIG_SPU_BASE + spu_priv1_ops = &spu_priv1_mmio_ops; + spu_management_ops = &spu_management_of_ops; +#endif + + cbe_regs_init(); + +#ifdef CONFIG_CBE_RAS + cbe_ras_init(); +#endif + +#ifdef CONFIG_SMP + smp_init_cell(); +#endif + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + cbe_pervasive_init(); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif +} + +static int __init qpace_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "IBM,QPACE")) + return 0; + + hpte_init_native(); + pm_power_off = rtas_power_off; + + return 1; +} + +define_machine(qpace) { + .name = "QPACE", + .probe = qpace_probe, + .setup_arch = qpace_setup_arch, + .show_cpuinfo = qpace_show_cpuinfo, + .restart = rtas_restart, + .halt = rtas_halt, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = qpace_progress, + .init_IRQ = iic_init_IRQ, +}; diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c new file mode 100644 index 000000000..e865d7481 --- /dev/null +++ b/arch/powerpc/platforms/cell/ras.c @@ -0,0 +1,356 @@ +/* + * Copyright 2006-2008, IBM Corporation. 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+
+#include <asm/kexec.h>
+#include <asm/reg.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/cell-regs.h>
+
+#include "ras.h"
+
+
+static void dump_fir(int cpu)
+{
+ struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
+ struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
+
+ if (pregs == NULL)
+ return;
+
+ /* TODO: parse the FIR bits more thoroughly and, based on them, walk
+ * down to the FIRs of the other sub-units, not only the IIC
+ */
+ printk(KERN_ERR "Global Checkstop FIR : 0x%016llx\n",
+ in_be64(&pregs->checkstop_fir));
+ printk(KERN_ERR "Global Recoverable FIR : 0x%016llx\n",
+ in_be64(&pregs->recover_fir));
+ printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n",
+ in_be64(&pregs->spec_att_mchk_fir));
+
+ if (iregs == NULL)
+ return;
+ printk(KERN_ERR "IOC FIR : 0x%016llx\n",
+ in_be64(&iregs->ioc_fir));
+
+}
+
+void cbe_system_error_exception(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
+ dump_fir(cpu);
+ dump_stack();
+}
+
+void cbe_maintenance_exception(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * Nothing implemented for the maintenance interrupt at this point
+ */
+
+ printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu);
+ dump_stack();
+}
+
+void cbe_thermal_exception(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * Nothing implemented for the thermal interrupt at this point
+ */
+
+ printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu);
+ dump_stack();
+}
+
+static int cbe_machine_check_handler(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu);
+ dump_fir(cpu);
+
+ /* There is no recovery from this code for now, let's continue */
+ return 0;
+}
+
+struct ptcal_area {
+ struct list_head list;
+ int nid;
+ int order;
+ struct page *pages;
+};
+
+static LIST_HEAD(ptcal_list);
+
+static int ptcal_start_tok, ptcal_stop_tok;
+
+static int __init cbe_ptcal_enable_on_node(int nid, int order)
+{
+ struct ptcal_area *area;
+ int ret = -ENOMEM;
+ unsigned long addr;
+
+ if (is_kdump_kernel())
+ rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
+
+ area = kmalloc(sizeof(*area), GFP_KERNEL);
+ if (!area)
+ goto out_err;
+
+ area->nid = nid;
+ area->order = order;
+ area->pages = alloc_pages_exact_node(area->nid,
+ GFP_KERNEL|__GFP_THISNODE,
+ area->order);
+
+ if (!area->pages) {
+ printk(KERN_WARNING "%s: no page on node %d\n",
+ __func__, area->nid);
+ goto out_free_area;
+ }
+
+ /*
+ * We move the ptcal area to the middle of the allocated
+ * page, in order to avoid prefetches in memcpy and similar
+ * functions stepping on it.
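+ *
+ * That is, the area starts PAGE_SIZE/2 bytes into the first page of
+ * the allocation (offset 32K with a 64K PAGE_SIZE, for example),
+ * keeping it away from the page boundaries that overrunning
+ * prefetches are most likely to cross.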
+ */ + addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1); + printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n", + __func__, area->nid, addr); + + ret = -EIO; + if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid, + (unsigned int)(addr >> 32), + (unsigned int)(addr & 0xffffffff))) { + printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n", + __func__, nid); + goto out_free_pages; + } + + list_add(&area->list, &ptcal_list); + + return 0; + +out_free_pages: + __free_pages(area->pages, area->order); +out_free_area: + kfree(area); +out_err: + return ret; +} + +static int __init cbe_ptcal_enable(void) +{ + const u32 *size; + struct device_node *np; + int order, found_mic = 0; + + np = of_find_node_by_path("/rtas"); + if (!np) + return -ENODEV; + + size = of_get_property(np, "ibm,cbe-ptcal-size", NULL); + if (!size) { + of_node_put(np); + return -ENODEV; + } + + pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size); + order = get_order(*size); + of_node_put(np); + + /* support for malta device trees, with be@/mic@ nodes */ + for_each_node_by_type(np, "mic-tm") { + cbe_ptcal_enable_on_node(of_node_to_nid(np), order); + found_mic = 1; + } + + if (found_mic) + return 0; + + /* support for older device tree - use cpu nodes */ + for_each_node_by_type(np, "cpu") { + const u32 *nid = of_get_property(np, "node-id", NULL); + if (!nid) { + printk(KERN_ERR "%s: node %s is missing node-id?\n", + __func__, np->full_name); + continue; + } + cbe_ptcal_enable_on_node(*nid, order); + found_mic = 1; + } + + return found_mic ? 0 : -ENODEV; +} + +static int cbe_ptcal_disable(void) +{ + struct ptcal_area *area, *tmp; + int ret = 0; + + pr_debug("%s: disabling PTCAL\n", __func__); + + list_for_each_entry_safe(area, tmp, &ptcal_list, list) { + /* disable ptcal on this node */ + if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) { + printk(KERN_ERR "%s: error disabling PTCAL " + "on node %d!\n", __func__, + area->nid); + ret = -EIO; + continue; + } + + /* ensure we can access the PTCAL area */ + memset(page_address(area->pages), 0, + 1 << (area->order + PAGE_SHIFT)); + + /* clean up */ + list_del(&area->list); + __free_pages(area->pages, area->order); + kfree(area); + } + + return ret; +} + +static int cbe_ptcal_notify_reboot(struct notifier_block *nb, + unsigned long code, void *data) +{ + return cbe_ptcal_disable(); +} + +static void cbe_ptcal_crash_shutdown(void) +{ + cbe_ptcal_disable(); +} + +static struct notifier_block cbe_ptcal_reboot_notifier = { + .notifier_call = cbe_ptcal_notify_reboot +}; + +#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON +static int sysreset_hack; + +static int __init cbe_sysreset_init(void) +{ + struct cbe_pmd_regs __iomem *regs; + + sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0"); + if (!sysreset_hack) + return 0; + + regs = cbe_get_cpu_pmd_regs(0); + if (!regs) + return 0; + + /* Enable JTAG system-reset hack */ + out_be32(®s->fir_mode_reg, + in_be32(®s->fir_mode_reg) | + CBE_PMD_FIR_MODE_M8); + + return 0; +} +device_initcall(cbe_sysreset_init); + +int cbe_sysreset_hack(void) +{ + struct cbe_pmd_regs __iomem *regs; + + /* + * The BMC can inject user triggered system reset exceptions, + * but cannot set the system reset reason in srr1, + * so check an extra register here. 
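+ *
+ * Concretely (as implemented below): if any of the low 16 bits of
+ * ras_esc_0 are set, the exception was injected by the BMC, so we
+ * clear the register and return 0 to tell the caller to skip the
+ * normal system-reset handling; otherwise we return 1 and the
+ * exception is treated as a real system reset.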
+ */ + if (sysreset_hack && (smp_processor_id() == 0)) { + regs = cbe_get_cpu_pmd_regs(0); + if (!regs) + return 0; + if (in_be64(®s->ras_esc_0) & 0x0000ffff) { + out_be64(®s->ras_esc_0, 0); + return 0; + } + } + return 1; +} +#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */ + +int __init cbe_ptcal_init(void) +{ + int ret; + ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal"); + ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal"); + + if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE + || ptcal_stop_tok == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier); + if (ret) + goto out1; + + ret = crash_shutdown_register(&cbe_ptcal_crash_shutdown); + if (ret) + goto out2; + + return cbe_ptcal_enable(); + +out2: + unregister_reboot_notifier(&cbe_ptcal_reboot_notifier); +out1: + printk(KERN_ERR "Can't disable PTCAL, so not enabling\n"); + return ret; +} + +arch_initcall(cbe_ptcal_init); + +void __init cbe_ras_init(void) +{ + unsigned long hid0; + + /* + * Enable System Error & thermal interrupts and wakeup conditions + */ + + hid0 = mfspr(SPRN_HID0); + hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP | + HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP; + mtspr(SPRN_HID0, hid0); + mb(); + + /* + * Install machine check handler. Leave setting of precise mode to + * what the firmware did for now + */ + ppc_md.machine_check_exception = cbe_machine_check_handler; + mb(); + + /* + * For now, we assume that IOC_FIR is already set to forward some + * error conditions to the System Error handler. If that is not true + * then it will have to be fixed up here. + */ +} diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h new file mode 100644 index 000000000..eb7ee54c8 --- /dev/null +++ b/arch/powerpc/platforms/cell/ras.h @@ -0,0 +1,9 @@ +#ifndef RAS_H +#define RAS_H + +extern void cbe_system_error_exception(struct pt_regs *regs); +extern void cbe_maintenance_exception(struct pt_regs *regs); +extern void cbe_thermal_exception(struct pt_regs *regs); +extern void cbe_ras_init(void); + +#endif /* RAS_H */ diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c new file mode 100644 index 000000000..36cff28d0 --- /dev/null +++ b/arch/powerpc/platforms/cell/setup.c @@ -0,0 +1,286 @@ +/* + * linux/arch/powerpc/platforms/cell/cell_setup.c + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * Modified by Cell Team, IBM Deutschland Entwicklung GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#undef DEBUG + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/export.h> +#include <linux/unistd.h> +#include <linux/user.h> +#include <linux/reboot.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/console.h> +#include <linux/mutex.h> +#include <linux/memory_hotplug.h> +#include <linux/of_platform.h> + +#include <asm/mmu.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/pci-bridge.h> +#include <asm/iommu.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/nvram.h> +#include <asm/cputable.h> +#include <asm/ppc-pci.h> +#include <asm/irq.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include <asm/cell-regs.h> +#include <asm/io-workarounds.h> + +#include "cell.h" +#include "interrupt.h" +#include "pervasive.h" +#include "ras.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +static void cell_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); +} + +static void cell_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + +static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev) +{ + struct pci_controller *hose; + const char *s; + int i; + + if (!machine_is(cell)) + return; + + /* We're searching for a direct child of the PHB */ + if (dev->bus->self != NULL || dev->devfn != 0) + return; + + hose = pci_bus_to_host(dev->bus); + if (hose == NULL) + return; + + /* Only on PCIE */ + if (!of_device_is_compatible(hose->dn, "pciex")) + return; + + /* And only on axon */ + s = of_get_property(hose->dn, "model", NULL); + if (!s || strcmp(s, "Axon") != 0) + return; + + for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + } + + printk(KERN_DEBUG "PCI: Hiding resources on Axon PCIE RC %s\n", + pci_name(dev)); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex); + +static int cell_setup_phb(struct pci_controller *phb) +{ + const char *model; + struct device_node *np; + + int rc = rtas_setup_phb(phb); + if (rc) + return rc; + + phb->controller_ops = cell_pci_controller_ops; + + np = phb->dn; + model = of_get_property(np, "model", NULL); + if (model == NULL || strcmp(np->name, "pci")) + return 0; + + /* Setup workarounds for spider */ + if (strcmp(model, "Spider")) + return 0; + + iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init, + (void *)SPIDER_PCI_REG_BASE); + return 0; +} + +static const struct of_device_id cell_bus_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .type = "spider", }, + { .type = "axon", }, + { .type = "plb5", }, + { .type = "plb4", }, + { .type = "opb", }, + { .type = "ebc", }, + {}, +}; + +static int __init cell_publish_devices(void) +{ + struct device_node *root = of_find_node_by_path("/"); + struct device_node *np; + int node; + + /* Publish OF platform devices for southbridge IOs */ + of_platform_bus_probe(NULL, cell_bus_ids, NULL); + + /* On spider based blades, we need to manually create the OF + * 
platform devices for the PCI host bridges + */ + for_each_child_of_node(root, np) { + if (np->type == NULL || (strcmp(np->type, "pci") != 0 && + strcmp(np->type, "pciex") != 0)) + continue; + of_platform_device_create(np, NULL, NULL); + } + + /* There is no device for the MIC memory controller, thus we create + * a platform device for it to attach the EDAC driver to. + */ + for_each_online_node(node) { + if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL) + continue; + platform_device_register_simple("cbe-mic", node, NULL, 0); + } + + return 0; +} +machine_subsys_initcall(cell, cell_publish_devices); + +static void __init mpic_init_IRQ(void) +{ + struct device_node *dn; + struct mpic *mpic; + + for (dn = NULL; + (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + if (!of_device_is_compatible(dn, "CBEA,platform-open-pic")) + continue; + + /* The MPIC driver will get everything it needs from the + * device-tree, just pass 0 to all arguments + */ + mpic = mpic_alloc(dn, 0, MPIC_SECONDARY | MPIC_NO_RESET, + 0, 0, " MPIC "); + if (mpic == NULL) + continue; + mpic_init(mpic); + } +} + + +static void __init cell_init_irq(void) +{ + iic_init_IRQ(); + spider_init_IRQ(); + mpic_init_IRQ(); +} + +static void __init cell_set_dabrx(void) +{ + mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); +} + +static void __init cell_setup_arch(void) +{ +#ifdef CONFIG_SPU_BASE + spu_priv1_ops = &spu_priv1_mmio_ops; + spu_management_ops = &spu_management_of_ops; +#endif + + cbe_regs_init(); + + cell_set_dabrx(); + +#ifdef CONFIG_CBE_RAS + cbe_ras_init(); +#endif + +#ifdef CONFIG_SMP + smp_init_cell(); +#endif + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + /* Find and initialize PCI host bridges */ + init_pci_config_tokens(); + + cbe_pervasive_init(); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + mmio_nvram_init(); +} + +static int __init cell_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "IBM,CBEA") && + !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) + return 0; + + hpte_init_native(); + pm_power_off = rtas_power_off; + + return 1; +} + +define_machine(cell) { + .name = "Cell", + .probe = cell_probe, + .setup_arch = cell_setup_arch, + .show_cpuinfo = cell_show_cpuinfo, + .restart = rtas_restart, + .halt = rtas_halt, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = cell_progress, + .init_IRQ = cell_init_irq, + .pci_setup_phb = cell_setup_phb, +}; + +struct pci_controller_ops cell_pci_controller_ops; diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c new file mode 100644 index 000000000..895560f4b --- /dev/null +++ b/arch/powerpc/platforms/cell/smp.c @@ -0,0 +1,168 @@ +/* + * SMP support for BPA machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/cache.h> +#include <linux/err.h> +#include <linux/device.h> +#include <linux/cpu.h> + +#include <asm/ptrace.h> +#include <linux/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/paca.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/firmware.h> +#include <asm/rtas.h> +#include <asm/cputhreads.h> +#include <asm/code-patching.h> + +#include "interrupt.h" +#include <asm/udbg.h> + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * The Primary thread of each non-boot processor was started from the OF client + * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop. + */ +static cpumask_t of_spin_map; + +/** + * smp_startup_cpu() - start the given cpu + * + * At boot time, there is nothing to do for primary threads which were + * started from Open Firmware. For anything else, call RTAS with the + * appropriate start location. + * + * Returns: + * 0 - failure + * 1 - success + */ +static inline int smp_startup_cpu(unsigned int lcpu) +{ + int status; + unsigned long start_here = + __pa(ppc_function_entry(generic_secondary_smp_init)); + unsigned int pcpu; + int start_cpu; + + if (cpumask_test_cpu(lcpu, &of_spin_map)) + /* Already started by OF and sitting in spin loop */ + return 1; + + pcpu = get_hard_smp_processor_id(lcpu); + + /* Fixup atomic count: it exited inside IRQ handler. */ + task_thread_info(paca[lcpu].__current)->preempt_count = 0; + + /* + * If the RTAS start-cpu token does not exist then presume the + * cpu is already spinning. + */ + start_cpu = rtas_token("start-cpu"); + if (start_cpu == RTAS_UNKNOWN_SERVICE) + return 1; + + status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu); + if (status != 0) { + printk(KERN_ERR "start-cpu failed: %i\n", status); + return 0; + } + + return 1; +} + +static void smp_cell_setup_cpu(int cpu) +{ + if (cpu != boot_cpuid) + iic_setup_cpu(); + + /* + * change default DABRX to allow user watchpoints + */ + mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); +} + +static int smp_cell_kick_cpu(int nr) +{ + BUG_ON(nr < 0 || nr >= NR_CPUS); + + if (!smp_startup_cpu(nr)) + return -ENOENT; + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca[nr].cpu_start = 1; + + return 0; +} + +static struct smp_ops_t bpa_iic_smp_ops = { + .message_pass = iic_message_pass, + .probe = iic_request_IPIs, + .kick_cpu = smp_cell_kick_cpu, + .setup_cpu = smp_cell_setup_cpu, + .cpu_bootable = smp_generic_cpu_bootable, +}; + +/* This is called very early */ +void __init smp_init_cell(void) +{ + int i; + + DBG(" -> smp_init_cell()\n"); + + smp_ops = &bpa_iic_smp_ops; + + /* Mark threads which are still spinning in hold loops. 
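+ * Only the primary thread of each core was left spinning by the
+ * firmware (see the comment above smp_startup_cpu()); secondary SMT
+ * threads are started through RTAS later. So with SMT we mark only
+ * thread 0 of each present core, and without SMT every present cpu.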
*/ + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (cpu_thread_in_core(i) == 0) + cpumask_set_cpu(i, &of_spin_map); + } + } else + cpumask_copy(&of_spin_map, cpu_present_mask); + + cpumask_clear_cpu(boot_cpuid, &of_spin_map); + + /* Non-lpar has additional take/give timebase */ + if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { + smp_ops->give_timebase = rtas_give_timebase; + smp_ops->take_timebase = rtas_take_timebase; + } + + DBG(" <- smp_init_cell()\n"); +} diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c new file mode 100644 index 000000000..f1f787889 --- /dev/null +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -0,0 +1,184 @@ +/* + * IO workarounds for PCI on Celleb/Cell platform + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/io.h> + +#include <asm/ppc-pci.h> +#include <asm/pci-bridge.h> +#include <asm/io-workarounds.h> + +#define SPIDER_PCI_DISABLE_PREFETCH + +struct spiderpci_iowa_private { + void __iomem *regs; +}; + +static void spiderpci_io_flush(struct iowa_bus *bus) +{ + struct spiderpci_iowa_private *priv; + u32 val; + + priv = bus->private; + val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); + iosync(); +} + +#define SPIDER_PCI_MMIO_READ(name, ret) \ +static ret spiderpci_##name(const PCI_IO_ADDR addr) \ +{ \ + ret val = __do_##name(addr); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ + return val; \ +} + +#define SPIDER_PCI_MMIO_READ_STR(name) \ +static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, \ + unsigned long count) \ +{ \ + __do_##name(addr, buf, count); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ +} + +SPIDER_PCI_MMIO_READ(readb, u8) +SPIDER_PCI_MMIO_READ(readw, u16) +SPIDER_PCI_MMIO_READ(readl, u32) +SPIDER_PCI_MMIO_READ(readq, u64) +SPIDER_PCI_MMIO_READ(readw_be, u16) +SPIDER_PCI_MMIO_READ(readl_be, u32) +SPIDER_PCI_MMIO_READ(readq_be, u64) +SPIDER_PCI_MMIO_READ_STR(readsb) +SPIDER_PCI_MMIO_READ_STR(readsw) +SPIDER_PCI_MMIO_READ_STR(readsl) + +static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src, + unsigned long n) +{ + __do_memcpy_fromio(dest, src, n); + spiderpci_io_flush(iowa_mem_find_bus(src)); +} + +static int __init spiderpci_pci_setup_chip(struct pci_controller *phb, + void __iomem *regs) +{ + void *dummy_page_va; + dma_addr_t dummy_page_da; + +#ifdef SPIDER_PCI_DISABLE_PREFETCH + u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT); + pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val); + out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8); +#endif /* SPIDER_PCI_DISABLE_PREFETCH */ + + /* setup dummy read */ + /* + * On CellBlade, we can't know that which XDR memory is used by 
+ * kmalloc() to allocate dummy_page_va.
+ * To improve performance, dummy_page_va should be allocated from the
+ * XDR memory nearest to this spider-pci bridge; that would mean
+ * selecting the CBE closest to the bridge and allocating on its node,
+ * but there is no obvious way to determine which CBE that is.
+ *
+ * Celleb does not have this problem, because it has only one XDR.
+ */
+ dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!dummy_page_va) {
+ pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n");
+ return -1;
+ }
+
+ dummy_page_da = dma_map_single(phb->parent, dummy_page_va,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(phb->parent, dummy_page_da)) {
+ pr_err("SPIDER-IOWA:Map dummy page failed.\n");
+ kfree(dummy_page_va);
+ return -1;
+ }
+
+ out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da);
+
+ return 0;
+}
+
+int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
+{
+ void __iomem *regs = NULL;
+ struct spiderpci_iowa_private *priv;
+ struct device_node *np = bus->phb->dn;
+ struct resource r;
+ unsigned long offset = (unsigned long)data;
+
+ pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n",
+ np->full_name);
+
+ priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
+ if (!priv) {
+ pr_err("SPIDERPCI-IOWA:"
+ "Can't allocate struct spiderpci_iowa_private.\n");
+ return -1;
+ }
+
+ if (of_address_to_resource(np, 0, &r)) {
+ pr_err("SPIDERPCI-IOWA:Can't get resource.\n");
+ goto error;
+ }
+
+ regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE);
+ if (!regs) {
+ pr_err("SPIDERPCI-IOWA:ioremap failed.\n");
+ goto error;
+ }
+ priv->regs = regs;
+ bus->private = priv;
+
+ if (spiderpci_pci_setup_chip(bus->phb, regs))
+ goto error;
+
+ return 0;
+
+error:
+ kfree(priv);
+ bus->private = NULL;
+
+ if (regs)
+ iounmap(regs);
+
+ return -1;
+}
+
+struct ppc_pci_io spiderpci_ops = {
+ .readb = spiderpci_readb,
+ .readw = spiderpci_readw,
+ .readl = spiderpci_readl,
+ .readq = spiderpci_readq,
+ .readw_be = spiderpci_readw_be,
+ .readl_be = spiderpci_readl_be,
+ .readq_be = spiderpci_readq_be,
+ .readsb = spiderpci_readsb,
+ .readsw = spiderpci_readsw,
+ .readsl = spiderpci_readsl,
+ .memcpy_fromio = spiderpci_memcpy_fromio,
+};
+
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
new file mode 100644
index 000000000..1f72f4ab6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -0,0 +1,359 @@
+/*
+ * External Interrupt Controller on Spider South Bridge
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/ioport.h> + +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/io.h> + +#include "interrupt.h" + +/* register layout taken from Spider spec, table 7.4-4 */ +enum { + TIR_DEN = 0x004, /* Detection Enable Register */ + TIR_MSK = 0x084, /* Mask Level Register */ + TIR_EDC = 0x0c0, /* Edge Detection Clear Register */ + TIR_PNDA = 0x100, /* Pending Register A */ + TIR_PNDB = 0x104, /* Pending Register B */ + TIR_CS = 0x144, /* Current Status Register */ + TIR_LCSA = 0x150, /* Level Current Status Register A */ + TIR_LCSB = 0x154, /* Level Current Status Register B */ + TIR_LCSC = 0x158, /* Level Current Status Register C */ + TIR_LCSD = 0x15c, /* Level Current Status Register D */ + TIR_CFGA = 0x200, /* Setting Register A0 */ + TIR_CFGB = 0x204, /* Setting Register B0 */ + /* 0x208 ... 0x3ff Setting Register An/Bn */ + TIR_PPNDA = 0x400, /* Packet Pending Register A */ + TIR_PPNDB = 0x404, /* Packet Pending Register B */ + TIR_PIERA = 0x408, /* Packet Output Error Register A */ + TIR_PIERB = 0x40c, /* Packet Output Error Register B */ + TIR_PIEN = 0x444, /* Packet Output Enable Register */ + TIR_PIPND = 0x454, /* Packet Output Pending Register */ + TIRDID = 0x484, /* Spider Device ID Register */ + REISTIM = 0x500, /* Reissue Command Timeout Time Setting */ + REISTIMEN = 0x504, /* Reissue Command Timeout Setting */ + REISWAITEN = 0x508, /* Reissue Wait Control*/ +}; + +#define SPIDER_CHIP_COUNT 4 +#define SPIDER_SRC_COUNT 64 +#define SPIDER_IRQ_INVALID 63 + +struct spider_pic { + struct irq_domain *host; + void __iomem *regs; + unsigned int node_id; +}; +static struct spider_pic spider_pics[SPIDER_CHIP_COUNT]; + +static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d) +{ + return irq_data_get_irq_chip_data(d); +} + +static void __iomem *spider_get_irq_config(struct spider_pic *pic, + unsigned int src) +{ + return pic->regs + TIR_CFGA + 8 * src; +} + +static void spider_unmask_irq(struct irq_data *d) +{ + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); + + out_be32(cfg, in_be32(cfg) | 0x30000000u); +} + +static void spider_mask_irq(struct irq_data *d) +{ + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); + + out_be32(cfg, in_be32(cfg) & ~0x30000000u); +} + +static void spider_ack_irq(struct irq_data *d) +{ + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int src = irqd_to_hwirq(d); + + /* Reset edge detection logic if necessary + */ + if (irqd_is_level_type(d)) + return; + + /* Only interrupts 47 to 50 can be set to edge */ + if (src < 47 || src > 50) + return; + + /* Perform the clear of the edge logic */ + out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf)); +} + +static int spider_set_irq_type(struct irq_data *d, unsigned int type) +{ + unsigned int sense = type & IRQ_TYPE_SENSE_MASK; + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int hw = irqd_to_hwirq(d); + void __iomem *cfg = spider_get_irq_config(pic, hw); + u32 old_mask; + u32 ic; + + /* Note that only level high is supported for most interrupts */ + if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH && + (hw < 47 || hw > 50)) + return -EINVAL; + + /* Decode sense type */ + switch(sense) { + case IRQ_TYPE_EDGE_RISING: + ic = 0x3; + break; + case IRQ_TYPE_EDGE_FALLING: + ic = 0x2; + break; + case IRQ_TYPE_LEVEL_LOW: + ic = 0x0; + break; + case 
IRQ_TYPE_LEVEL_HIGH:
+ case IRQ_TYPE_NONE:
+ ic = 0x1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Configure the source. One gross hack that was there before and
+ * that I've kept around is the priority to the BE, which I set to
+ * be the same as the interrupt source number. I don't know whether
+ * that's supposed to make any kind of sense; however, we'll have to
+ * decide that, but for now, I'm not changing the behaviour.
+ */
+ old_mask = in_be32(cfg) & 0x30000000u;
+ out_be32(cfg, old_mask | (ic << 24) | (0x7 << 16) |
+ (pic->node_id << 4) | 0xe);
+ out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff));
+
+ return 0;
+}
+
+static struct irq_chip spider_pic = {
+ .name = "SPIDER",
+ .irq_unmask = spider_unmask_irq,
+ .irq_mask = spider_mask_irq,
+ .irq_ack = spider_ack_irq,
+ .irq_set_type = spider_set_irq_type,
+};
+
+static int spider_host_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_data(virq, h->host_data);
+ irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq);
+
+ /* Set default irq type */
+ irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+ return 0;
+}
+
+static int spider_host_xlate(struct irq_domain *h, struct device_node *ct,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+ /* Spider interrupts have 2 cells, first is the interrupt source,
+ * second, well, I don't know for sure yet ... We mask the top bits
+ * because old device-trees encode a node number in there
+ */
+ *out_hwirq = intspec[0] & 0x3f;
+ *out_flags = IRQ_TYPE_LEVEL_HIGH;
+ return 0;
+}
+
+static const struct irq_domain_ops spider_host_ops = {
+ .map = spider_host_map,
+ .xlate = spider_host_xlate,
+};
+
+static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct spider_pic *pic = irq_desc_get_handler_data(desc);
+ unsigned int cs, virq;
+
+ cs = in_be32(pic->regs + TIR_CS) >> 24;
+ if (cs == SPIDER_IRQ_INVALID)
+ virq = NO_IRQ;
+ else
+ virq = irq_linear_revmap(pic->host, cs);
+
+ if (virq != NO_IRQ)
+ generic_handle_irq(virq);
+
+ chip->irq_eoi(&desc->irq_data);
+}
+
+/* For hooking up the cascade we have a problem. Our device-tree is
+ * crap and we don't know on which BE iic interrupt we are hooked on,
+ * at least not the "standard" way. We can reconstitute it based on two
+ * pieces of information though: which BE node we are connected to and
+ * whether we are connected to IOIF0 or IOIF1. Right now, we really only
+ * care about the IBM cell blade and we know that its firmware gives us
+ * an interrupt-map property which is pretty strange.
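+ *
+ * Concretely, the parsing below assumes (based on those firmwares,
+ * not on any generic OF rule) that each interrupt-map entry is laid
+ * out as:
+ *
+ * <child specifier, #interrupt-cells cells> <iic phandle> <iic specifier>
+ *
+ * where the last cell of the iic specifier is the unit, and that the
+ * BE node number can be recovered from the iic node's
+ * ibm,interrupt-server-ranges property (shifted right by one).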
+ */ +static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) +{ + unsigned int virq; + const u32 *imap, *tmp; + int imaplen, intsize, unit; + struct device_node *iic; + + /* First, we check whether we have a real "interrupts" in the device + * tree in case the device-tree is ever fixed + */ + virq = irq_of_parse_and_map(pic->host->of_node, 0); + if (virq) + return virq; + + /* Now do the horrible hacks */ + tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL); + if (tmp == NULL) + return NO_IRQ; + intsize = *tmp; + imap = of_get_property(pic->host->of_node, "interrupt-map", &imaplen); + if (imap == NULL || imaplen < (intsize + 1)) + return NO_IRQ; + iic = of_find_node_by_phandle(imap[intsize]); + if (iic == NULL) + return NO_IRQ; + imap += intsize + 1; + tmp = of_get_property(iic, "#interrupt-cells", NULL); + if (tmp == NULL) { + of_node_put(iic); + return NO_IRQ; + } + intsize = *tmp; + /* Assume unit is last entry of interrupt specifier */ + unit = imap[intsize - 1]; + /* Ok, we have a unit, now let's try to get the node */ + tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL); + if (tmp == NULL) { + of_node_put(iic); + return NO_IRQ; + } + /* ugly as hell but works for now */ + pic->node_id = (*tmp) >> 1; + of_node_put(iic); + + /* Ok, now let's get cracking. You may ask me why I just didn't match + * the iic host from the iic OF node, but that way I'm still compatible + * with really really old old firmwares for which we don't have a node + */ + /* Manufacture an IIC interrupt number of class 2 */ + virq = irq_create_mapping(NULL, + (pic->node_id << IIC_IRQ_NODE_SHIFT) | + (2 << IIC_IRQ_CLASS_SHIFT) | + unit); + if (virq == NO_IRQ) + printk(KERN_ERR "spider_pic: failed to map cascade !"); + return virq; +} + + +static void __init spider_init_one(struct device_node *of_node, int chip, + unsigned long addr) +{ + struct spider_pic *pic = &spider_pics[chip]; + int i, virq; + + /* Map registers */ + pic->regs = ioremap(addr, 0x1000); + if (pic->regs == NULL) + panic("spider_pic: can't map registers !"); + + /* Allocate a host */ + pic->host = irq_domain_add_linear(of_node, SPIDER_SRC_COUNT, + &spider_host_ops, pic); + if (pic->host == NULL) + panic("spider_pic: can't allocate irq host !"); + + /* Go through all sources and disable them */ + for (i = 0; i < SPIDER_SRC_COUNT; i++) { + void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i; + out_be32(cfg, in_be32(cfg) & ~0x30000000u); + } + + /* do not mask any interrupts because of level */ + out_be32(pic->regs + TIR_MSK, 0x0); + + /* enable interrupt packets to be output */ + out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1); + + /* Hook up the cascade interrupt to the iic and nodeid */ + virq = spider_find_cascade_and_node(pic); + if (virq == NO_IRQ) + return; + irq_set_handler_data(virq, pic); + irq_set_chained_handler(virq, spider_irq_cascade); + + printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n", + pic->node_id, addr, of_node->full_name); + + /* Enable the interrupt detection enable bit. Do this last! */ + out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1); +} + +void __init spider_init_IRQ(void) +{ + struct resource r; + struct device_node *dn; + int chip = 0; + + /* XXX node numbers are totally bogus. We _hope_ we get the device + * nodes in the right order here but that's definitely not guaranteed, + * we need to get the node from the device tree instead. 
+ * There is currently no proper property for it (but our whole + * device-tree is bogus anyway) so all we can do is pray or maybe test + * the address and deduce the node-id + */ + for (dn = NULL; + (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) { + if (of_address_to_resource(dn, 0, &r)) { + printk(KERN_WARNING "spider-pic: Failed\n"); + continue; + } + } else if (of_device_is_compatible(dn, "sti,platform-spider-pic") + && (chip < 2)) { + static long hard_coded_pics[] = + { 0x24000008000ul, 0x34000008000ul}; + r.start = hard_coded_pics[chip]; + } else + continue; + spider_init_one(dn, chip++, r.start); + } +} diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c new file mode 100644 index 000000000..f7af74f83 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -0,0 +1,811 @@ +/* + * Low-level SPU handling + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/linux_logo.h> +#include <linux/syscore_ops.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/spu_csa.h> +#include <asm/xmon.h> +#include <asm/prom.h> +#include <asm/kexec.h> + +const struct spu_management_ops *spu_management_ops; +EXPORT_SYMBOL_GPL(spu_management_ops); + +const struct spu_priv1_ops *spu_priv1_ops; +EXPORT_SYMBOL_GPL(spu_priv1_ops); + +struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; +EXPORT_SYMBOL_GPL(cbe_spu_info); + +/* + * The spufs fault-handling code needs to call force_sig_info to raise signals + * on DMA errors. Export it here to avoid general kernel-wide access to this + * function + */ +EXPORT_SYMBOL_GPL(force_sig_info); + +/* + * Protects cbe_spu_info and spu->number. + */ +static DEFINE_SPINLOCK(spu_lock); + +/* + * List of all spus in the system. + * + * This list is iterated by callers from irq context and callers that + * want to sleep. Thus modifications need to be done with both + * spu_full_list_lock and spu_full_list_mutex held, while iterating + * through it requires either of these locks. + * + * In addition spu_full_list_lock protects all assignmens to + * spu->mm. 
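+ *
+ * A sleeping walker therefore follows the pattern used by
+ * spu_add_dev_attr() and friends below:
+ *
+ * mutex_lock(&spu_full_list_mutex);
+ * list_for_each_entry(spu, &spu_full_list, full_list)
+ * ...;
+ * mutex_unlock(&spu_full_list_mutex);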
+ */ +static LIST_HEAD(spu_full_list); +static DEFINE_SPINLOCK(spu_full_list_lock); +static DEFINE_MUTEX(spu_full_list_mutex); + +void spu_invalidate_slbs(struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + unsigned long flags; + + spin_lock_irqsave(&spu->register_lock, flags); + if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) + out_be64(&priv2->slb_invalidate_all_W, 0UL); + spin_unlock_irqrestore(&spu->register_lock, flags); +} +EXPORT_SYMBOL_GPL(spu_invalidate_slbs); + +/* This is called by the MM core when a segment size is changed, to + * request a flush of all the SPEs using a given mm + */ +void spu_flush_all_slbs(struct mm_struct *mm) +{ + struct spu *spu; + unsigned long flags; + + spin_lock_irqsave(&spu_full_list_lock, flags); + list_for_each_entry(spu, &spu_full_list, full_list) { + if (spu->mm == mm) + spu_invalidate_slbs(spu); + } + spin_unlock_irqrestore(&spu_full_list_lock, flags); +} + +/* The hack below stinks... try to do something better one of + * these days... Does it even work properly with NR_CPUS == 1 ? + */ +static inline void mm_needs_global_tlbie(struct mm_struct *mm) +{ + int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1; + + /* Global TLBIE broadcast required with SPEs. */ + bitmap_fill(cpumask_bits(mm_cpumask(mm)), nr); +} + +void spu_associate_mm(struct spu *spu, struct mm_struct *mm) +{ + unsigned long flags; + + spin_lock_irqsave(&spu_full_list_lock, flags); + spu->mm = mm; + spin_unlock_irqrestore(&spu_full_list_lock, flags); + if (mm) + mm_needs_global_tlbie(mm); +} +EXPORT_SYMBOL_GPL(spu_associate_mm); + +int spu_64k_pages_available(void) +{ + return mmu_psize_defs[MMU_PAGE_64K].shift != 0; +} +EXPORT_SYMBOL_GPL(spu_64k_pages_available); + +static void spu_restart_dma(struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags)) + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); + else { + set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags); + mb(); + } +} + +static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n", + __func__, slbe, slb->vsid, slb->esid); + + out_be64(&priv2->slb_index_W, slbe); + /* set invalid before writing vsid */ + out_be64(&priv2->slb_esid_RW, 0); + /* now it's safe to write the vsid */ + out_be64(&priv2->slb_vsid_RW, slb->vsid); + /* setting the new esid makes the entry valid again */ + out_be64(&priv2->slb_esid_RW, slb->esid); +} + +static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) +{ + struct copro_slb slb; + int ret; + + ret = copro_calculate_slb(spu->mm, ea, &slb); + if (ret) + return ret; + + spu_load_slb(spu, spu->slb_replace, &slb); + + spu->slb_replace++; + if (spu->slb_replace >= 8) + spu->slb_replace = 0; + + spu_restart_dma(spu); + spu->stats.slb_flt++; + return 0; +} + +extern int hash_page(unsigned long ea, unsigned long access, + unsigned long trap, unsigned long dsisr); //XXX +static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) +{ + int ret; + + pr_debug("%s, %llx, %lx\n", __func__, dsisr, ea); + + /* + * Handle kernel space hash faults immediately. User hash + * faults need to be deferred to process context. 
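+ *
+ * (We are called under spu->register_lock from the class 1 interrupt
+ * handler: a kernel hash fault can be serviced directly by hash_page(),
+ * dropping the lock around the call, but user faults may need the
+ * process page tables and can sleep, so they are reported through
+ * stop_callback() and resolved by the owning context instead.)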
+ */
+ if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
+ (REGION_ID(ea) != USER_REGION_ID)) {
+
+ spin_unlock(&spu->register_lock);
+ ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr);
+ spin_lock(&spu->register_lock);
+
+ if (!ret) {
+ spu_restart_dma(spu);
+ return 0;
+ }
+ }
+
+ spu->class_1_dar = ea;
+ spu->class_1_dsisr = dsisr;
+
+ spu->stop_callback(spu, 1);
+
+ spu->class_1_dar = 0;
+ spu->class_1_dsisr = 0;
+
+ return 0;
+}
+
+static void __spu_kernel_slb(void *addr, struct copro_slb *slb)
+{
+ unsigned long ea = (unsigned long)addr;
+ u64 llp;
+
+ if (REGION_ID(ea) == KERNEL_REGION_ID)
+ llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ else
+ llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+
+ slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) |
+ SLB_VSID_KERNEL | llp;
+ slb->esid = (ea & ESID_MASK) | SLB_ESID_V;
+}
+
+/**
+ * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the
+ * address @new_addr is present.
+ */
+static inline int __slb_present(struct copro_slb *slbs, int nr_slbs,
+ void *new_addr)
+{
+ unsigned long ea = (unsigned long)new_addr;
+ int i;
+
+ for (i = 0; i < nr_slbs; i++)
+ if (!((slbs[i].esid ^ ea) & ESID_MASK))
+ return 1;
+
+ return 0;
+}
+
+/**
+ * Set up the SPU kernel SLBs, in preparation for a context save/restore. We
+ * need to map both the context save area, and the save/restore code.
+ *
+ * Because the lscsa and code may cross segment boundaries, we check to see
+ * if mappings are required for the start and end of each range. We currently
+ * assume that the mappings are smaller than one segment - if not, something
+ * is seriously wrong.
+ */
+void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa,
+ void *code, int code_size)
+{
+ struct copro_slb slbs[4];
+ int i, nr_slbs = 0;
+ /* start and end addresses of both mappings */
+ void *addrs[] = {
+ lscsa, (void *)lscsa + sizeof(*lscsa) - 1,
+ code, code + code_size - 1
+ };
+
+ /* check the set of addresses, and create a new entry in the slbs array
+ * if there isn't already an SLB for that address */
+ for (i = 0; i < ARRAY_SIZE(addrs); i++) {
+ if (__slb_present(slbs, nr_slbs, addrs[i]))
+ continue;
+
+ __spu_kernel_slb(addrs[i], &slbs[nr_slbs]);
+ nr_slbs++;
+ }
+
+ spin_lock_irq(&spu->register_lock);
+ /* Add the set of SLBs */
+ for (i = 0; i < nr_slbs; i++)
+ spu_load_slb(spu, i, &slbs[i]);
+ spin_unlock_irq(&spu->register_lock);
+}
+EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs);
+
+static irqreturn_t
+spu_irq_class_0(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat, mask;
+
+ spu = data;
+
+ spin_lock(&spu->register_lock);
+ mask = spu_int_mask_get(spu, 0);
+ stat = spu_int_stat_get(spu, 0) & mask;
+
+ spu->class_0_pending |= stat;
+ spu->class_0_dar = spu_mfc_dar_get(spu);
+ spu->stop_callback(spu, 0);
+ spu->class_0_pending = 0;
+ spu->class_0_dar = 0;
+
+ spu_int_stat_clear(spu, 0, stat);
+ spin_unlock(&spu->register_lock);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t
+spu_irq_class_1(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat, mask, dar, dsisr;
+
+ spu = data;
+
+ /* atomically read & clear class1 status.
*/ + spin_lock(&spu->register_lock); + mask = spu_int_mask_get(spu, 1); + stat = spu_int_stat_get(spu, 1) & mask; + dar = spu_mfc_dar_get(spu); + dsisr = spu_mfc_dsisr_get(spu); + if (stat & CLASS1_STORAGE_FAULT_INTR) + spu_mfc_dsisr_set(spu, 0ul); + spu_int_stat_clear(spu, 1, stat); + + pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat, + dar, dsisr); + + if (stat & CLASS1_SEGMENT_FAULT_INTR) + __spu_trap_data_seg(spu, dar); + + if (stat & CLASS1_STORAGE_FAULT_INTR) + __spu_trap_data_map(spu, dar, dsisr); + + if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_GET_INTR) + ; + + if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_PUT_INTR) + ; + + spu->class_1_dsisr = 0; + spu->class_1_dar = 0; + + spin_unlock(&spu->register_lock); + + return stat ? IRQ_HANDLED : IRQ_NONE; +} + +static irqreturn_t +spu_irq_class_2(int irq, void *data) +{ + struct spu *spu; + unsigned long stat; + unsigned long mask; + const int mailbox_intrs = + CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR; + + spu = data; + spin_lock(&spu->register_lock); + stat = spu_int_stat_get(spu, 2); + mask = spu_int_mask_get(spu, 2); + /* ignore interrupts we're not waiting for */ + stat &= mask; + /* mailbox interrupts are level triggered. mask them now before + * acknowledging */ + if (stat & mailbox_intrs) + spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs)); + /* acknowledge all interrupts before the callbacks */ + spu_int_stat_clear(spu, 2, stat); + + pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask); + + if (stat & CLASS2_MAILBOX_INTR) + spu->ibox_callback(spu); + + if (stat & CLASS2_SPU_STOP_INTR) + spu->stop_callback(spu, 2); + + if (stat & CLASS2_SPU_HALT_INTR) + spu->stop_callback(spu, 2); + + if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR) + spu->mfc_callback(spu); + + if (stat & CLASS2_MAILBOX_THRESHOLD_INTR) + spu->wbox_callback(spu); + + spu->stats.class2_intr++; + + spin_unlock(&spu->register_lock); + + return stat ? 
IRQ_HANDLED : IRQ_NONE; +} + +static int spu_request_irqs(struct spu *spu) +{ + int ret = 0; + + if (spu->irqs[0] != NO_IRQ) { + snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", + spu->number); + ret = request_irq(spu->irqs[0], spu_irq_class_0, + 0, spu->irq_c0, spu); + if (ret) + goto bail0; + } + if (spu->irqs[1] != NO_IRQ) { + snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", + spu->number); + ret = request_irq(spu->irqs[1], spu_irq_class_1, + 0, spu->irq_c1, spu); + if (ret) + goto bail1; + } + if (spu->irqs[2] != NO_IRQ) { + snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", + spu->number); + ret = request_irq(spu->irqs[2], spu_irq_class_2, + 0, spu->irq_c2, spu); + if (ret) + goto bail2; + } + return 0; + +bail2: + if (spu->irqs[1] != NO_IRQ) + free_irq(spu->irqs[1], spu); +bail1: + if (spu->irqs[0] != NO_IRQ) + free_irq(spu->irqs[0], spu); +bail0: + return ret; +} + +static void spu_free_irqs(struct spu *spu) +{ + if (spu->irqs[0] != NO_IRQ) + free_irq(spu->irqs[0], spu); + if (spu->irqs[1] != NO_IRQ) + free_irq(spu->irqs[1], spu); + if (spu->irqs[2] != NO_IRQ) + free_irq(spu->irqs[2], spu); +} + +void spu_init_channels(struct spu *spu) +{ + static const struct { + unsigned channel; + unsigned count; + } zero_list[] = { + { 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, }, + { 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, }, + }, count_list[] = { + { 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, }, + { 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, }, + { 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, }, + }; + struct spu_priv2 __iomem *priv2; + int i; + + priv2 = spu->priv2; + + /* initialize all channel data to zero */ + for (i = 0; i < ARRAY_SIZE(zero_list); i++) { + int count; + + out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel); + for (count = 0; count < zero_list[i].count; count++) + out_be64(&priv2->spu_chnldata_RW, 0); + } + + /* initialize channel counts to meaningful values */ + for (i = 0; i < ARRAY_SIZE(count_list); i++) { + out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel); + out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); + } +} +EXPORT_SYMBOL_GPL(spu_init_channels); + +static struct bus_type spu_subsys = { + .name = "spu", + .dev_name = "spu", +}; + +int spu_add_dev_attr(struct device_attribute *attr) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + device_create_file(&spu->dev, attr); + mutex_unlock(&spu_full_list_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(spu_add_dev_attr); + +int spu_add_dev_attr_group(struct attribute_group *attrs) +{ + struct spu *spu; + int rc = 0; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) { + rc = sysfs_create_group(&spu->dev.kobj, attrs); + + /* we're in trouble here, but try unwinding anyway */ + if (rc) { + printk(KERN_ERR "%s: can't create sysfs group '%s'\n", + __func__, attrs->name); + + list_for_each_entry_continue_reverse(spu, + &spu_full_list, full_list) + sysfs_remove_group(&spu->dev.kobj, attrs); + break; + } + } + + mutex_unlock(&spu_full_list_mutex); + + return rc; +} +EXPORT_SYMBOL_GPL(spu_add_dev_attr_group); + + +void spu_remove_dev_attr(struct device_attribute *attr) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + device_remove_file(&spu->dev, attr); + mutex_unlock(&spu_full_list_mutex); +} +EXPORT_SYMBOL_GPL(spu_remove_dev_attr); + +void spu_remove_dev_attr_group(struct attribute_group 
*attrs) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + sysfs_remove_group(&spu->dev.kobj, attrs); + mutex_unlock(&spu_full_list_mutex); +} +EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group); + +static int spu_create_dev(struct spu *spu) +{ + int ret; + + spu->dev.id = spu->number; + spu->dev.bus = &spu_subsys; + ret = device_register(&spu->dev); + if (ret) { + printk(KERN_ERR "Can't register SPU %d with sysfs\n", + spu->number); + return ret; + } + + sysfs_add_device_to_node(&spu->dev, spu->node); + + return 0; +} + +static int __init create_spu(void *data) +{ + struct spu *spu; + int ret; + static int number; + unsigned long flags; + + ret = -ENOMEM; + spu = kzalloc(sizeof (*spu), GFP_KERNEL); + if (!spu) + goto out; + + spu->alloc_state = SPU_FREE; + + spin_lock_init(&spu->register_lock); + spin_lock(&spu_lock); + spu->number = number++; + spin_unlock(&spu_lock); + + ret = spu_create_spu(spu, data); + + if (ret) + goto out_free; + + spu_mfc_sdr_setup(spu); + spu_mfc_sr1_set(spu, 0x33); + ret = spu_request_irqs(spu); + if (ret) + goto out_destroy; + + ret = spu_create_dev(spu); + if (ret) + goto out_free_irqs; + + mutex_lock(&cbe_spu_info[spu->node].list_mutex); + list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus); + cbe_spu_info[spu->node].n_spus++; + mutex_unlock(&cbe_spu_info[spu->node].list_mutex); + + mutex_lock(&spu_full_list_mutex); + spin_lock_irqsave(&spu_full_list_lock, flags); + list_add(&spu->full_list, &spu_full_list); + spin_unlock_irqrestore(&spu_full_list_lock, flags); + mutex_unlock(&spu_full_list_mutex); + + spu->stats.util_state = SPU_UTIL_IDLE_LOADED; + spu->stats.tstamp = ktime_get_ns(); + + INIT_LIST_HEAD(&spu->aff_list); + + goto out; + +out_free_irqs: + spu_free_irqs(spu); +out_destroy: + spu_destroy_spu(spu); +out_free: + kfree(spu); +out: + return ret; +} + +static const char *spu_state_names[] = { + "user", "system", "iowait", "idle" +}; + +static unsigned long long spu_acct_time(struct spu *spu, + enum spu_utilization_state state) +{ + unsigned long long time = spu->stats.times[state]; + + /* + * If the spu is idle or the context is stopped, utilization + * statistics are not updated. Apply the time delta from the + * last recorded state of the spu. 
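+	 *
+	 * A worked example with hypothetical numbers: if
+	 * stats.times[state] already holds 5 ms worth of nanoseconds
+	 * and the spu has spent a further 2 ms in that state since
+	 * stats.tstamp, the division by NSEC_PER_MSEC below yields 7.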
+ */ + if (spu->stats.util_state == state) + time += ktime_get_ns() - spu->stats.tstamp; + + return time / NSEC_PER_MSEC; +} + + +static ssize_t spu_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct spu *spu = container_of(dev, struct spu, dev); + + return sprintf(buf, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + spu_state_names[spu->stats.util_state], + spu_acct_time(spu, SPU_UTIL_USER), + spu_acct_time(spu, SPU_UTIL_SYSTEM), + spu_acct_time(spu, SPU_UTIL_IOWAIT), + spu_acct_time(spu, SPU_UTIL_IDLE_LOADED), + spu->stats.vol_ctx_switch, + spu->stats.invol_ctx_switch, + spu->stats.slb_flt, + spu->stats.hash_flt, + spu->stats.min_flt, + spu->stats.maj_flt, + spu->stats.class2_intr, + spu->stats.libassist); +} + +static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL); + +#ifdef CONFIG_KEXEC + +struct crash_spu_info { + struct spu *spu; + u32 saved_spu_runcntl_RW; + u32 saved_spu_status_R; + u32 saved_spu_npc_RW; + u64 saved_mfc_sr1_RW; + u64 saved_mfc_dar; + u64 saved_mfc_dsisr; +}; + +#define CRASH_NUM_SPUS 16 /* Enough for current hardware */ +static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; + +static void crash_kexec_stop_spus(void) +{ + struct spu *spu; + int i; + u64 tmp; + + for (i = 0; i < CRASH_NUM_SPUS; i++) { + if (!crash_spu_info[i].spu) + continue; + + spu = crash_spu_info[i].spu; + + crash_spu_info[i].saved_spu_runcntl_RW = + in_be32(&spu->problem->spu_runcntl_RW); + crash_spu_info[i].saved_spu_status_R = + in_be32(&spu->problem->spu_status_R); + crash_spu_info[i].saved_spu_npc_RW = + in_be32(&spu->problem->spu_npc_RW); + + crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu); + crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu); + tmp = spu_mfc_sr1_get(spu); + crash_spu_info[i].saved_mfc_sr1_RW = tmp; + + tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, tmp); + + __delay(200); + } +} + +static void crash_register_spus(struct list_head *list) +{ + struct spu *spu; + int ret; + + list_for_each_entry(spu, list, full_list) { + if (WARN_ON(spu->number >= CRASH_NUM_SPUS)) + continue; + + crash_spu_info[spu->number].spu = spu; + } + + ret = crash_shutdown_register(&crash_kexec_stop_spus); + if (ret) + printk(KERN_ERR "Could not register SPU crash handler"); +} + +#else +static inline void crash_register_spus(struct list_head *list) +{ +} +#endif + +static void spu_shutdown(void) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) { + spu_free_irqs(spu); + spu_destroy_spu(spu); + } + mutex_unlock(&spu_full_list_mutex); +} + +static struct syscore_ops spu_syscore_ops = { + .shutdown = spu_shutdown, +}; + +static int __init init_spu_base(void) +{ + int i, ret = 0; + + for (i = 0; i < MAX_NUMNODES; i++) { + mutex_init(&cbe_spu_info[i].list_mutex); + INIT_LIST_HEAD(&cbe_spu_info[i].spus); + } + + if (!spu_management_ops) + goto out; + + /* create system subsystem for spus */ + ret = subsys_system_register(&spu_subsys, NULL); + if (ret) + goto out; + + ret = spu_enumerate_spus(create_spu); + + if (ret < 0) { + printk(KERN_WARNING "%s: Error initializing spus\n", + __func__); + goto out_unregister_subsys; + } + + if (ret > 0) + fb_append_extra_logo(&logo_spe_clut224, ret); + + mutex_lock(&spu_full_list_mutex); + xmon_register_spus(&spu_full_list); + crash_register_spus(&spu_full_list); + mutex_unlock(&spu_full_list_mutex); + spu_add_dev_attr(&dev_attr_stat); + register_syscore_ops(&spu_syscore_ops); + + spu_init_affinity(); + + 
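+	/*
+	 * At this point the SPUs are enumerated, registered with sysfs
+	 * and hooked into the syscore shutdown path, so the normal exit
+	 * simply reports success.
+	 */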
return 0;
+
+ out_unregister_subsys:
+	bus_unregister(&spu_subsys);
+ out:
+	return ret;
+}
+module_init(init_spu_base);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
new file mode 100644
index 000000000..a494028b2
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -0,0 +1,74 @@
+/*
+ * System call callback functions for SPUs
+ */
+
+#undef DEBUG
+
+#include <linux/kallsyms.h>
+#include <linux/export.h>
+#include <linux/syscalls.h>
+
+#include <asm/spu.h>
+#include <asm/syscalls.h>
+#include <asm/unistd.h>
+
+/*
+ * This table defines the system calls that an SPU can call.
+ * It is currently a subset of the 64 bit powerpc system calls,
+ * with the exact same semantics.
+ *
+ * The reasons for disabling some of the system calls are:
+ * 1. They interact with the way SPU syscalls are handled
+ *    and we can't let them execute ever:
+ *    restart_syscall, exit, fork, execve, ptrace, ...
+ * 2. They are deprecated and replaced by other means:
+ *    uselib, pciconfig_*, sysfs, ...
+ * 3. They interact with the system in a way
+ *    we don't want an SPU to:
+ *    reboot, init_module, mount, kexec_load
+ * 4. They are optional and we can't rely on them being
+ *    linked into the kernel. Unfortunately, the cond_syscall
+ *    helper does not work here as it does not add the necessary
+ *    opd symbols:
+ *    mbind, mq_open, ipc, ...
+ */
+
+static void *spu_syscall_table[] = {
+#define SYSCALL(func)		sys_ni_syscall,
+#define COMPAT_SYS(func)	sys_ni_syscall,
+#define PPC_SYS(func)		sys_ni_syscall,
+#define OLDSYS(func)		sys_ni_syscall,
+#define SYS32ONLY(func)		sys_ni_syscall,
+#define PPC64ONLY(func)		sys_ni_syscall,
+#define SYSX(f, f3264, f32)	sys_ni_syscall,
+
+#define SYSCALL_SPU(func)	sys_##func,
+#define COMPAT_SYS_SPU(func)	sys_##func,
+#define PPC_SYS_SPU(func)	ppc_##func,
+#define SYSX_SPU(f, f3264, f32)	f,
+
+#include <asm/systbl.h>
+};
+
+long spu_sys_callback(struct spu_syscall_block *s)
+{
+	long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6);
+
+	if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) {
+		pr_debug("%s: invalid syscall #%lld\n", __func__, s->nr_ret);
+		return -ENOSYS;
+	}
+
+	syscall = spu_syscall_table[s->nr_ret];
+
+	pr_debug("SPU-syscall "
+		 "%pSR:syscall%lld(%llx, %llx, %llx, %llx, %llx, %llx)\n",
+		 syscall,
+		 s->nr_ret,
+		 s->parm[0], s->parm[1], s->parm[2],
+		 s->parm[3], s->parm[4], s->parm[5]);
+
+	return syscall(s->parm[0], s->parm[1], s->parm[2],
+		       s->parm[3], s->parm[4], s->parm[5]);
}
+EXPORT_SYMBOL_GPL(spu_sys_callback);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
new file mode 100644
index 000000000..c3327f3d8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -0,0 +1,555 @@
+/*
+ * spu management operations for OF (Open Firmware) based platforms
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/export.h> +#include <linux/ptrace.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/device.h> + +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/firmware.h> +#include <asm/prom.h> + +#include "spufs/spufs.h" +#include "interrupt.h" + +struct device_node *spu_devnode(struct spu *spu) +{ + return spu->devnode; +} + +EXPORT_SYMBOL_GPL(spu_devnode); + +static u64 __init find_spu_unit_number(struct device_node *spe) +{ + const unsigned int *prop; + int proplen; + + /* new device trees should provide the physical-id attribute */ + prop = of_get_property(spe, "physical-id", &proplen); + if (proplen == 4) + return (u64)*prop; + + /* celleb device tree provides the unit-id */ + prop = of_get_property(spe, "unit-id", &proplen); + if (proplen == 4) + return (u64)*prop; + + /* legacy device trees provide the id in the reg attribute */ + prop = of_get_property(spe, "reg", &proplen); + if (proplen == 4) + return (u64)*prop; + + return 0; +} + +static void spu_unmap(struct spu *spu) +{ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + iounmap(spu->priv1); + iounmap(spu->priv2); + iounmap(spu->problem); + iounmap((__force u8 __iomem *)spu->local_store); +} + +static int __init spu_map_interrupts_old(struct spu *spu, + struct device_node *np) +{ + unsigned int isrc; + const u32 *tmp; + int nid; + + /* Get the interrupt source unit from the device-tree */ + tmp = of_get_property(np, "isrc", NULL); + if (!tmp) + return -ENODEV; + isrc = tmp[0]; + + tmp = of_get_property(np->parent->parent, "node-id", NULL); + if (!tmp) { + printk(KERN_WARNING "%s: can't find node-id\n", __func__); + nid = spu->node; + } else + nid = tmp[0]; + + /* Add the node number */ + isrc |= nid << IIC_IRQ_NODE_SHIFT; + + /* Now map interrupts of all 3 classes */ + spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc); + spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc); + spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc); + + /* Right now, we only fail if class 2 failed */ + return spu->irqs[2] == NO_IRQ ? 
-EINVAL : 0; +} + +static void __iomem * __init spu_map_prop_old(struct spu *spu, + struct device_node *n, + const char *name) +{ + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + int proplen; + + prop = of_get_property(n, name, &proplen); + if (prop == NULL || proplen != sizeof (struct address_prop)) + return NULL; + + return ioremap(prop->address, prop->len); +} + +static int __init spu_map_device_old(struct spu *spu) +{ + struct device_node *node = spu->devnode; + const char *prop; + int ret; + + ret = -ENODEV; + spu->name = of_get_property(node, "name", NULL); + if (!spu->name) + goto out; + + prop = of_get_property(node, "local-store", NULL); + if (!prop) + goto out; + spu->local_store_phys = *(unsigned long *)prop; + + /* we use local store as ram, not io memory */ + spu->local_store = (void __force *) + spu_map_prop_old(spu, node, "local-store"); + if (!spu->local_store) + goto out; + + prop = of_get_property(node, "problem", NULL); + if (!prop) + goto out_unmap; + spu->problem_phys = *(unsigned long *)prop; + + spu->problem = spu_map_prop_old(spu, node, "problem"); + if (!spu->problem) + goto out_unmap; + + spu->priv2 = spu_map_prop_old(spu, node, "priv2"); + if (!spu->priv2) + goto out_unmap; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + spu->priv1 = spu_map_prop_old(spu, node, "priv1"); + if (!spu->priv1) + goto out_unmap; + } + + ret = 0; + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) +{ + struct of_phandle_args oirq; + int ret; + int i; + + for (i=0; i < 3; i++) { + ret = of_irq_parse_one(np, i, &oirq); + if (ret) { + pr_debug("spu_new: failed to get irq %d\n", i); + goto err; + } + ret = -EINVAL; + pr_debug(" irq %d no 0x%x on %s\n", i, oirq.args[0], + oirq.np->full_name); + spu->irqs[i] = irq_create_of_mapping(&oirq); + if (spu->irqs[i] == NO_IRQ) { + pr_debug("spu_new: failed to map it !\n"); + goto err; + } + } + return 0; + +err: + pr_debug("failed to map irq %x for spu %s\n", *oirq.args, + spu->name); + for (; i >= 0; i--) { + if (spu->irqs[i] != NO_IRQ) + irq_dispose_mapping(spu->irqs[i]); + } + return ret; +} + +static int spu_map_resource(struct spu *spu, int nr, + void __iomem** virt, unsigned long *phys) +{ + struct device_node *np = spu->devnode; + struct resource resource = { }; + unsigned long len; + int ret; + + ret = of_address_to_resource(np, nr, &resource); + if (ret) + return ret; + if (phys) + *phys = resource.start; + len = resource_size(&resource); + *virt = ioremap(resource.start, len); + if (!*virt) + return -EINVAL; + return 0; +} + +static int __init spu_map_device(struct spu *spu) +{ + struct device_node *np = spu->devnode; + int ret = -ENODEV; + + spu->name = of_get_property(np, "name", NULL); + if (!spu->name) + goto out; + + ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store, + &spu->local_store_phys); + if (ret) { + pr_debug("spu_new: failed to map %s resource 0\n", + np->full_name); + goto out; + } + ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem, + &spu->problem_phys); + if (ret) { + pr_debug("spu_new: failed to map %s resource 1\n", + np->full_name); + goto out_unmap; + } + ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL); + if (ret) { + pr_debug("spu_new: failed to map %s resource 2\n", + np->full_name); + goto out_unmap; + } + if (!firmware_has_feature(FW_FEATURE_LPAR)) + ret = spu_map_resource(spu, 3, + (void __iomem**)&spu->priv1, 
NULL); + if (ret) { + pr_debug("spu_new: failed to map %s resource 3\n", + np->full_name); + goto out_unmap; + } + pr_debug("spu_new: %s maps:\n", np->full_name); + pr_debug(" local store : 0x%016lx -> 0x%p\n", + spu->local_store_phys, spu->local_store); + pr_debug(" problem state : 0x%016lx -> 0x%p\n", + spu->problem_phys, spu->problem); + pr_debug(" priv2 : 0x%p\n", spu->priv2); + pr_debug(" priv1 : 0x%p\n", spu->priv1); + + return 0; + +out_unmap: + spu_unmap(spu); +out: + pr_debug("failed to map spe %s: %d\n", spu->name, ret); + return ret; +} + +static int __init of_enumerate_spus(int (*fn)(void *data)) +{ + int ret; + struct device_node *node; + unsigned int n = 0; + + ret = -ENODEV; + for (node = of_find_node_by_type(NULL, "spe"); + node; node = of_find_node_by_type(node, "spe")) { + ret = fn(node); + if (ret) { + printk(KERN_WARNING "%s: Error initializing %s\n", + __func__, node->name); + break; + } + n++; + } + return ret ? ret : n; +} + +static int __init of_create_spu(struct spu *spu, void *data) +{ + int ret; + struct device_node *spe = (struct device_node *)data; + static int legacy_map = 0, legacy_irq = 0; + + spu->devnode = of_node_get(spe); + spu->spe_id = find_spu_unit_number(spe); + + spu->node = of_node_to_nid(spe); + if (spu->node >= MAX_NUMNODES) { + printk(KERN_WARNING "SPE %s on node %d ignored," + " node number too big\n", spe->full_name, spu->node); + printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n"); + ret = -ENODEV; + goto out; + } + + ret = spu_map_device(spu); + if (ret) { + if (!legacy_map) { + legacy_map = 1; + printk(KERN_WARNING "%s: Legacy device tree found, " + "trying to map old style\n", __func__); + } + ret = spu_map_device_old(spu); + if (ret) { + printk(KERN_ERR "Unable to map %s\n", + spu->name); + goto out; + } + } + + ret = spu_map_interrupts(spu, spe); + if (ret) { + if (!legacy_irq) { + legacy_irq = 1; + printk(KERN_WARNING "%s: Legacy device tree found, " + "trying old style irq\n", __func__); + } + ret = spu_map_interrupts_old(spu, spe); + if (ret) { + printk(KERN_ERR "%s: could not map interrupts\n", + spu->name); + goto out_unmap; + } + } + + pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name, + spu->local_store, spu->problem, spu->priv1, + spu->priv2, spu->number); + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int of_destroy_spu(struct spu *spu) +{ + spu_unmap(spu); + of_node_put(spu->devnode); + return 0; +} + +static void enable_spu_by_master_run(struct spu_context *ctx) +{ + ctx->ops->master_start(ctx); +} + +static void disable_spu_by_master_run(struct spu_context *ctx) +{ + ctx->ops->master_stop(ctx); +} + +/* Hardcoded affinity idxs for qs20 */ +#define QS20_SPES_PER_BE 8 +static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; +static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; + +static struct spu *spu_lookup_reg(int node, u32 reg) +{ + struct spu *spu; + const u32 *spu_reg; + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + spu_reg = of_get_property(spu_devnode(spu), "reg", NULL); + if (*spu_reg == reg) + return spu; + } + return NULL; +} + +static void init_affinity_qs20_harcoded(void) +{ + int node, i; + struct spu *last_spu, *spu; + u32 reg; + + for (node = 0; node < MAX_NUMNODES; node++) { + last_spu = NULL; + for (i = 0; i < QS20_SPES_PER_BE; i++) { + reg = qs20_reg_idxs[i]; + spu = spu_lookup_reg(node, reg); + if (!spu) + continue; + spu->has_mem_affinity = qs20_reg_memory[reg]; + if (last_spu) + list_add_tail(&spu->aff_list, + 
&last_spu->aff_list);
+			last_spu = spu;
+		}
+	}
+}
+
+static int of_has_vicinity(void)
+{
+	struct device_node *dn;
+
+	for_each_node_by_type(dn, "spe") {
+		if (of_find_property(dn, "vicinity", NULL))  {
+			of_node_put(dn);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static struct spu *devnode_spu(int cbe, struct device_node *dn)
+{
+	struct spu *spu;
+
+	list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list)
+		if (spu_devnode(spu) == dn)
+			return spu;
+	return NULL;
+}
+
+static struct spu *
+neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid)
+{
+	struct spu *spu;
+	struct device_node *spu_dn;
+	const phandle *vic_handles;
+	int lenp, i;
+
+	list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
+		spu_dn = spu_devnode(spu);
+		if (spu_dn == avoid)
+			continue;
+		vic_handles = of_get_property(spu_dn, "vicinity", &lenp);
+		for (i = 0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == target->phandle)
+				return spu;
+		}
+	}
+	return NULL;
+}
+
+static void init_affinity_node(int cbe)
+{
+	struct spu *spu, *last_spu;
+	struct device_node *vic_dn, *last_spu_dn;
+	phandle avoid_ph;
+	const phandle *vic_handles;
+	const char *name;
+	int lenp, i, added;
+
+	last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu,
+			cbe_list);
+	avoid_ph = 0;
+	for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
+		last_spu_dn = spu_devnode(last_spu);
+		vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp);
+
+		/*
+		 * Walk through each phandle in vicinity property of the spu
+		 * (typically two vicinity phandles per spe node)
+		 */
+		for (i = 0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == avoid_ph)
+				continue;
+
+			vic_dn = of_find_node_by_phandle(vic_handles[i]);
+			if (!vic_dn)
+				continue;
+
+			/* a neighbour might be spe, mic-tm, or bif0 */
+			name = of_get_property(vic_dn, "name", NULL);
+			if (!name)
+				continue;
+
+			if (strcmp(name, "spe") == 0) {
+				spu = devnode_spu(cbe, vic_dn);
+				avoid_ph = last_spu_dn->phandle;
+			} else {
+				/*
+				 * "mic-tm" and "bif0" nodes do not have
+				 * vicinity property. So we need to find the
+				 * spe which has vic_dn as neighbour,
+				 * skipping the one we came from (last_spu_dn)
+				 */
+				spu = neighbour_spu(cbe, vic_dn, last_spu_dn);
+				if (!spu)
+					continue;
+				if (!strcmp(name, "mic-tm")) {
+					last_spu->has_mem_affinity = 1;
+					spu->has_mem_affinity = 1;
+				}
+				avoid_ph = vic_dn->phandle;
+			}
+
+			list_add_tail(&spu->aff_list, &last_spu->aff_list);
+			last_spu = spu;
+			break;
+		}
+	}
+}
+
+static void init_affinity_fw(void)
+{
+	int cbe;
+
+	for (cbe = 0; cbe < MAX_NUMNODES; cbe++)
+		init_affinity_node(cbe);
+}
+
+static int __init init_affinity(void)
+{
+	if (of_has_vicinity()) {
+		init_affinity_fw();
+	} else {
+		long root = of_get_flat_dt_root();
+		if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
+			init_affinity_qs20_harcoded();
+		else
+			printk("No affinity configuration found\n");
+	}
+
+	return 0;
+}
+
+const struct spu_management_ops spu_management_of_ops = {
+	.enumerate_spus = of_enumerate_spus,
+	.create_spu = of_create_spu,
+	.destroy_spu = of_destroy_spu,
+	.enable_spu = enable_spu_by_master_run,
+	.disable_spu = disable_spu_by_master_run,
+	.init_affinity = init_affinity,
+};
diff --git a/arch/powerpc/platforms/cell/spu_notify.c b/arch/powerpc/platforms/cell/spu_notify.c
new file mode 100644
index 000000000..afdf857c3
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_notify.c
@@ -0,0 +1,68 @@
+/*
+ * Move OProfile dependencies from spufs module to the kernel so it
+ * can run on non-cell PPC.
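+ *
+ * A minimal sketch of a hypothetical client (my_switch_cb and my_nb
+ * are illustrative names, not part of this code): a profiler
+ * registers a notifier and is then called with the context's
+ * object_id and the struct spu pointer on every context switch:
+ *
+ *	static int my_switch_cb(struct notifier_block *n,
+ *				unsigned long object_id, void *spu)
+ *	{
+ *		return 0;
+ *	}
+ *	static struct notifier_block my_nb = {
+ *		.notifier_call = my_switch_cb,
+ *	};
+ *	...
+ *	spu_switch_event_register(&my_nb);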
+ * + * Copyright (C) IBM 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/export.h> +#include <linux/notifier.h> +#include <asm/spu.h> +#include "spufs/spufs.h" + +static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); + +void spu_switch_notify(struct spu *spu, struct spu_context *ctx) +{ + blocking_notifier_call_chain(&spu_switch_notifier, + ctx ? ctx->object_id : 0, spu); +} +EXPORT_SYMBOL_GPL(spu_switch_notify); + +int spu_switch_event_register(struct notifier_block *n) +{ + int ret; + ret = blocking_notifier_chain_register(&spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; +} +EXPORT_SYMBOL_GPL(spu_switch_event_register); + +int spu_switch_event_unregister(struct notifier_block *n) +{ + return blocking_notifier_chain_unregister(&spu_switch_notifier, n); +} +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); + +void spu_set_profile_private_kref(struct spu_context *ctx, + struct kref *prof_info_kref, + void (* prof_info_release) (struct kref *kref)) +{ + ctx->prof_priv_kref = prof_info_kref; + ctx->prof_priv_release = prof_info_release; +} +EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); + +void *spu_get_profile_private_kref(struct spu_context *ctx) +{ + return ctx->prof_priv_kref; +} +EXPORT_SYMBOL_GPL(spu_get_profile_private_kref); + diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c new file mode 100644 index 000000000..66d33724f --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c @@ -0,0 +1,180 @@ +/* + * spu hypervisor abstraction for direct hardware access. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/ptrace.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/device.h> +#include <linux/sched.h> + +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/firmware.h> +#include <asm/prom.h> + +#include "interrupt.h" +#include "spu_priv1_mmio.h" + +static void int_mask_and(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask); +} + +static void int_mask_or(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask); +} + +static void int_mask_set(struct spu *spu, int class, u64 mask) +{ + out_be64(&spu->priv1->int_mask_RW[class], mask); +} + +static u64 int_mask_get(struct spu *spu, int class) +{ + return in_be64(&spu->priv1->int_mask_RW[class]); +} + +static void int_stat_clear(struct spu *spu, int class, u64 stat) +{ + out_be64(&spu->priv1->int_stat_RW[class], stat); +} + +static u64 int_stat_get(struct spu *spu, int class) +{ + return in_be64(&spu->priv1->int_stat_RW[class]); +} + +static void cpu_affinity_set(struct spu *spu, int cpu) +{ + u64 target; + u64 route; + + if (nr_cpus_node(spu->node)) { + const struct cpumask *spumask = cpumask_of_node(spu->node), + *cpumask = cpumask_of_node(cpu_to_node(cpu)); + + if (!cpumask_intersects(spumask, cpumask)) + return; + } + + target = iic_get_target_id(cpu); + route = target << 48 | target << 32 | target << 16; + out_be64(&spu->priv1->int_route_RW, route); +} + +static u64 mfc_dar_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_dar_RW); +} + +static u64 mfc_dsisr_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_dsisr_RW); +} + +static void mfc_dsisr_set(struct spu *spu, u64 dsisr) +{ + out_be64(&spu->priv1->mfc_dsisr_RW, dsisr); +} + +static void mfc_sdr_setup(struct spu *spu) +{ + out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1)); +} + +static void mfc_sr1_set(struct spu *spu, u64 sr1) +{ + out_be64(&spu->priv1->mfc_sr1_RW, sr1); +} + +static u64 mfc_sr1_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_sr1_RW); +} + +static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) +{ + out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id); +} + +static u64 mfc_tclass_id_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_tclass_id_RW); +} + +static void tlb_invalidate(struct spu *spu) +{ + out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul); +} + +static void resource_allocation_groupID_set(struct spu *spu, u64 id) +{ + out_be64(&spu->priv1->resource_allocation_groupID_RW, id); +} + +static u64 resource_allocation_groupID_get(struct spu *spu) +{ + return in_be64(&spu->priv1->resource_allocation_groupID_RW); +} + +static void resource_allocation_enable_set(struct spu *spu, u64 enable) +{ + out_be64(&spu->priv1->resource_allocation_enable_RW, enable); +} + +static u64 resource_allocation_enable_get(struct spu *spu) +{ + return in_be64(&spu->priv1->resource_allocation_enable_RW); +} + +const struct spu_priv1_ops spu_priv1_mmio_ops = +{ + .int_mask_and = int_mask_and, + .int_mask_or = int_mask_or, + .int_mask_set = 
int_mask_set, + .int_mask_get = int_mask_get, + .int_stat_clear = int_stat_clear, + .int_stat_get = int_stat_get, + .cpu_affinity_set = cpu_affinity_set, + .mfc_dar_get = mfc_dar_get, + .mfc_dsisr_get = mfc_dsisr_get, + .mfc_dsisr_set = mfc_dsisr_set, + .mfc_sdr_setup = mfc_sdr_setup, + .mfc_sr1_set = mfc_sr1_set, + .mfc_sr1_get = mfc_sr1_get, + .mfc_tclass_id_set = mfc_tclass_id_set, + .mfc_tclass_id_get = mfc_tclass_id_get, + .tlb_invalidate = tlb_invalidate, + .resource_allocation_groupID_set = resource_allocation_groupID_set, + .resource_allocation_groupID_get = resource_allocation_groupID_get, + .resource_allocation_enable_set = resource_allocation_enable_set, + .resource_allocation_enable_get = resource_allocation_enable_get, +}; diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/arch/powerpc/platforms/cell/spu_priv1_mmio.h new file mode 100644 index 000000000..7b62bd1cc --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.h @@ -0,0 +1,26 @@ +/* + * spu hypervisor abstraction for direct hardware access. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef SPU_PRIV1_MMIO_H +#define SPU_PRIV1_MMIO_H + +struct device_node *spu_devnode(struct spu *spu); + +#endif /* SPU_PRIV1_MMIO_H */ diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c new file mode 100644 index 000000000..5e6e0bad6 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -0,0 +1,178 @@ +/* + * SPU file system -- system call stubs + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * (C) Copyright 2006-2007, IBM Corporation + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/syscalls.h> +#include <linux/rcupdate.h> +#include <linux/binfmts.h> + +#include <asm/spu.h> + +/* protected by rcu */ +static struct spufs_calls *spufs_calls; + +#ifdef CONFIG_SPU_FS_MODULE + +static inline struct spufs_calls *spufs_calls_get(void) +{ + struct spufs_calls *calls = NULL; + + rcu_read_lock(); + calls = rcu_dereference(spufs_calls); + if (calls && !try_module_get(calls->owner)) + calls = NULL; + rcu_read_unlock(); + + return calls; +} + +static inline void spufs_calls_put(struct spufs_calls *calls) +{ + BUG_ON(calls != spufs_calls); + + /* we don't need to rcu this, as we hold a reference to the module */ + module_put(spufs_calls->owner); +} + +#else /* !defined CONFIG_SPU_FS_MODULE */ + +static inline struct spufs_calls *spufs_calls_get(void) +{ + return spufs_calls; +} + +static inline void spufs_calls_put(struct spufs_calls *calls) { } + +#endif /* CONFIG_SPU_FS_MODULE */ + +SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, + umode_t, mode, int, neighbor_fd) +{ + long ret; + struct spufs_calls *calls; + + calls = spufs_calls_get(); + if (!calls) + return -ENOSYS; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + struct fd neighbor = fdget(neighbor_fd); + ret = -EBADF; + if (neighbor.file) { + ret = calls->create_thread(name, flags, mode, neighbor.file); + fdput(neighbor); + } + } else + ret = calls->create_thread(name, flags, mode, NULL); + + spufs_calls_put(calls); + return ret; +} + +asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) +{ + long ret; + struct fd arg; + struct spufs_calls *calls; + + calls = spufs_calls_get(); + if (!calls) + return -ENOSYS; + + ret = -EBADF; + arg = fdget(fd); + if (arg.file) { + ret = calls->spu_run(arg.file, unpc, ustatus); + fdput(arg); + } + + spufs_calls_put(calls); + return ret; +} + +#ifdef CONFIG_COREDUMP +int elf_coredump_extra_notes_size(void) +{ + struct spufs_calls *calls; + int ret; + + calls = spufs_calls_get(); + if (!calls) + return 0; + + ret = calls->coredump_extra_notes_size(); + + spufs_calls_put(calls); + + return ret; +} + +int elf_coredump_extra_notes_write(struct coredump_params *cprm) +{ + struct spufs_calls *calls; + int ret; + + calls = spufs_calls_get(); + if (!calls) + return 0; + + ret = calls->coredump_extra_notes_write(cprm); + + spufs_calls_put(calls); + + return ret; +} +#endif + +void notify_spus_active(void) +{ + struct spufs_calls *calls; + + calls = spufs_calls_get(); + if (!calls) + return; + + calls->notify_spus_active(); + spufs_calls_put(calls); + + return; +} + +int register_spu_syscalls(struct spufs_calls *calls) +{ + if (spufs_calls) + return -EBUSY; + + rcu_assign_pointer(spufs_calls, calls); + return 0; +} +EXPORT_SYMBOL_GPL(register_spu_syscalls); + +void unregister_spu_syscalls(struct spufs_calls *calls) +{ + BUG_ON(spufs_calls->owner != calls->owner); + RCU_INIT_POINTER(spufs_calls, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(unregister_spu_syscalls); diff --git a/arch/powerpc/platforms/cell/spufs/.gitignore b/arch/powerpc/platforms/cell/spufs/.gitignore new file mode 100644 index 000000000..a09ee8d84 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/.gitignore @@ -0,0 +1,2 @@ +spu_save_dump.h +spu_restore_dump.h diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile new file mode 100644 index 000000000..52a7d2596 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/Makefile @@ -0,0 +1,62 
@@ + +obj-$(CONFIG_SPU_FS) += spufs.o +spufs-y += inode.o file.o context.o syscalls.o +spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o +spufs-y += switch.o fault.o lscsa_alloc.o +spufs-$(CONFIG_COREDUMP) += coredump.o + +# magic for the trace events +CFLAGS_sched.o := -I$(src) + +# Rules to build switch.o with the help of SPU tool chain +SPU_CROSS := spu- +SPU_CC := $(SPU_CROSS)gcc +SPU_AS := $(SPU_CROSS)gcc +SPU_LD := $(SPU_CROSS)ld +SPU_OBJCOPY := $(SPU_CROSS)objcopy +SPU_CFLAGS := -O2 -Wall -I$(srctree)/include -D__KERNEL__ +SPU_AFLAGS := -c -D__ASSEMBLY__ -I$(srctree)/include -D__KERNEL__ +SPU_LDFLAGS := -N -Ttext=0x0 + +$(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h +clean-files := spu_save_dump.h spu_restore_dump.h + +# Compile SPU files + cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $< +quiet_cmd_spu_cc = SPU_CC $@ +$(obj)/spu_%.o: $(src)/spu_%.c + $(call if_changed,spu_cc) + +# Assemble SPU files + cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $< +quiet_cmd_spu_as = SPU_AS $@ +$(obj)/spu_%.o: $(src)/spu_%.S + $(call if_changed,spu_as) + +# Link SPU Executables + cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^ +quiet_cmd_spu_ld = SPU_LD $@ +$(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o + $(call if_changed,spu_ld) + +# Copy into binary format + cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@ +quiet_cmd_spu_objcopy = OBJCOPY $@ +$(obj)/spu_%.bin: $(src)/spu_% + $(call if_changed,spu_objcopy) + +# create C code from ELF executable +cmd_hexdump = ( \ + echo "/*" ; \ + echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \ + echo " * Hex-dump auto generated from $*.c." ; \ + echo " * Do not edit!" ; \ + echo " */" ; \ + echo "static unsigned int $*_code[] " \ + "__attribute__((__aligned__(128))) = {" ; \ + hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \ + echo "};" ; \ + ) > $@ +quiet_cmd_hexdump = HEXDUMP $@ +$(obj)/%_dump.h: $(obj)/%.bin + $(call if_changed,hexdump) diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c new file mode 100644 index 000000000..6e8a9ef85 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -0,0 +1,413 @@ +/* backing_ops.c - query/set operations on saved SPU context. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * These register operations allow SPUFS to operate on saved + * SPU contexts rather than hardware. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/poll.h> + +#include <asm/io.h> +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/spu_info.h> +#include <asm/mmu_context.h> +#include "spufs.h" + +/* + * Reads/writes to various problem and priv2 registers require + * state changes, i.e. generate SPU events, modify channel + * counts, etc. + */ + +static void gen_spu_event(struct spu_context *ctx, u32 event) +{ + u64 ch0_cnt; + u64 ch0_data; + u64 ch1_data; + + ch0_cnt = ctx->csa.spu_chnlcnt_RW[0]; + ch0_data = ctx->csa.spu_chnldata_RW[0]; + ch1_data = ctx->csa.spu_chnldata_RW[1]; + ctx->csa.spu_chnldata_RW[0] |= event; + if ((ch0_cnt == 0) && !(ch0_data & event) && (ch1_data & event)) { + ctx->csa.spu_chnlcnt_RW[0] = 1; + } +} + +static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data) +{ + u32 mbox_stat; + int ret = 0; + + spin_lock(&ctx->csa.register_lock); + mbox_stat = ctx->csa.prob.mb_stat_R; + if (mbox_stat & 0x0000ff) { + /* Read the first available word. + * Implementation note: the depth + * of pu_mb_R is currently 1. + */ + *data = ctx->csa.prob.pu_mb_R; + ctx->csa.prob.mb_stat_R &= ~(0x0000ff); + ctx->csa.spu_chnlcnt_RW[28] = 1; + gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT); + ret = 4; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static u32 spu_backing_mbox_stat_read(struct spu_context *ctx) +{ + return ctx->csa.prob.mb_stat_R; +} + +static unsigned int spu_backing_mbox_stat_poll(struct spu_context *ctx, + unsigned int events) +{ + int ret; + u32 stat; + + ret = 0; + spin_lock_irq(&ctx->csa.register_lock); + stat = ctx->csa.prob.mb_stat_R; + + /* if the requested event is there, return the poll + mask, otherwise enable the interrupt to get notified, + but first mark any pending interrupts as done so + we don't get woken up unnecessarily */ + + if (events & (POLLIN | POLLRDNORM)) { + if (stat & 0xff0000) + ret |= POLLIN | POLLRDNORM; + else { + ctx->csa.priv1.int_stat_class2_RW &= + ~CLASS2_MAILBOX_INTR; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_INTR; + } + } + if (events & (POLLOUT | POLLWRNORM)) { + if (stat & 0x00ff00) + ret = POLLOUT | POLLWRNORM; + else { + ctx->csa.priv1.int_stat_class2_RW &= + ~CLASS2_MAILBOX_THRESHOLD_INTR; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR; + } + } + spin_unlock_irq(&ctx->csa.register_lock); + return ret; +} + +static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.prob.mb_stat_R & 0xff0000) { + /* Read the first available word. + * Implementation note: the depth + * of puint_mb_R is currently 1. 
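+		 * Channel 30 is the SPU_WrOutIntrMbox channel: draining
+		 * the single-entry register frees its slot, which is why
+		 * the saved channel count is reset to 1 below.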
+ */ + *data = ctx->csa.priv2.puint_mb_R; + ctx->csa.prob.mb_stat_R &= ~(0xff0000); + ctx->csa.spu_chnlcnt_RW[30] = 1; + gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT); + ret = 4; + } else { + /* make sure we get woken up by the interrupt */ + ctx->csa.priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR; + ret = 0; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static int spu_backing_wbox_write(struct spu_context *ctx, u32 data) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + if ((ctx->csa.prob.mb_stat_R) & 0x00ff00) { + int slot = ctx->csa.spu_chnlcnt_RW[29]; + int avail = (ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8; + + /* We have space to write wbox_data. + * Implementation note: the depth + * of spu_mb_W is currently 4. + */ + BUG_ON(avail != (4 - slot)); + ctx->csa.spu_mailbox_data[slot] = data; + ctx->csa.spu_chnlcnt_RW[29] = ++slot; + ctx->csa.prob.mb_stat_R &= ~(0x00ff00); + ctx->csa.prob.mb_stat_R |= (((4 - slot) & 0xff) << 8); + gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT); + ret = 4; + } else { + /* make sure we get woken up by the interrupt when space + becomes available */ + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR; + ret = 0; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static u32 spu_backing_signal1_read(struct spu_context *ctx) +{ + return ctx->csa.spu_chnldata_RW[3]; +} + +static void spu_backing_signal1_write(struct spu_context *ctx, u32 data) +{ + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.priv2.spu_cfg_RW & 0x1) + ctx->csa.spu_chnldata_RW[3] |= data; + else + ctx->csa.spu_chnldata_RW[3] = data; + ctx->csa.spu_chnlcnt_RW[3] = 1; + gen_spu_event(ctx, MFC_SIGNAL_1_EVENT); + spin_unlock(&ctx->csa.register_lock); +} + +static u32 spu_backing_signal2_read(struct spu_context *ctx) +{ + return ctx->csa.spu_chnldata_RW[4]; +} + +static void spu_backing_signal2_write(struct spu_context *ctx, u32 data) +{ + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.priv2.spu_cfg_RW & 0x2) + ctx->csa.spu_chnldata_RW[4] |= data; + else + ctx->csa.spu_chnldata_RW[4] = data; + ctx->csa.spu_chnlcnt_RW[4] = 1; + gen_spu_event(ctx, MFC_SIGNAL_2_EVENT); + spin_unlock(&ctx->csa.register_lock); +} + +static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val) +{ + u64 tmp; + + spin_lock(&ctx->csa.register_lock); + tmp = ctx->csa.priv2.spu_cfg_RW; + if (val) + tmp |= 1; + else + tmp &= ~1; + ctx->csa.priv2.spu_cfg_RW = tmp; + spin_unlock(&ctx->csa.register_lock); +} + +static u64 spu_backing_signal1_type_get(struct spu_context *ctx) +{ + return ((ctx->csa.priv2.spu_cfg_RW & 1) != 0); +} + +static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val) +{ + u64 tmp; + + spin_lock(&ctx->csa.register_lock); + tmp = ctx->csa.priv2.spu_cfg_RW; + if (val) + tmp |= 2; + else + tmp &= ~2; + ctx->csa.priv2.spu_cfg_RW = tmp; + spin_unlock(&ctx->csa.register_lock); +} + +static u64 spu_backing_signal2_type_get(struct spu_context *ctx) +{ + return ((ctx->csa.priv2.spu_cfg_RW & 2) != 0); +} + +static u32 spu_backing_npc_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_npc_RW; +} + +static void spu_backing_npc_write(struct spu_context *ctx, u32 val) +{ + ctx->csa.prob.spu_npc_RW = val; +} + +static u32 spu_backing_status_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_status_R; +} + +static char *spu_backing_get_ls(struct spu_context *ctx) +{ + return ctx->csa.lscsa->ls; +} + +static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val) +{ + 
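+	/* Takes effect when this saved context is next restored to hardware. */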
ctx->csa.priv2.spu_privcntl_RW = val; +} + +static u32 spu_backing_runcntl_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_runcntl_RW; +} + +static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val) +{ + spin_lock(&ctx->csa.register_lock); + ctx->csa.prob.spu_runcntl_RW = val; + if (val & SPU_RUNCNTL_RUNNABLE) { + ctx->csa.prob.spu_status_R &= + ~SPU_STATUS_STOPPED_BY_STOP & + ~SPU_STATUS_STOPPED_BY_HALT & + ~SPU_STATUS_SINGLE_STEP & + ~SPU_STATUS_INVALID_INSTR & + ~SPU_STATUS_INVALID_CH; + ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING; + } else { + ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING; + } + spin_unlock(&ctx->csa.register_lock); +} + +static void spu_backing_runcntl_stop(struct spu_context *ctx) +{ + spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP); +} + +static void spu_backing_master_start(struct spu_context *ctx) +{ + struct spu_state *csa = &ctx->csa; + u64 sr1; + + spin_lock(&csa->register_lock); + sr1 = csa->priv1.mfc_sr1_RW | MFC_STATE1_MASTER_RUN_CONTROL_MASK; + csa->priv1.mfc_sr1_RW = sr1; + spin_unlock(&csa->register_lock); +} + +static void spu_backing_master_stop(struct spu_context *ctx) +{ + struct spu_state *csa = &ctx->csa; + u64 sr1; + + spin_lock(&csa->register_lock); + sr1 = csa->priv1.mfc_sr1_RW & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + csa->priv1.mfc_sr1_RW = sr1; + spin_unlock(&csa->register_lock); +} + +static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask, + u32 mode) +{ + struct spu_problem_collapsed *prob = &ctx->csa.prob; + int ret; + + spin_lock(&ctx->csa.register_lock); + ret = -EAGAIN; + if (prob->dma_querytype_RW) + goto out; + ret = 0; + /* FIXME: what are the side-effects of this? */ + prob->dma_querymask_RW = mask; + prob->dma_querytype_RW = mode; + /* In the current implementation, the SPU context is always + * acquired in runnable state when new bits are added to the + * mask (tagwait), so it's sufficient just to mask + * dma_tagstatus_R with the 'mask' parameter here. 
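+	 *
+	 * A hypothetical example: if dma_tagstatus_R held 0x21 (tag
+	 * groups 0 and 5 complete) and the new query mask is 0x20,
+	 * only bit 5 survives the masking below.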
+ */ + ctx->csa.prob.dma_tagstatus_R &= mask; +out: + spin_unlock(&ctx->csa.register_lock); + + return ret; +} + +static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx) +{ + return ctx->csa.prob.dma_tagstatus_R; +} + +static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx) +{ + return ctx->csa.prob.dma_qstatus_R; +} + +static int spu_backing_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command *cmd) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + ret = -EAGAIN; + /* FIXME: set up priv2->puq */ + spin_unlock(&ctx->csa.register_lock); + + return ret; +} + +static void spu_backing_restart_dma(struct spu_context *ctx) +{ + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND; +} + +struct spu_context_ops spu_backing_ops = { + .mbox_read = spu_backing_mbox_read, + .mbox_stat_read = spu_backing_mbox_stat_read, + .mbox_stat_poll = spu_backing_mbox_stat_poll, + .ibox_read = spu_backing_ibox_read, + .wbox_write = spu_backing_wbox_write, + .signal1_read = spu_backing_signal1_read, + .signal1_write = spu_backing_signal1_write, + .signal2_read = spu_backing_signal2_read, + .signal2_write = spu_backing_signal2_write, + .signal1_type_set = spu_backing_signal1_type_set, + .signal1_type_get = spu_backing_signal1_type_get, + .signal2_type_set = spu_backing_signal2_type_set, + .signal2_type_get = spu_backing_signal2_type_get, + .npc_read = spu_backing_npc_read, + .npc_write = spu_backing_npc_write, + .status_read = spu_backing_status_read, + .get_ls = spu_backing_get_ls, + .privcntl_write = spu_backing_privcntl_write, + .runcntl_read = spu_backing_runcntl_read, + .runcntl_write = spu_backing_runcntl_write, + .runcntl_stop = spu_backing_runcntl_stop, + .master_start = spu_backing_master_start, + .master_stop = spu_backing_master_stop, + .set_mfc_query = spu_backing_set_mfc_query, + .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus, + .get_mfc_free_elements = spu_backing_get_mfc_free_elements, + .send_mfc_command = spu_backing_send_mfc_command, + .restart_dma = spu_backing_restart_dma, +}; diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c new file mode 100644 index 000000000..3b4152fae --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -0,0 +1,186 @@ +/* + * SPU file system -- SPU context management + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/atomic.h> +#include <linux/sched.h> +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include "spufs.h" +#include "sputrace.h" + + +atomic_t nr_spu_contexts = ATOMIC_INIT(0); + +struct spu_context *alloc_spu_context(struct spu_gang *gang) +{ + struct spu_context *ctx; + + ctx = kzalloc(sizeof *ctx, GFP_KERNEL); + if (!ctx) + goto out; + /* Binding to physical processor deferred + * until spu_activate(). + */ + if (spu_init_csa(&ctx->csa)) + goto out_free; + spin_lock_init(&ctx->mmio_lock); + mutex_init(&ctx->mapping_lock); + kref_init(&ctx->kref); + mutex_init(&ctx->state_mutex); + mutex_init(&ctx->run_mutex); + init_waitqueue_head(&ctx->ibox_wq); + init_waitqueue_head(&ctx->wbox_wq); + init_waitqueue_head(&ctx->stop_wq); + init_waitqueue_head(&ctx->mfc_wq); + init_waitqueue_head(&ctx->run_wq); + ctx->state = SPU_STATE_SAVED; + ctx->ops = &spu_backing_ops; + ctx->owner = get_task_mm(current); + INIT_LIST_HEAD(&ctx->rq); + INIT_LIST_HEAD(&ctx->aff_list); + if (gang) + spu_gang_add_ctx(gang, ctx); + + __spu_update_sched_info(ctx); + spu_set_timeslice(ctx); + ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; + ctx->stats.tstamp = ktime_get_ns(); + + atomic_inc(&nr_spu_contexts); + goto out; +out_free: + kfree(ctx); + ctx = NULL; +out: + return ctx; +} + +void destroy_spu_context(struct kref *kref) +{ + struct spu_context *ctx; + ctx = container_of(kref, struct spu_context, kref); + spu_context_nospu_trace(destroy_spu_context__enter, ctx); + mutex_lock(&ctx->state_mutex); + spu_deactivate(ctx); + mutex_unlock(&ctx->state_mutex); + spu_fini_csa(&ctx->csa); + if (ctx->gang) + spu_gang_remove_ctx(ctx->gang, ctx); + if (ctx->prof_priv_kref) + kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); + BUG_ON(!list_empty(&ctx->rq)); + atomic_dec(&nr_spu_contexts); + kfree(ctx->switch_log); + kfree(ctx); +} + +struct spu_context * get_spu_context(struct spu_context *ctx) +{ + kref_get(&ctx->kref); + return ctx; +} + +int put_spu_context(struct spu_context *ctx) +{ + return kref_put(&ctx->kref, &destroy_spu_context); +} + +/* give up the mm reference when the context is about to be destroyed */ +void spu_forget(struct spu_context *ctx) +{ + struct mm_struct *mm; + + /* + * This is basically an open-coded spu_acquire_saved, except that + * we don't acquire the state mutex interruptible, and we don't + * want this context to be rescheduled on release. 
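+	 *
+	 * The mm reference dropped below was taken with get_task_mm()
+	 * in alloc_spu_context(), so the mmput() here balances that.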
+	 */
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->state != SPU_STATE_SAVED)
+		spu_deactivate(ctx);
+
+	mm = ctx->owner;
+	ctx->owner = NULL;
+	mmput(mm);
+	spu_release(ctx);
+}
+
+void spu_unmap_mappings(struct spu_context *ctx)
+{
+	mutex_lock(&ctx->mapping_lock);
+	if (ctx->local_store)
+		unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
+	if (ctx->mfc)
+		unmap_mapping_range(ctx->mfc, 0, SPUFS_MFC_MAP_SIZE, 1);
+	if (ctx->cntl)
+		unmap_mapping_range(ctx->cntl, 0, SPUFS_CNTL_MAP_SIZE, 1);
+	if (ctx->signal1)
+		unmap_mapping_range(ctx->signal1, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
+	if (ctx->signal2)
+		unmap_mapping_range(ctx->signal2, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
+	if (ctx->mss)
+		unmap_mapping_range(ctx->mss, 0, SPUFS_MSS_MAP_SIZE, 1);
+	if (ctx->psmap)
+		unmap_mapping_range(ctx->psmap, 0, SPUFS_PS_MAP_SIZE, 1);
+	mutex_unlock(&ctx->mapping_lock);
+}
+
+/**
+ * spu_acquire_saved - lock spu context and make sure it is in saved state
+ * @ctx: spu context to lock
+ */
+int spu_acquire_saved(struct spu_context *ctx)
+{
+	int ret;
+
+	spu_context_nospu_trace(spu_acquire_saved__enter, ctx);
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	if (ctx->state != SPU_STATE_SAVED) {
+		set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
+		spu_deactivate(ctx);
+	}
+
+	return 0;
+}
+
+/**
+ * spu_release_saved - unlock spu context and return it to the runqueue
+ * @ctx: context to unlock
+ */
+void spu_release_saved(struct spu_context *ctx)
+{
+	BUG_ON(ctx->state != SPU_STATE_SAVED);
+
+	if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags) &&
+			test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+		spu_activate(ctx, 0);
+
+	spu_release(ctx);
+}
+
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
new file mode 100644
index 000000000..be6212ddb
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -0,0 +1,211 @@
+/*
+ * SPU core dump code
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */ + +#include <linux/elf.h> +#include <linux/file.h> +#include <linux/fdtable.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/list.h> +#include <linux/syscalls.h> +#include <linux/coredump.h> +#include <linux/binfmts.h> + +#include <asm/uaccess.h> + +#include "spufs.h" + +static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer, + size_t size, loff_t *off) +{ + u64 data; + int ret; + + if (spufs_coredump_read[num].read) + return spufs_coredump_read[num].read(ctx, buffer, size, off); + + data = spufs_coredump_read[num].get(ctx); + ret = snprintf(buffer, size, "0x%.16llx", data); + if (ret >= size) + return size; + return ++ret; /* count trailing NULL */ +} + +static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) +{ + int i, sz, total = 0; + char *name; + char fullname[80]; + + for (i = 0; spufs_coredump_read[i].name != NULL; i++) { + name = spufs_coredump_read[i].name; + sz = spufs_coredump_read[i].size; + + sprintf(fullname, "SPU/%d/%s", dfd, name); + + total += sizeof(struct elf_note); + total += roundup(strlen(fullname) + 1, 4); + total += roundup(sz, 4); + } + + return total; +} + +static int match_context(const void *v, struct file *file, unsigned fd) +{ + struct spu_context *ctx; + if (file->f_op != &spufs_context_fops) + return 0; + ctx = SPUFS_I(file_inode(file))->i_ctx; + if (ctx->flags & SPU_CREATE_NOSCHED) + return 0; + return fd + 1; +} + +/* + * The additional architecture-specific notes for Cell are various + * context files in the spu context. + * + * This function iterates over all open file descriptors and sees + * if they are a directory in spufs. In that case we use spufs + * internal functionality to dump them without needing to actually + * open the files. + */ +/* + * descriptor table is not shared, so files can't change or go away. 
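+ * Hence fcheck() can be used here without taking additional locks.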
+ */ +static struct spu_context *coredump_next_context(int *fd) +{ + struct file *file; + int n = iterate_fd(current->files, *fd, match_context, NULL); + if (!n) + return NULL; + *fd = n - 1; + file = fcheck(*fd); + return SPUFS_I(file_inode(file))->i_ctx; +} + +int spufs_coredump_extra_notes_size(void) +{ + struct spu_context *ctx; + int size = 0, rc, fd; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + rc = spu_acquire_saved(ctx); + if (rc) + break; + rc = spufs_ctx_note_size(ctx, fd); + spu_release_saved(ctx); + if (rc < 0) + break; + + size += rc; + + /* start searching the next fd next time */ + fd++; + } + + return size; +} + +static int spufs_arch_write_note(struct spu_context *ctx, int i, + struct coredump_params *cprm, int dfd) +{ + loff_t pos = 0; + int sz, rc, total = 0; + const int bufsz = PAGE_SIZE; + char *name; + char fullname[80], *buf; + struct elf_note en; + + buf = (void *)get_zeroed_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + + name = spufs_coredump_read[i].name; + sz = spufs_coredump_read[i].size; + + sprintf(fullname, "SPU/%d/%s", dfd, name); + en.n_namesz = strlen(fullname) + 1; + en.n_descsz = sz; + en.n_type = NT_SPU; + + if (!dump_emit(cprm, &en, sizeof(en))) + goto Eio; + + if (!dump_emit(cprm, fullname, en.n_namesz)) + goto Eio; + + if (!dump_align(cprm, 4)) + goto Eio; + + do { + rc = do_coredump_read(i, ctx, buf, bufsz, &pos); + if (rc > 0) { + if (!dump_emit(cprm, buf, rc)) + goto Eio; + total += rc; + } + } while (rc == bufsz && total < sz); + + if (rc < 0) + goto out; + + if (!dump_skip(cprm, + roundup(cprm->written - total + sz, 4) - cprm->written)) + goto Eio; +out: + free_page((unsigned long)buf); + return rc; +Eio: + free_page((unsigned long)buf); + return -EIO; +} + +int spufs_coredump_extra_notes_write(struct coredump_params *cprm) +{ + struct spu_context *ctx; + int fd, j, rc; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + rc = spu_acquire_saved(ctx); + if (rc) + return rc; + + for (j = 0; spufs_coredump_read[j].name != NULL; j++) { + rc = spufs_arch_write_note(ctx, j, cprm, fd); + if (rc) { + spu_release_saved(ctx); + return rc; + } + } + + spu_release_saved(ctx); + + /* start searching the next fd next time */ + fd++; + } + + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c new file mode 100644 index 000000000..d98f845ac --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -0,0 +1,191 @@ +/* + * Low-level SPU handling + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/sched.h> +#include <linux/mm.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> + +#include "spufs.h" + +/** + * Handle an SPE event, depending on context SPU_CREATE_EVENTS_ENABLED flag. 
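+ * (SPU_CREATE_EVENTS_ENABLED is one of the flags passed to spu_create()
+ * when the context is set up.)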
+ *
+ * If the context was created with events, we just set the return event.
+ * Otherwise, send an appropriate signal to the process.
+ */
+static void spufs_handle_event(struct spu_context *ctx,
+				unsigned long ea, int type)
+{
+	siginfo_t info;
+
+	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+		ctx->event_return |= type;
+		wake_up_all(&ctx->stop_wq);
+		return;
+	}
+
+	memset(&info, 0, sizeof(info));
+
+	switch (type) {
+	case SPE_EVENT_INVALID_DMA:
+		info.si_signo = SIGBUS;
+		info.si_code = BUS_OBJERR;
+		break;
+	case SPE_EVENT_SPE_DATA_STORAGE:
+		info.si_signo = SIGSEGV;
+		info.si_addr = (void __user *)ea;
+		info.si_code = SEGV_ACCERR;
+		ctx->ops->restart_dma(ctx);
+		break;
+	case SPE_EVENT_DMA_ALIGNMENT:
+		info.si_signo = SIGBUS;
+		/* DAR isn't set for an alignment fault :( */
+		info.si_code = BUS_ADRALN;
+		break;
+	case SPE_EVENT_SPE_ERROR:
+		info.si_signo = SIGILL;
+		info.si_addr = (void __user *)(unsigned long)
+			ctx->ops->npc_read(ctx) - 4;
+		info.si_code = ILL_ILLOPC;
+		break;
+	}
+
+	if (info.si_signo)
+		force_sig_info(info.si_signo, &info, current);
+}
+
+int spufs_handle_class0(struct spu_context *ctx)
+{
+	unsigned long stat = ctx->csa.class_0_pending & CLASS0_INTR_MASK;
+
+	if (likely(!stat))
+		return 0;
+
+	if (stat & CLASS0_DMA_ALIGNMENT_INTR)
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+			SPE_EVENT_DMA_ALIGNMENT);
+
+	if (stat & CLASS0_INVALID_DMA_COMMAND_INTR)
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+			SPE_EVENT_INVALID_DMA);
+
+	if (stat & CLASS0_SPU_ERROR_INTR)
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+			SPE_EVENT_SPE_ERROR);
+
+	ctx->csa.class_0_pending = 0;
+
+	return -EIO;
+}
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+	u64 ea, dsisr, access;
+	unsigned long flags;
+	unsigned flt = 0;
+	int ret;
+
+	/*
+	 * dar and dsisr get passed from the registers
+	 * to the spu_context, to this function, but not
+	 * back to the spu if it gets scheduled again.
+	 *
+	 * if we don't handle the fault for a saved context
+	 * in time, we can still expect to get the same fault
+	 * immediately after the context restore.
+	 */
+	ea = ctx->csa.class_1_dar;
+	dsisr = ctx->csa.class_1_dsisr;
+
+	if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+		return 0;
+
+	spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
+
+	pr_debug("ctx %p: ea %016llx, dsisr %016llx state %d\n", ctx, ea,
+		dsisr, ctx->state);
+
+	ctx->stats.hash_flt++;
+	if (ctx->state == SPU_STATE_RUNNABLE)
+		ctx->spu->stats.hash_flt++;
+
+	/* we must not hold the lock when entering copro_handle_mm_fault */
+	spu_release(ctx);
+
+	access = (_PAGE_PRESENT | _PAGE_USER);
+	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+	local_irq_save(flags);
+	ret = hash_page(ea, access, 0x300, dsisr);
+	local_irq_restore(flags);
+
+	/* hashing failed, so try the actual fault handler */
+	if (ret)
+		ret = copro_handle_mm_fault(current->mm, ea, dsisr, &flt);
+
+	/*
+	 * This is nasty: we need the state_mutex for all the bookkeeping even
+	 * if the syscall was interrupted by a signal. ewww.
+ */ + mutex_lock(&ctx->state_mutex); + + /* + * Clear dsisr under ctxt lock after handling the fault, so that + * time slicing will not preempt the context while the page fault + * handler is running. Context switch code removes mappings. + */ + ctx->csa.class_1_dar = ctx->csa.class_1_dsisr = 0; + + /* + * If we handled the fault successfully and are in runnable + * state, restart the DMA. + * In case of unhandled error report the problem to user space. + */ + if (!ret) { + if (flt & VM_FAULT_MAJOR) + ctx->stats.maj_flt++; + else + ctx->stats.min_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) { + if (flt & VM_FAULT_MAJOR) + ctx->spu->stats.maj_flt++; + else + ctx->spu->stats.min_flt++; + } + + if (ctx->spu) + ctx->ops->restart_dma(ctx); + } else + spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + return ret; +} diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c new file mode 100644 index 000000000..d966bbe58 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -0,0 +1,2771 @@ +/* + * SPU file system -- file contents + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#undef DEBUG + +#include <linux/fs.h> +#include <linux/ioctl.h> +#include <linux/export.h> +#include <linux/pagemap.h> +#include <linux/poll.h> +#include <linux/ptrace.h> +#include <linux/seq_file.h> +#include <linux/slab.h> + +#include <asm/io.h> +#include <asm/time.h> +#include <asm/spu.h> +#include <asm/spu_info.h> +#include <asm/uaccess.h> + +#include "spufs.h" +#include "sputrace.h" + +#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000) + +/* Simple attribute files */ +struct spufs_attr { + int (*get)(void *, u64 *); + int (*set)(void *, u64); + char get_buf[24]; /* enough to store a u64 and "\n\0" */ + char set_buf[24]; + void *data; + const char *fmt; /* format for read operation */ + struct mutex mutex; /* protects access to these buffers */ +}; + +static int spufs_attr_open(struct inode *inode, struct file *file, + int (*get)(void *, u64 *), int (*set)(void *, u64), + const char *fmt) +{ + struct spufs_attr *attr; + + attr = kmalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->get = get; + attr->set = set; + attr->data = inode->i_private; + attr->fmt = fmt; + mutex_init(&attr->mutex); + file->private_data = attr; + + return nonseekable_open(inode, file); +} + +static int spufs_attr_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static ssize_t spufs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct spufs_attr *attr; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->get) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + if (*ppos) { /* continued read */ + size = strlen(attr->get_buf); + } else { /* first read */ + u64 val; + ret = attr->get(attr->data, &val); + if (ret) + goto out; + + size = scnprintf(attr->get_buf, sizeof(attr->get_buf), + attr->fmt, (unsigned long long)val); + } + + ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); +out: + mutex_unlock(&attr->mutex); + return ret; +} + +static ssize_t spufs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct spufs_attr *attr; + u64 val; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->set) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + ret = -EFAULT; + size = min(sizeof(attr->set_buf) - 1, len); + if (copy_from_user(attr->set_buf, buf, size)) + goto out; + + ret = len; /* claim we got the whole input */ + attr->set_buf[size] = '\0'; + val = simple_strtol(attr->set_buf, NULL, 0); + attr->set(attr->data, val); +out: + mutex_unlock(&attr->mutex); + return ret; +} + +#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return spufs_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .open = __fops ## _open, \ + .release = spufs_attr_release, \ + .read = spufs_attr_read, \ + .write = spufs_attr_write, \ + .llseek = generic_file_llseek, \ +}; + + +static int +spufs_mem_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->local_store = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static int +spufs_mem_release(struct inode *inode, struct file 
*file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->local_store = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t +__spufs_mem_read(struct spu_context *ctx, char __user *buffer, + size_t size, loff_t *pos) +{ + char *local_store = ctx->ops->get_ls(ctx); + return simple_read_from_buffer(buffer, size, pos, local_store, + LS_SIZE); +} + +static ssize_t +spufs_mem_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ret = __spufs_mem_read(ctx, buffer, size, pos); + spu_release(ctx); + + return ret; +} + +static ssize_t +spufs_mem_write(struct file *file, const char __user *buffer, + size_t size, loff_t *ppos) +{ + struct spu_context *ctx = file->private_data; + char *local_store; + loff_t pos = *ppos; + int ret; + + if (pos > LS_SIZE) + return -EFBIG; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + local_store = ctx->ops->get_ls(ctx); + size = simple_write_to_buffer(local_store, LS_SIZE, ppos, buffer, size); + spu_release(ctx); + + return size; +} + +static int +spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long address = (unsigned long)vmf->virtual_address; + unsigned long pfn, offset; + +#ifdef CONFIG_SPU_FS_64K_LS + struct spu_state *csa = &ctx->csa; + int psize; + + /* Check what page size we are using */ + psize = get_slice_psize(vma->vm_mm, address); + + /* Some sanity checking */ + BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K)); + + /* Wow, 64K, cool, we need to align the address though */ + if (csa->use_big_pages) { + BUG_ON(vma->vm_start & 0xffff); + address &= ~0xfffful; + } +#endif /* CONFIG_SPU_FS_64K_LS */ + + offset = vmf->pgoff << PAGE_SHIFT; + if (offset >= LS_SIZE) + return VM_FAULT_SIGBUS; + + pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n", + address, offset); + + if (spu_acquire(ctx)) + return VM_FAULT_NOPAGE; + + if (ctx->state == SPU_STATE_SAVED) { + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset); + } else { + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT; + } + vm_insert_pfn(vma, address, pfn); + + spu_release(ctx); + + return VM_FAULT_NOPAGE; +} + +static int spufs_mem_mmap_access(struct vm_area_struct *vma, + unsigned long address, + void *buf, int len, int write) +{ + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long offset = address - vma->vm_start; + char *local_store; + + if (write && !(vma->vm_flags & VM_WRITE)) + return -EACCES; + if (spu_acquire(ctx)) + return -EINTR; + if ((offset + len) > vma->vm_end) + len = vma->vm_end - offset; + local_store = ctx->ops->get_ls(ctx); + if (write) + memcpy_toio(local_store + offset, buf, len); + else + memcpy_fromio(buf, local_store + offset, len); + spu_release(ctx); + return len; +} + +static const struct vm_operations_struct spufs_mem_mmap_vmops = { + .fault = spufs_mem_mmap_fault, + .access = spufs_mem_mmap_access, +}; + +static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) +{ +#ifdef CONFIG_SPU_FS_64K_LS + struct spu_context *ctx = file->private_data; + struct spu_state *csa = &ctx->csa; + + /* Sanity check VMA alignment */ + if (csa->use_big_pages) { + 
pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx," + " pgoff=0x%lx\n", vma->vm_start, vma->vm_end, + vma->vm_pgoff); + if (vma->vm_start & 0xffff) + return -EINVAL; + if (vma->vm_pgoff & 0xf) + return -EINVAL; + } +#endif /* CONFIG_SPU_FS_64K_LS */ + + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + + vma->vm_ops = &spufs_mem_mmap_vmops; + return 0; +} + +#ifdef CONFIG_SPU_FS_64K_LS +static unsigned long spufs_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct spu_context *ctx = file->private_data; + struct spu_state *csa = &ctx->csa; + + /* If not using big pages, fallback to normal MM g_u_a */ + if (!csa->use_big_pages) + return current->mm->get_unmapped_area(file, addr, len, + pgoff, flags); + + /* Else, try to obtain a 64K pages slice */ + return slice_get_unmapped_area(addr, len, flags, + MMU_PAGE_64K, 1); +} +#endif /* CONFIG_SPU_FS_64K_LS */ + +static const struct file_operations spufs_mem_fops = { + .open = spufs_mem_open, + .release = spufs_mem_release, + .read = spufs_mem_read, + .write = spufs_mem_write, + .llseek = generic_file_llseek, + .mmap = spufs_mem_mmap, +#ifdef CONFIG_SPU_FS_64K_LS + .get_unmapped_area = spufs_get_unmapped_area, +#endif +}; + +static int spufs_ps_fault(struct vm_area_struct *vma, + struct vm_fault *vmf, + unsigned long ps_offs, + unsigned long ps_size) +{ + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long area, offset = vmf->pgoff << PAGE_SHIFT; + int ret = 0; + + spu_context_nospu_trace(spufs_ps_fault__enter, ctx); + + if (offset >= ps_size) + return VM_FAULT_SIGBUS; + + if (fatal_signal_pending(current)) + return VM_FAULT_SIGBUS; + + /* + * Because we release the mmap_sem, the context may be destroyed while + * we're in spu_wait. Grab an extra reference so it isn't destroyed + * in the meantime. + */ + get_spu_context(ctx); + + /* + * We have to wait for context to be loaded before we have + * pages to hand out to the user, but we don't want to wait + * with the mmap_sem held. + * It is possible to drop the mmap_sem here, but then we need + * to return VM_FAULT_NOPAGE because the mappings may have + * hanged. + */ + if (spu_acquire(ctx)) + goto refault; + + if (ctx->state == SPU_STATE_SAVED) { + up_read(¤t->mm->mmap_sem); + spu_context_nospu_trace(spufs_ps_fault__sleep, ctx); + ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); + spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu); + down_read(¤t->mm->mmap_sem); + } else { + area = ctx->spu->problem_phys + ps_offs; + vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, + (area + offset) >> PAGE_SHIFT); + spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu); + } + + if (!ret) + spu_release(ctx); + +refault: + put_spu_context(ctx); + return VM_FAULT_NOPAGE; +} + +#if SPUFS_MMAP_4K +static int spufs_cntl_mmap_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + return spufs_ps_fault(vma, vmf, 0x4000, SPUFS_CNTL_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_cntl_mmap_vmops = { + .fault = spufs_cntl_mmap_fault, +}; + +/* + * mmap support for problem state control area [0x4000 - 0x4fff]. 
+ */ +static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_cntl_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_cntl_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static int spufs_cntl_get(void *data, u64 *val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + *val = ctx->ops->status_read(ctx); + spu_release(ctx); + + return 0; +} + +static int spufs_cntl_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->runcntl_write(ctx, val); + spu_release(ctx); + + return 0; +} + +static int spufs_cntl_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->cntl = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return simple_attr_open(inode, file, spufs_cntl_get, + spufs_cntl_set, "0x%08lx"); +} + +static int +spufs_cntl_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + simple_attr_release(inode, file); + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->cntl = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static const struct file_operations spufs_cntl_fops = { + .open = spufs_cntl_open, + .release = spufs_cntl_release, + .read = simple_attr_read, + .write = simple_attr_write, + .llseek = generic_file_llseek, + .mmap = spufs_cntl_mmap, +}; + +static int +spufs_regs_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + return 0; +} + +static ssize_t +__spufs_regs_read(struct spu_context *ctx, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return simple_read_from_buffer(buffer, size, pos, + lscsa->gprs, sizeof lscsa->gprs); +} + +static ssize_t +spufs_regs_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + /* pre-check for file position: if we'd return EOF, there's no point + * causing a deschedule */ + if (*pos >= sizeof(ctx->csa.lscsa->gprs)) + return 0; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_regs_read(ctx, buffer, size, pos); + spu_release_saved(ctx); + return ret; +} + +static ssize_t +spufs_regs_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + if (*pos >= sizeof(lscsa->gprs)) + return -EFBIG; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + + size = simple_write_to_buffer(lscsa->gprs, sizeof(lscsa->gprs), pos, + buffer, size); + + spu_release_saved(ctx); + return size; +} + +static const struct file_operations spufs_regs_fops = { + .open = spufs_regs_open, + .read = spufs_regs_read, + .write = spufs_regs_write, + .llseek = generic_file_llseek, +}; + +static ssize_t +__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer, + size_t size, loff_t * pos) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return simple_read_from_buffer(buffer, size, pos, + 
&lscsa->fpcr, sizeof(lscsa->fpcr)); +} + +static ssize_t +spufs_fpcr_read(struct file *file, char __user * buffer, + size_t size, loff_t * pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_fpcr_read(ctx, buffer, size, pos); + spu_release_saved(ctx); + return ret; +} + +static ssize_t +spufs_fpcr_write(struct file *file, const char __user * buffer, + size_t size, loff_t * pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + if (*pos >= sizeof(lscsa->fpcr)) + return -EFBIG; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + + size = simple_write_to_buffer(&lscsa->fpcr, sizeof(lscsa->fpcr), pos, + buffer, size); + + spu_release_saved(ctx); + return size; +} + +static const struct file_operations spufs_fpcr_fops = { + .open = spufs_regs_open, + .read = spufs_fpcr_read, + .write = spufs_fpcr_write, + .llseek = generic_file_llseek, +}; + +/* generic open function for all pipe-like files */ +static int spufs_pipe_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + + return nonseekable_open(inode, file); +} + +/* + * Read as many bytes from the mailbox as possible, until + * one of the conditions becomes true: + * + * - no more data available in the mailbox + * - end of the user provided buffer + * - end of the mapped area + */ +static ssize_t spufs_mbox_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 mbox_data, __user *udata; + ssize_t count; + + if (len < 4) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + udata = (void __user *)buf; + + count = spu_acquire(ctx); + if (count) + return count; + + for (count = 0; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = ctx->ops->mbox_read(ctx, &mbox_data); + if (ret == 0) + break; + + /* + * at the end of the mapped area, we can fault + * but still need to return the data we have + * read successfully so far. + */ + ret = __put_user(mbox_data, udata); + if (ret) { + if (!count) + count = -EFAULT; + break; + } + } + spu_release(ctx); + + if (!count) + count = -EAGAIN; + + return count; +} + +static const struct file_operations spufs_mbox_fops = { + .open = spufs_pipe_open, + .read = spufs_mbox_read, + .llseek = no_llseek, +}; + +static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + u32 mbox_stat; + + if (len < 4) + return -EINVAL; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff; + + spu_release(ctx); + + if (copy_to_user(buf, &mbox_stat, sizeof mbox_stat)) + return -EFAULT; + + return 4; +} + +static const struct file_operations spufs_mbox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_mbox_stat_read, + .llseek = no_llseek, +}; + +/* low-level ibox access function */ +size_t spu_ibox_read(struct spu_context *ctx, u32 *data) +{ + return ctx->ops->ibox_read(ctx, data); +} + +static int spufs_ibox_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx = file->private_data; + + return fasync_helper(fd, file, on, &ctx->ibox_fasync); +} + +/* interrupt-level ibox callback function. 
*/ +void spufs_ibox_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + if (!ctx) + return; + + wake_up_all(&ctx->ibox_wq); + kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN); +} + +/* + * Read as many bytes from the interrupt mailbox as possible, until + * one of the conditions becomes true: + * + * - no more data available in the mailbox + * - end of the user provided buffer + * - end of the mapped area + * + * If the file is opened without O_NONBLOCK, we wait here until + * any data is available, but return when we have been able to + * read something. + */ +static ssize_t spufs_ibox_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 ibox_data, __user *udata; + ssize_t count; + + if (len < 4) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + udata = (void __user *)buf; + + count = spu_acquire(ctx); + if (count) + goto out; + + /* wait only for the first element */ + count = 0; + if (file->f_flags & O_NONBLOCK) { + if (!spu_ibox_read(ctx, &ibox_data)) { + count = -EAGAIN; + goto out_unlock; + } + } else { + count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data)); + if (count) + goto out; + } + + /* if we can't write at all, return -EFAULT */ + count = __put_user(ibox_data, udata); + if (count) + goto out_unlock; + + for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = ctx->ops->ibox_read(ctx, &ibox_data); + if (ret == 0) + break; + /* + * at the end of the mapped area, we can fault + * but still need to return the data we have + * read successfully so far. + */ + ret = __put_user(ibox_data, udata); + if (ret) + break; + } + +out_unlock: + spu_release(ctx); +out: + return count; +} + +static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait) +{ + struct spu_context *ctx = file->private_data; + unsigned int mask; + + poll_wait(file, &ctx->ibox_wq, wait); + + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. + */ + mutex_lock(&ctx->state_mutex); + mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM); + spu_release(ctx); + + return mask; +} + +static const struct file_operations spufs_ibox_fops = { + .open = spufs_pipe_open, + .read = spufs_ibox_read, + .poll = spufs_ibox_poll, + .fasync = spufs_ibox_fasync, + .llseek = no_llseek, +}; + +static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + u32 ibox_stat; + + if (len < 4) + return -EINVAL; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff; + spu_release(ctx); + + if (copy_to_user(buf, &ibox_stat, sizeof ibox_stat)) + return -EFAULT; + + return 4; +} + +static const struct file_operations spufs_ibox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_ibox_stat_read, + .llseek = no_llseek, +}; + +/* low-level mailbox write */ +size_t spu_wbox_write(struct spu_context *ctx, u32 data) +{ + return ctx->ops->wbox_write(ctx, data); +} + +static int spufs_wbox_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = fasync_helper(fd, file, on, &ctx->wbox_fasync); + + return ret; +} + +/* interrupt-level wbox callback function. 
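+ * The counterpart of spufs_ibox_callback(): wakes blocked writers and
+ * sends SIGIO once the SPU has made room in its inbound mailbox.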
*/
+void spufs_wbox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->wbox_wq);
+	kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT);
+}
+
+/*
+ * Write as many bytes to the SPU inbound mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ *   - the mailbox is full
+ *   - end of the user provided buffer
+ *   - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * space is available, but return when we have been able to
+ * write something.
+ */
+static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 wbox_data, __user *udata;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	udata = (void __user *)buf;
+	if (!access_ok(VERIFY_READ, buf, len))
+		return -EFAULT;
+
+	if (__get_user(wbox_data, udata))
+		return -EFAULT;
+
+	count = spu_acquire(ctx);
+	if (count)
+		goto out;
+
+	/*
+	 * make sure we can at least write one element, by waiting
+	 * in case of !O_NONBLOCK
+	 */
+	count = 0;
+	if (file->f_flags & O_NONBLOCK) {
+		if (!spu_wbox_write(ctx, wbox_data)) {
+			count = -EAGAIN;
+			goto out_unlock;
+		}
+	} else {
+		count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+		if (count)
+			goto out;
+	}
+
+
+	/* write as much as possible */
+	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = __get_user(wbox_data, udata);
+		if (ret)
+			break;
+
+		ret = spu_wbox_write(ctx, wbox_data);
+		if (ret == 0)
+			break;
+	}
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return count;
+}
+
+static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	unsigned int mask;
+
+	poll_wait(file, &ctx->wbox_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep. Will be fixed later.
+ */ + mutex_lock(&ctx->state_mutex); + mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM); + spu_release(ctx); + + return mask; +} + +static const struct file_operations spufs_wbox_fops = { + .open = spufs_pipe_open, + .write = spufs_wbox_write, + .poll = spufs_wbox_poll, + .fasync = spufs_wbox_fasync, + .llseek = no_llseek, +}; + +static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + u32 wbox_stat; + + if (len < 4) + return -EINVAL; + + ret = spu_acquire(ctx); + if (ret) + return ret; + wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff; + spu_release(ctx); + + if (copy_to_user(buf, &wbox_stat, sizeof wbox_stat)) + return -EFAULT; + + return 4; +} + +static const struct file_operations spufs_wbox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_wbox_stat_read, + .llseek = no_llseek, +}; + +static int spufs_signal1_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->signal1 = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_signal1_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->signal1 = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf, + size_t len, loff_t *pos) +{ + int ret = 0; + u32 data; + + if (len < 4) + return -EINVAL; + + if (ctx->csa.spu_chnlcnt_RW[3]) { + data = ctx->csa.spu_chnldata_RW[3]; + ret = 4; + } + + if (!ret) + goto out; + + if (copy_to_user(buf, &data, 4)) + return -EFAULT; + +out: + return ret; +} + +static ssize_t spufs_signal1_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_signal1_read(ctx, buf, len, pos); + spu_release_saved(ctx); + + return ret; +} + +static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + ssize_t ret; + u32 data; + + ctx = file->private_data; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&data, buf, 4)) + return -EFAULT; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal1_write(ctx, data); + spu_release(ctx); + + return 4; +} + +static int +spufs_signal1_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#if SPUFS_SIGNAL_MAP_SIZE == 0x1000 + return spufs_ps_fault(vma, vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE); +#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000 + /* For 64k pages, both signal1 and signal2 can be used to mmap the whole + * signal 1 and 2 area + */ + return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); +#else +#error unsupported page size +#endif +} + +static const struct vm_operations_struct spufs_signal1_mmap_vmops = { + .fault = spufs_signal1_mmap_fault, +}; + +static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_signal1_mmap_vmops; + 
return 0; +} + +static const struct file_operations spufs_signal1_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, + .read = spufs_signal1_read, + .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, + .llseek = no_llseek, +}; + +static const struct file_operations spufs_signal1_nosched_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, + .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, + .llseek = no_llseek, +}; + +static int spufs_signal2_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->signal2 = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_signal2_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->signal2 = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf, + size_t len, loff_t *pos) +{ + int ret = 0; + u32 data; + + if (len < 4) + return -EINVAL; + + if (ctx->csa.spu_chnlcnt_RW[4]) { + data = ctx->csa.spu_chnldata_RW[4]; + ret = 4; + } + + if (!ret) + goto out; + + if (copy_to_user(buf, &data, 4)) + return -EFAULT; + +out: + return ret; +} + +static ssize_t spufs_signal2_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_signal2_read(ctx, buf, len, pos); + spu_release_saved(ctx); + + return ret; +} + +static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + ssize_t ret; + u32 data; + + ctx = file->private_data; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&data, buf, 4)) + return -EFAULT; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal2_write(ctx, data); + spu_release(ctx); + + return 4; +} + +#if SPUFS_MMAP_4K +static int +spufs_signal2_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#if SPUFS_SIGNAL_MAP_SIZE == 0x1000 + return spufs_ps_fault(vma, vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE); +#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000 + /* For 64k pages, both signal1 and signal2 can be used to mmap the whole + * signal 1 and 2 area + */ + return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); +#else +#error unsupported page size +#endif +} + +static const struct vm_operations_struct spufs_signal2_mmap_vmops = { + .fault = spufs_signal2_mmap_fault, +}; + +static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_signal2_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_signal2_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static const struct file_operations spufs_signal2_fops = { + .open = spufs_signal2_open, + .release = spufs_signal2_release, + .read = spufs_signal2_read, + .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, + .llseek = no_llseek, +}; + +static const struct file_operations spufs_signal2_nosched_fops = { + .open = 
spufs_signal2_open, + .release = spufs_signal2_release, + .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, + .llseek = no_llseek, +}; + +/* + * This is a wrapper around DEFINE_SIMPLE_ATTRIBUTE which does the + * work of acquiring (or not) the SPU context before calling through + * to the actual get routine. The set routine is called directly. + */ +#define SPU_ATTR_NOACQUIRE 0 +#define SPU_ATTR_ACQUIRE 1 +#define SPU_ATTR_ACQUIRE_SAVED 2 + +#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire) \ +static int __##__get(void *data, u64 *val) \ +{ \ + struct spu_context *ctx = data; \ + int ret = 0; \ + \ + if (__acquire == SPU_ATTR_ACQUIRE) { \ + ret = spu_acquire(ctx); \ + if (ret) \ + return ret; \ + *val = __get(ctx); \ + spu_release(ctx); \ + } else if (__acquire == SPU_ATTR_ACQUIRE_SAVED) { \ + ret = spu_acquire_saved(ctx); \ + if (ret) \ + return ret; \ + *val = __get(ctx); \ + spu_release_saved(ctx); \ + } else \ + *val = __get(ctx); \ + \ + return 0; \ +} \ +DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt); + +static int spufs_signal1_type_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal1_type_set(ctx, val); + spu_release(ctx); + + return 0; +} + +static u64 spufs_signal1_type_get(struct spu_context *ctx) +{ + return ctx->ops->signal1_type_get(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, + spufs_signal1_type_set, "%llu\n", SPU_ATTR_ACQUIRE); + + +static int spufs_signal2_type_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal2_type_set(ctx, val); + spu_release(ctx); + + return 0; +} + +static u64 spufs_signal2_type_get(struct spu_context *ctx) +{ + return ctx->ops->signal2_type_get(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, + spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE); + +#if SPUFS_MMAP_4K +static int +spufs_mss_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_MSS_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_mss_mmap_vmops = { + .fault = spufs_mss_mmap_fault, +}; + +/* + * mmap support for problem state MFC DMA area [0x0000 - 0x0fff]. 
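+ * (More precisely, this maps the MFC multisource synchronization register
+ * area at problem state offset 0x0000; mmap is the only operation the
+ * mss file supports -- see spufs_mss_fops below.)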
+ */
+static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_mss_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mss_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_mss_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	file->private_data = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!i->i_openers++)
+		ctx->mss = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_mss_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mss = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static const struct file_operations spufs_mss_fops = {
+	.open	 = spufs_mss_open,
+	.release = spufs_mss_release,
+	.mmap	 = spufs_mss_mmap,
+	.llseek  = no_llseek,
+};
+
+static int
+spufs_psmap_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_PS_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_psmap_mmap_vmops = {
+	.fault = spufs_psmap_mmap_fault,
+};
+
+/*
+ * mmap support for full problem state area [0x00000 - 0x1ffff].
+ */
+static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_psmap_mmap_vmops;
+	return 0;
+}
+
+static int spufs_psmap_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = i->i_ctx;
+	if (!i->i_openers++)
+		ctx->psmap = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_psmap_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->psmap = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static const struct file_operations spufs_psmap_fops = {
+	.open	 = spufs_psmap_open,
+	.release = spufs_psmap_release,
+	.mmap	 = spufs_psmap_mmap,
+	.llseek  = no_llseek,
+};
+
+
+#if SPUFS_MMAP_4K
+static int
+spufs_mfc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vma, vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_mfc_mmap_vmops = {
+	.fault = spufs_mfc_mmap_fault,
+};
+
+/*
+ * mmap support for problem state MFC DMA area [0x3000 - 0x3fff].
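+ *
+ * mmap is only half of the mfc interface: user space queues a transfer
+ * by write()ing a struct mfc_dma_command and read()s back a 4-byte tag
+ * status word. A rough sketch (hypothetical fd and buffer, error
+ * handling omitted; buf must be 16-byte aligned and the context must be
+ * running for the write to complete):
+ *
+ *	struct mfc_dma_command cmd = {
+ *		.lsa  = 0x0,			/* local store offset */
+ *		.ea   = (uint64_t)(unsigned long)buf,
+ *		.size = 4096,
+ *		.tag  = 1,			/* tags 0..15 are for user space */
+ *		.cmd  = MFC_GET_CMD,		/* main memory -> local store */
+ *	};
+ *	uint32_t status;
+ *	write(mfc_fd, &cmd, sizeof(cmd));
+ *	read(mfc_fd, &status, 4);	/* blocks until the tag group completes */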
+ */
+static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_mfc_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mfc_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_mfc_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	/* we don't want to deal with DMA into other processes */
+	if (ctx->owner != current->mm)
+		return -EINVAL;
+
+	if (atomic_read(&inode->i_count) != 1)
+		return -EBUSY;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->mfc = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_mfc_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mfc = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+/* interrupt-level mfc callback function. */
+void spufs_mfc_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->mfc_wq);
+
+	pr_debug("%s %s\n", __func__, spu->name);
+	if (ctx->mfc_fasync) {
+		u32 free_elements, tagstatus;
+		unsigned int mask;
+
+		/* no need for spu_acquire in interrupt context */
+		free_elements = ctx->ops->get_mfc_free_elements(ctx);
+		tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+
+		mask = 0;
+		if (free_elements & 0xffff)
+			mask |= POLLOUT;
+		if (tagstatus & ctx->tagwait)
+			mask |= POLLIN;
+
+		kill_fasync(&ctx->mfc_fasync, SIGIO, mask);
+	}
+}
+
+static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status)
+{
+	/* See if at least one tag group is complete */
+	/* FIXME we need locking around tagwait */
+	*status = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait;
+	ctx->tagwait &= ~*status;
+	if (*status)
+		return 1;
+
+	/* enable interrupt waiting for any tag group,
+	   may silently fail if interrupts are already enabled */
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+	return 0;
+}
+
+static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = -EINVAL;
+	u32 status;
+
+	if (size != 4)
+		goto out;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+	if (file->f_flags & O_NONBLOCK) {
+		status = ctx->ops->read_mfc_tagstatus(ctx);
+		if (!(status & ctx->tagwait))
+			ret = -EAGAIN;
+		else
+			/* XXX(hch): shouldn't we clear ret here?
*/
+			ctx->tagwait &= ~status;
+	} else {
+		ret = spufs_wait(ctx->mfc_wq,
+			   spufs_read_mfc_tagstatus(ctx, &status));
+		if (ret)
+			goto out;
+	}
+	spu_release(ctx);
+
+	ret = 4;
+	if (copy_to_user(buffer, &status, 4))
+		ret = -EFAULT;
+
+out:
+	return ret;
+}
+
+static int spufs_check_valid_dma(struct mfc_dma_command *cmd)
+{
+	pr_debug("queueing DMA %x %llx %x %x %x\n", cmd->lsa,
+		 cmd->ea, cmd->size, cmd->tag, cmd->cmd);
+
+	switch (cmd->cmd) {
+	case MFC_PUT_CMD:
+	case MFC_PUTF_CMD:
+	case MFC_PUTB_CMD:
+	case MFC_GET_CMD:
+	case MFC_GETF_CMD:
+	case MFC_GETB_CMD:
+		break;
+	default:
+		pr_debug("invalid DMA opcode %x\n", cmd->cmd);
+		return -EIO;
+	}
+
+	if ((cmd->lsa & 0xf) != (cmd->ea & 0xf)) {
+		pr_debug("invalid DMA alignment, ea %llx lsa %x\n",
+				cmd->ea, cmd->lsa);
+		return -EIO;
+	}
+
+	switch (cmd->size & 0xf) {
+	case 1:
+		break;
+	case 2:
+		if (cmd->lsa & 1)
+			goto error;
+		break;
+	case 4:
+		if (cmd->lsa & 3)
+			goto error;
+		break;
+	case 8:
+		if (cmd->lsa & 7)
+			goto error;
+		break;
+	case 0:
+		if (cmd->lsa & 15)
+			goto error;
+		break;
+	error:
+	default:
+		pr_debug("invalid DMA alignment %x for size %x\n",
+			cmd->lsa & 0xf, cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->size > 16 * 1024) {
+		pr_debug("invalid DMA size %x\n", cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->tag & 0xfff0) {
+		/* we reserve the higher tag numbers for kernel use */
+		pr_debug("invalid DMA tag\n");
+		return -EIO;
+	}
+
+	if (cmd->class) {
+		/* not supported in this version */
+		pr_debug("invalid DMA class\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int spu_send_mfc_command(struct spu_context *ctx,
+				struct mfc_dma_command cmd,
+				int *error)
+{
+	*error = ctx->ops->send_mfc_command(ctx, &cmd);
+	if (*error == -EAGAIN) {
+		/* wait for any tag group to complete
+		   so we have space for the new command */
+		ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+		/* try again, because the queue might be
+		   empty again */
+		*error = ctx->ops->send_mfc_command(ctx, &cmd);
+		if (*error == -EAGAIN)
+			return 0;
+	}
+	return 1;
+}
+
+static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct mfc_dma_command cmd;
+	int ret = -EINVAL;
+
+	if (size != sizeof cmd)
+		goto out;
+
+	ret = -EFAULT;
+	if (copy_from_user(&cmd, buffer, sizeof cmd))
+		goto out;
+
+	ret = spufs_check_valid_dma(&cmd);
+	if (ret)
+		goto out;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out;
+
+	ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+	if (ret)
+		goto out;
+
+	if (file->f_flags & O_NONBLOCK) {
+		ret = ctx->ops->send_mfc_command(ctx, &cmd);
+	} else {
+		int status;
+		ret = spufs_wait(ctx->mfc_wq,
+				 spu_send_mfc_command(ctx, cmd, &status));
+		if (ret)
+			goto out;
+		if (status)
+			ret = status;
+	}
+
+	if (ret)
+		goto out_unlock;
+
+	ctx->tagwait |= 1 << cmd.tag;
+	ret = size;
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return ret;
+}
+
+static unsigned int spufs_mfc_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 free_elements, tagstatus;
+	unsigned int mask;
+
+	poll_wait(file, &ctx->mfc_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep. Will be fixed later.
+ */ + mutex_lock(&ctx->state_mutex); + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2); + free_elements = ctx->ops->get_mfc_free_elements(ctx); + tagstatus = ctx->ops->read_mfc_tagstatus(ctx); + spu_release(ctx); + + mask = 0; + if (free_elements & 0xffff) + mask |= POLLOUT | POLLWRNORM; + if (tagstatus & ctx->tagwait) + mask |= POLLIN | POLLRDNORM; + + pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__, + free_elements, tagstatus, ctx->tagwait); + + return mask; +} + +static int spufs_mfc_flush(struct file *file, fl_owner_t id) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + goto out; +#if 0 +/* this currently hangs */ + ret = spufs_wait(ctx->mfc_wq, + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2)); + if (ret) + goto out; + ret = spufs_wait(ctx->mfc_wq, + ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait); + if (ret) + goto out; +#else + ret = 0; +#endif + spu_release(ctx); +out: + return ret; +} + +static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = file_inode(file); + int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (!err) { + mutex_lock(&inode->i_mutex); + err = spufs_mfc_flush(file, NULL); + mutex_unlock(&inode->i_mutex); + } + return err; +} + +static int spufs_mfc_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx = file->private_data; + + return fasync_helper(fd, file, on, &ctx->mfc_fasync); +} + +static const struct file_operations spufs_mfc_fops = { + .open = spufs_mfc_open, + .release = spufs_mfc_release, + .read = spufs_mfc_read, + .write = spufs_mfc_write, + .poll = spufs_mfc_poll, + .flush = spufs_mfc_flush, + .fsync = spufs_mfc_fsync, + .fasync = spufs_mfc_fasync, + .mmap = spufs_mfc_mmap, + .llseek = no_llseek, +}; + +static int spufs_npc_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->npc_write(ctx, val); + spu_release(ctx); + + return 0; +} + +static u64 spufs_npc_get(struct spu_context *ctx) +{ + return ctx->ops->npc_read(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, + "0x%llx\n", SPU_ATTR_ACQUIRE); + +static int spufs_decr_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + lscsa->decr.slot[0] = (u32) val; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_decr_get(struct spu_context *ctx) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->decr.slot[0]; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED); + +static int spufs_decr_status_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + if (val) + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; + else + ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_decr_status_get(struct spu_context *ctx) +{ + if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) + return SPU_DECR_STATUS_RUNNING; + else + return 0; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, + spufs_decr_status_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static int spufs_event_mask_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct 
spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + lscsa->event_mask.slot[0] = (u32) val; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_event_mask_get(struct spu_context *ctx) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->event_mask.slot[0]; +} + +DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, + spufs_event_mask_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static u64 spufs_event_status_get(struct spu_context *ctx) +{ + struct spu_state *state = &ctx->csa; + u64 stat; + stat = state->spu_chnlcnt_RW[0]; + if (stat) + return state->spu_chnldata_RW[0]; + return 0; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, + NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) + +static int spufs_srr0_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + lscsa->srr0.slot[0] = (u32) val; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_srr0_get(struct spu_context *ctx) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->srr0.slot[0]; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) + +static u64 spufs_id_get(struct spu_context *ctx) +{ + u64 num; + + if (ctx->state == SPU_STATE_RUNNABLE) + num = ctx->spu->number; + else + num = (unsigned int)-1; + + return num; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE) + +static u64 spufs_object_id_get(struct spu_context *ctx) +{ + /* FIXME: Should there really be no locking here? */ + return ctx->object_id; +} + +static int spufs_object_id_set(void *data, u64 id) +{ + struct spu_context *ctx = data; + ctx->object_id = id; + + return 0; +} + +DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get, + spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE); + +static u64 spufs_lslr_get(struct spu_context *ctx) +{ + return ctx->csa.priv2.spu_lslr_RW; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static int spufs_info_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + file->private_data = ctx; + return 0; +} + +static int spufs_caps_show(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + if (!(ctx->flags & SPU_CREATE_NOSCHED)) + seq_puts(s, "sched\n"); + if (!(ctx->flags & SPU_CREATE_ISOLATE)) + seq_puts(s, "step\n"); + return 0; +} + +static int spufs_caps_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_caps_fops = { + .open = spufs_caps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + u32 data; + + /* EOF if there's no entry in the mbox */ + if (!(ctx->csa.prob.mb_stat_R & 0x0000ff)) + return 0; + + data = ctx->csa.prob.pu_mb_R; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof data); +} + +static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = 
spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_mbox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_mbox_info_fops = { + .open = spufs_info_open, + .read = spufs_mbox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_ibox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + u32 data; + + /* EOF if there's no entry in the ibox */ + if (!(ctx->csa.prob.mb_stat_R & 0xff0000)) + return 0; + + data = ctx->csa.priv2.puint_mb_R; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof data); +} + +static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_ibox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_ibox_info_fops = { + .open = spufs_info_open, + .read = spufs_ibox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_wbox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + int i, cnt; + u32 data[4]; + u32 wbox_stat; + + wbox_stat = ctx->csa.prob.mb_stat_R; + cnt = 4 - ((wbox_stat & 0x00ff00) >> 8); + for (i = 0; i < cnt; i++) { + data[i] = ctx->csa.spu_mailbox_data[i]; + } + + return simple_read_from_buffer(buf, len, pos, &data, + cnt * sizeof(u32)); +} + +static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_wbox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_wbox_info_fops = { + .open = spufs_info_open, + .read = spufs_wbox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_dma_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + struct spu_dma_info info; + struct mfc_cq_sr *qp, *spuqp; + int i; + + info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; + info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; + info.dma_info_status = ctx->csa.spu_chnldata_RW[24]; + info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; + info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; + for (i = 0; i < 16; i++) { + qp = &info.dma_info_command_data[i]; + spuqp = &ctx->csa.priv2.spuq[i]; + + qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW; + } + + return simple_read_from_buffer(buf, len, pos, &info, + sizeof info); +} + +static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + 
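/* The context is held in the saved state at this point, so the DMA + * status below is read from the CSA image rather than the live SPU + * registers; register_lock serializes access to that saved image. */ + 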
spin_lock(&ctx->csa.register_lock); + ret = __spufs_dma_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_dma_info_fops = { + .open = spufs_info_open, + .read = spufs_dma_info_read, + .llseek = no_llseek, +}; + +static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + struct spu_proxydma_info info; + struct mfc_cq_sr *qp, *puqp; + int ret = sizeof info; + int i; + + if (len < ret) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW; + info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; + info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; + for (i = 0; i < 8; i++) { + qp = &info.proxydma_info_command_data[i]; + puqp = &ctx->csa.priv2.puq[i]; + + qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; + } + + return simple_read_from_buffer(buf, len, pos, &info, + sizeof info); +} + +static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_proxydma_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_proxydma_info_fops = { + .open = spufs_info_open, + .read = spufs_proxydma_info_read, + .llseek = no_llseek, +}; + +static int spufs_show_tid(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + seq_printf(s, "%d\n", ctx->tid); + return 0; +} + +static int spufs_tid_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_tid, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_tid_fops = { + .open = spufs_tid_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const char *ctx_state_names[] = { + "user", "system", "iowait", "loaded" +}; + +static unsigned long long spufs_acct_time(struct spu_context *ctx, + enum spu_utilization_state state) +{ + unsigned long long time = ctx->stats.times[state]; + + /* + * In general, utilization statistics are updated by the controlling + * thread as the spu context moves through various well defined + * state transitions, but if the context is lazily loaded its + * utilization statistics are not updated as the controlling thread + * is not tightly coupled with the execution of the spu context. We + * calculate and apply the time delta from the last recorded state + * of the spu context. 
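+ * For example, a context that is currently loaded and has been in + * 'state' since tstamp reports stats.times[state] plus the nanoseconds + * elapsed since tstamp, scaled to milliseconds by the division below.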
+ */ + if (ctx->spu && ctx->stats.util_state == state) { + time += ktime_get_ns() - ctx->stats.tstamp; + } + + return time / NSEC_PER_MSEC; +} + +static unsigned long long spufs_slb_flts(struct spu_context *ctx) +{ + unsigned long long slb_flts = ctx->stats.slb_flt; + + if (ctx->state == SPU_STATE_RUNNABLE) { + slb_flts += (ctx->spu->stats.slb_flt - + ctx->stats.slb_flt_base); + } + + return slb_flts; +} + +static unsigned long long spufs_class2_intrs(struct spu_context *ctx) +{ + unsigned long long class2_intrs = ctx->stats.class2_intr; + + if (ctx->state == SPU_STATE_RUNNABLE) { + class2_intrs += (ctx->spu->stats.class2_intr - + ctx->stats.class2_intr_base); + } + + return class2_intrs; +} + + +static int spufs_show_stat(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + seq_printf(s, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + ctx_state_names[ctx->stats.util_state], + spufs_acct_time(ctx, SPU_UTIL_USER), + spufs_acct_time(ctx, SPU_UTIL_SYSTEM), + spufs_acct_time(ctx, SPU_UTIL_IOWAIT), + spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED), + ctx->stats.vol_ctx_switch, + ctx->stats.invol_ctx_switch, + spufs_slb_flts(ctx), + ctx->stats.hash_flt, + ctx->stats.min_flt, + ctx->stats.maj_flt, + spufs_class2_intrs(ctx), + ctx->stats.libassist); + spu_release(ctx); + return 0; +} + +static int spufs_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_stat_fops = { + .open = spufs_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static inline int spufs_switch_log_used(struct spu_context *ctx) +{ + return (ctx->switch_log->head - ctx->switch_log->tail) % + SWITCH_LOG_BUFSIZE; +} + +static inline int spufs_switch_log_avail(struct spu_context *ctx) +{ + return SWITCH_LOG_BUFSIZE - spufs_switch_log_used(ctx); +} + +static int spufs_switch_log_open(struct inode *inode, struct file *file) +{ + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int rc; + + rc = spu_acquire(ctx); + if (rc) + return rc; + + if (ctx->switch_log) { + rc = -EBUSY; + goto out; + } + + ctx->switch_log = kmalloc(sizeof(struct switch_log) + + SWITCH_LOG_BUFSIZE * sizeof(struct switch_log_entry), + GFP_KERNEL); + + if (!ctx->switch_log) { + rc = -ENOMEM; + goto out; + } + + ctx->switch_log->head = ctx->switch_log->tail = 0; + init_waitqueue_head(&ctx->switch_log->wait); + rc = 0; + +out: + spu_release(ctx); + return rc; +} + +static int spufs_switch_log_release(struct inode *inode, struct file *file) +{ + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int rc; + + rc = spu_acquire(ctx); + if (rc) + return rc; + + kfree(ctx->switch_log); + ctx->switch_log = NULL; + spu_release(ctx); + + return 0; +} + +static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n) +{ + struct switch_log_entry *p; + + p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE; + + return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n", + (unsigned int) p->tstamp.tv_sec, + (unsigned int) p->tstamp.tv_nsec, + p->spu_id, + (unsigned int) p->type, + (unsigned int) p->val, + (unsigned long long) p->timebase); +} + +static ssize_t spufs_switch_log_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct inode *inode = file_inode(file); + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int error = 0, cnt = 0; + + if (!buf) + return 
-EINVAL; + + error = spu_acquire(ctx); + if (error) + return error; + + while (cnt < len) { + char tbuf[128]; + int width; + + if (spufs_switch_log_used(ctx) == 0) { + if (cnt > 0) { + /* If there's data ready to go, we can + * just return straight away */ + break; + + } else if (file->f_flags & O_NONBLOCK) { + error = -EAGAIN; + break; + + } else { + /* spufs_wait will drop the mutex and + * re-acquire, but since we're in read(), the + * file cannot be _released (and so + * ctx->switch_log is stable). + */ + error = spufs_wait(ctx->switch_log->wait, + spufs_switch_log_used(ctx) > 0); + + /* On error, spufs_wait returns without the + * state mutex held */ + if (error) + return error; + + /* We may have had entries read from underneath + * us while we dropped the mutex in spufs_wait, + * so re-check */ + if (spufs_switch_log_used(ctx) == 0) + continue; + } + } + + width = switch_log_sprint(ctx, tbuf, sizeof(tbuf)); + if (width < len) + ctx->switch_log->tail = + (ctx->switch_log->tail + 1) % + SWITCH_LOG_BUFSIZE; + else + /* If the record is greater than space available return + * partial buffer (so far) */ + break; + + error = copy_to_user(buf + cnt, tbuf, width); + if (error) + break; + cnt += width; + } + + spu_release(ctx); + + return cnt == 0 ? error : cnt; +} + +static unsigned int spufs_switch_log_poll(struct file *file, poll_table *wait) +{ + struct inode *inode = file_inode(file); + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + unsigned int mask = 0; + int rc; + + poll_wait(file, &ctx->switch_log->wait, wait); + + rc = spu_acquire(ctx); + if (rc) + return rc; + + if (spufs_switch_log_used(ctx) > 0) + mask |= POLLIN; + + spu_release(ctx); + + return mask; +} + +static const struct file_operations spufs_switch_log_fops = { + .open = spufs_switch_log_open, + .read = spufs_switch_log_read, + .poll = spufs_switch_log_poll, + .release = spufs_switch_log_release, + .llseek = no_llseek, +}; + +/** + * Log a context switch event to a switch log reader. + * + * Must be called with ctx->state_mutex held. + */ +void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, + u32 type, u32 val) +{ + if (!ctx->switch_log) + return; + + if (spufs_switch_log_avail(ctx) > 1) { + struct switch_log_entry *p; + + p = ctx->switch_log->log + ctx->switch_log->head; + ktime_get_ts(&p->tstamp); + p->timebase = get_tb(); + p->spu_id = spu ? spu->number : -1; + p->type = type; + p->val = val; + + ctx->switch_log->head = + (ctx->switch_log->head + 1) % SWITCH_LOG_BUFSIZE; + } + + wake_up(&ctx->switch_log->wait); +} + +static int spufs_show_ctx(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + u64 mfc_control_RW; + + mutex_lock(&ctx->state_mutex); + if (ctx->spu) { + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + + spin_lock_irq(&spu->register_lock); + mfc_control_RW = in_be64(&priv2->mfc_control_RW); + spin_unlock_irq(&spu->register_lock); + } else { + struct spu_state *csa = &ctx->csa; + + mfc_control_RW = csa->priv2.mfc_control_RW; + } + + seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)" + " %c %llx %llx %llx %llx %x %x\n", + ctx->state == SPU_STATE_SAVED ? 'S' : 'R', + ctx->flags, + ctx->sched_flags, + ctx->prio, + ctx->time_slice, + ctx->spu ? ctx->spu->number : -1, + !list_empty(&ctx->rq) ? 
'q' : ' ', + ctx->csa.class_0_pending, + ctx->csa.class_0_dar, + ctx->csa.class_1_dsisr, + mfc_control_RW, + ctx->ops->runcntl_read(ctx), + ctx->ops->status_read(ctx)); + + mutex_unlock(&ctx->state_mutex); + + return 0; +} + +static int spufs_ctx_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_ctx, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_ctx_fops = { + .open = spufs_ctx_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +const struct spufs_tree_descr spufs_dir_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, + { "mem", &spufs_mem_fops, 0666, LS_SIZE, }, + { "regs", &spufs_regs_fops, 0666, sizeof(struct spu_reg128[128]), }, + { "mbox", &spufs_mbox_fops, 0444, }, + { "ibox", &spufs_ibox_fops, 0444, }, + { "wbox", &spufs_wbox_fops, 0222, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), }, + { "signal1", &spufs_signal1_fops, 0666, }, + { "signal2", &spufs_signal2_fops, 0666, }, + { "signal1_type", &spufs_signal1_type, 0666, }, + { "signal2_type", &spufs_signal2_type, 0666, }, + { "cntl", &spufs_cntl_fops, 0666, }, + { "fpcr", &spufs_fpcr_fops, 0666, sizeof(struct spu_reg128), }, + { "lslr", &spufs_lslr_ops, 0444, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "mss", &spufs_mss_fops, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, + { "srr0", &spufs_srr0_ops, 0666, }, + { "decr", &spufs_decr_ops, 0666, }, + { "decr_status", &spufs_decr_status_ops, 0666, }, + { "event_mask", &spufs_event_mask_ops, 0666, }, + { "event_status", &spufs_event_status_ops, 0444, }, + { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, }, + { "phys-id", &spufs_id_ops, 0666, }, + { "object-id", &spufs_object_id_ops, 0666, }, + { "mbox_info", &spufs_mbox_info_fops, 0444, sizeof(u32), }, + { "ibox_info", &spufs_ibox_info_fops, 0444, sizeof(u32), }, + { "wbox_info", &spufs_wbox_info_fops, 0444, sizeof(u32), }, + { "dma_info", &spufs_dma_info_fops, 0444, + sizeof(struct spu_dma_info), }, + { "proxydma_info", &spufs_proxydma_info_fops, 0444, + sizeof(struct spu_proxydma_info)}, + { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, + { "switch_log", &spufs_switch_log_fops, 0444 }, + {}, +}; + +const struct spufs_tree_descr spufs_dir_nosched_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, + { "mem", &spufs_mem_fops, 0666, LS_SIZE, }, + { "mbox", &spufs_mbox_fops, 0444, }, + { "ibox", &spufs_ibox_fops, 0444, }, + { "wbox", &spufs_wbox_fops, 0222, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), }, + { "signal1", &spufs_signal1_nosched_fops, 0222, }, + { "signal2", &spufs_signal2_nosched_fops, 0222, }, + { "signal1_type", &spufs_signal1_type, 0666, }, + { "signal2_type", &spufs_signal2_type, 0666, }, + { "mss", &spufs_mss_fops, 0666, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "cntl", &spufs_cntl_fops, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, + { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, }, + { "phys-id", &spufs_id_ops, 0666, }, + { "object-id", &spufs_object_id_ops, 0666, }, + { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, + {}, +}; + +const struct spufs_tree_descr spufs_dir_debug_contents[] = { + { ".ctx", &spufs_ctx_fops, 0444, }, + {}, +}; + +const struct 
spufs_coredump_reader spufs_coredump_read[] = { + { "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])}, + { "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) }, + { "lslr", NULL, spufs_lslr_get, 19 }, + { "decr", NULL, spufs_decr_get, 19 }, + { "decr_status", NULL, spufs_decr_status_get, 19 }, + { "mem", __spufs_mem_read, NULL, LS_SIZE, }, + { "signal1", __spufs_signal1_read, NULL, sizeof(u32) }, + { "signal1_type", NULL, spufs_signal1_type_get, 19 }, + { "signal2", __spufs_signal2_read, NULL, sizeof(u32) }, + { "signal2_type", NULL, spufs_signal2_type_get, 19 }, + { "event_mask", NULL, spufs_event_mask_get, 19 }, + { "event_status", NULL, spufs_event_status_get, 19 }, + { "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) }, + { "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) }, + { "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)}, + { "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)}, + { "proxydma_info", __spufs_proxydma_info_read, + NULL, sizeof(struct spu_proxydma_info)}, + { "object-id", NULL, spufs_object_id_get, 19 }, + { "npc", NULL, spufs_npc_get, 19 }, + { NULL }, +}; diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c new file mode 100644 index 000000000..71a443253 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/gang.c @@ -0,0 +1,87 @@ +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/list.h> +#include <linux/slab.h> + +#include "spufs.h" + +struct spu_gang *alloc_spu_gang(void) +{ + struct spu_gang *gang; + + gang = kzalloc(sizeof *gang, GFP_KERNEL); + if (!gang) + goto out; + + kref_init(&gang->kref); + mutex_init(&gang->mutex); + mutex_init(&gang->aff_mutex); + INIT_LIST_HEAD(&gang->list); + INIT_LIST_HEAD(&gang->aff_list_head); + +out: + return gang; +} + +static void destroy_spu_gang(struct kref *kref) +{ + struct spu_gang *gang; + gang = container_of(kref, struct spu_gang, kref); + WARN_ON(gang->contexts || !list_empty(&gang->list)); + kfree(gang); +} + +struct spu_gang *get_spu_gang(struct spu_gang *gang) +{ + kref_get(&gang->kref); + return gang; +} + +int put_spu_gang(struct spu_gang *gang) +{ + return kref_put(&gang->kref, &destroy_spu_gang); +} + +void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx) +{ + mutex_lock(&gang->mutex); + ctx->gang = get_spu_gang(gang); + list_add(&ctx->gang_list, &gang->list); + gang->contexts++; + mutex_unlock(&gang->mutex); +} + +void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx) +{ + mutex_lock(&gang->mutex); + WARN_ON(ctx->gang != gang); + if (!list_empty(&ctx->aff_list)) { + list_del_init(&ctx->aff_list); + gang->aff_flags &= ~AFF_OFFSETS_SET; + } + list_del_init(&ctx->gang_list); + gang->contexts--; + mutex_unlock(&gang->mutex); + + put_spu_gang(gang); +} diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c new file mode 100644 index 000000000..8655c4cbe --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -0,0 +1,349 @@ +/* hw_ops.c - query/set operations on active SPU context. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/poll.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> + +#include <asm/io.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/spu_csa.h> +#include <asm/mmu_context.h> +#include "spufs.h" + +static int spu_hw_mbox_read(struct spu_context *ctx, u32 * data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + u32 mbox_stat; + int ret = 0; + + spin_lock_irq(&spu->register_lock); + mbox_stat = in_be32(&prob->mb_stat_R); + if (mbox_stat & 0x0000ff) { + *data = in_be32(&prob->pu_mb_R); + ret = 4; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static u32 spu_hw_mbox_stat_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->mb_stat_R); +} + +static unsigned int spu_hw_mbox_stat_poll(struct spu_context *ctx, + unsigned int events) +{ + struct spu *spu = ctx->spu; + int ret = 0; + u32 stat; + + spin_lock_irq(&spu->register_lock); + stat = in_be32(&spu->problem->mb_stat_R); + + /* if the requested event is there, return the poll + mask, otherwise enable the interrupt to get notified, + but first mark any pending interrupts as done so + we don't get woken up unnecessarily */ + + if (events & (POLLIN | POLLRDNORM)) { + if (stat & 0xff0000) + ret |= POLLIN | POLLRDNORM; + else { + spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR); + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); + } + } + if (events & (POLLOUT | POLLWRNORM)) { + if (stat & 0x00ff00) + ret = POLLOUT | POLLWRNORM; + else { + spu_int_stat_clear(spu, 2, + CLASS2_MAILBOX_THRESHOLD_INTR); + spu_int_mask_or(spu, 2, + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR); + } + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + struct spu_priv2 __iomem *priv2 = spu->priv2; + int ret; + + spin_lock_irq(&spu->register_lock); + if (in_be32(&prob->mb_stat_R) & 0xff0000) { + /* read the first available word */ + *data = in_be64(&priv2->puint_mb_R); + ret = 4; + } else { + /* make sure we get woken up by the interrupt */ + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); + ret = 0; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static int spu_hw_wbox_write(struct spu_context *ctx, u32 data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + int ret; + + spin_lock_irq(&spu->register_lock); + if (in_be32(&prob->mb_stat_R) & 0x00ff00) { + /* we have space to write wbox_data to */ + out_be32(&prob->spu_mb_W, data); + ret = 4; + } else { + /* make sure we get woken up by the interrupt when space + becomes available */ + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR); + ret = 0; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static void spu_hw_signal1_write(struct spu_context *ctx, u32 data) +{ + out_be32(&ctx->spu->problem->signal_notify1, data); +} + +static void spu_hw_signal2_write(struct spu_context *ctx, u32 data) +{ + out_be32(&ctx->spu->problem->signal_notify2, data); +} + +static void spu_hw_signal1_type_set(struct spu_context *ctx, u64 val) +{ + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 tmp; + + spin_lock_irq(&spu->register_lock); + tmp = in_be64(&priv2->spu_cfg_RW); + if (val) + tmp |= 1; + else + tmp &= ~1; + 
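/* Per the CBEA, bit 0 of SPU_Cfg selects the update mode of signal + * notification register 1: set means logical-OR (accumulate) mode, + * clear means overwrite mode. */ + 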
out_be64(&priv2->spu_cfg_RW, tmp); + spin_unlock_irq(&spu->register_lock); +} + +static u64 spu_hw_signal1_type_get(struct spu_context *ctx) +{ + return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 1) != 0); +} + +static void spu_hw_signal2_type_set(struct spu_context *ctx, u64 val) +{ + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 tmp; + + spin_lock_irq(&spu->register_lock); + tmp = in_be64(&priv2->spu_cfg_RW); + if (val) + tmp |= 2; + else + tmp &= ~2; + out_be64(&priv2->spu_cfg_RW, tmp); + spin_unlock_irq(&spu->register_lock); +} + +static u64 spu_hw_signal2_type_get(struct spu_context *ctx) +{ + return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 2) != 0); +} + +static u32 spu_hw_npc_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_npc_RW); +} + +static void spu_hw_npc_write(struct spu_context *ctx, u32 val) +{ + out_be32(&ctx->spu->problem->spu_npc_RW, val); +} + +static u32 spu_hw_status_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_status_R); +} + +static char *spu_hw_get_ls(struct spu_context *ctx) +{ + return ctx->spu->local_store; +} + +static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val) +{ + out_be64(&ctx->spu->priv2->spu_privcntl_RW, val); +} + +static u32 spu_hw_runcntl_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_runcntl_RW); +} + +static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val) +{ + spin_lock_irq(&ctx->spu->register_lock); + if (val & SPU_RUNCNTL_ISOLATE) + spu_hw_privcntl_write(ctx, + SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK); + out_be32(&ctx->spu->problem->spu_runcntl_RW, val); + spin_unlock_irq(&ctx->spu->register_lock); +} + +static void spu_hw_runcntl_stop(struct spu_context *ctx) +{ + spin_lock_irq(&ctx->spu->register_lock); + out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP); + while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING) + cpu_relax(); + spin_unlock_irq(&ctx->spu->register_lock); +} + +static void spu_hw_master_start(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 sr1; + + spin_lock_irq(&spu->register_lock); + sr1 = spu_mfc_sr1_get(spu) | MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, sr1); + spin_unlock_irq(&spu->register_lock); +} + +static void spu_hw_master_stop(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 sr1; + + spin_lock_irq(&spu->register_lock); + sr1 = spu_mfc_sr1_get(spu) & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, sr1); + spin_unlock_irq(&spu->register_lock); +} + +static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode) +{ + struct spu_problem __iomem *prob = ctx->spu->problem; + int ret; + + spin_lock_irq(&ctx->spu->register_lock); + ret = -EAGAIN; + if (in_be32(&prob->dma_querytype_RW)) + goto out; + ret = 0; + out_be32(&prob->dma_querymask_RW, mask); + out_be32(&prob->dma_querytype_RW, mode); +out: + spin_unlock_irq(&ctx->spu->register_lock); + return ret; +} + +static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx) +{ + return in_be32(&ctx->spu->problem->dma_tagstatus_R); +} + +static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->dma_qstatus_R); +} + +static int spu_hw_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command *cmd) +{ + u32 status; + struct spu_problem __iomem *prob = ctx->spu->problem; + + spin_lock_irq(&ctx->spu->register_lock); + out_be32(&prob->mfc_lsa_W, cmd->lsa); + out_be64(&prob->mfc_ea_W, 
cmd->ea); + out_be32(&prob->mfc_union_W.by32.mfc_size_tag32, + cmd->size << 16 | cmd->tag); + out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32, + cmd->class << 16 | cmd->cmd); + status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32); + spin_unlock_irq(&ctx->spu->register_lock); + + switch (status & 0xffff) { + case 0: + return 0; + case 2: + return -EAGAIN; + default: + return -EINVAL; + } +} + +static void spu_hw_restart_dma(struct spu_context *ctx) +{ + struct spu_priv2 __iomem *priv2 = ctx->spu->priv2; + + if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags)) + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); +} + +struct spu_context_ops spu_hw_ops = { + .mbox_read = spu_hw_mbox_read, + .mbox_stat_read = spu_hw_mbox_stat_read, + .mbox_stat_poll = spu_hw_mbox_stat_poll, + .ibox_read = spu_hw_ibox_read, + .wbox_write = spu_hw_wbox_write, + .signal1_write = spu_hw_signal1_write, + .signal2_write = spu_hw_signal2_write, + .signal1_type_set = spu_hw_signal1_type_set, + .signal1_type_get = spu_hw_signal1_type_get, + .signal2_type_set = spu_hw_signal2_type_set, + .signal2_type_get = spu_hw_signal2_type_get, + .npc_read = spu_hw_npc_read, + .npc_write = spu_hw_npc_write, + .status_read = spu_hw_status_read, + .get_ls = spu_hw_get_ls, + .privcntl_write = spu_hw_privcntl_write, + .runcntl_read = spu_hw_runcntl_read, + .runcntl_write = spu_hw_runcntl_write, + .runcntl_stop = spu_hw_runcntl_stop, + .master_start = spu_hw_master_start, + .master_stop = spu_hw_master_stop, + .set_mfc_query = spu_hw_set_mfc_query, + .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus, + .get_mfc_free_elements = spu_hw_get_mfc_free_elements, + .send_mfc_command = spu_hw_send_mfc_command, + .restart_dma = spu_hw_restart_dma, +}; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c new file mode 100644 index 000000000..1ba6307be --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -0,0 +1,811 @@ + +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/fsnotify.h> +#include <linux/backing-dev.h> +#include <linux/init.h> +#include <linux/ioctl.h> +#include <linux/module.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/pagemap.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/parser.h> + +#include <asm/prom.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/uaccess.h> + +#include "spufs.h" + +struct spufs_sb_info { + int debug; +}; + +static struct kmem_cache *spufs_inode_cache; +char *isolated_loader; +static int isolated_loader_size; + +static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static struct inode * +spufs_alloc_inode(struct super_block *sb) +{ + struct spufs_inode_info *ei; + + ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL); + if (!ei) + return NULL; + + ei->i_gang = NULL; + ei->i_ctx = NULL; + ei->i_openers = 0; + + return &ei->vfs_inode; +} + +static void spufs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); +} + +static void spufs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, spufs_i_callback); +} + +static void +spufs_init_once(void *p) +{ + struct spufs_inode_info *ei = p; + + inode_init_once(&ei->vfs_inode); +} + +static struct inode * +spufs_new_inode(struct super_block *sb, umode_t mode) +{ + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + goto out; + + inode->i_ino = get_next_ino(); + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +out: + return inode; +} + +static int +spufs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = d_inode(dentry); + + if ((attr->ia_valid & ATTR_SIZE) && + (attr->ia_size != inode->i_size)) + return -EINVAL; + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; +} + + +static int +spufs_new_file(struct super_block *sb, struct dentry *dentry, + const struct file_operations *fops, umode_t mode, + size_t size, struct spu_context *ctx) +{ + static const struct inode_operations spufs_file_iops = { + .setattr = spufs_setattr, + }; + struct inode *inode; + int ret; + + ret = -ENOSPC; + inode = spufs_new_inode(sb, S_IFREG | mode); + if (!inode) + goto out; + + ret = 0; + inode->i_op = &spufs_file_iops; + inode->i_fop = fops; + inode->i_size = size; + inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); + d_add(dentry, inode); +out: + return ret; +} + +static void +spufs_evict_inode(struct inode *inode) +{ + struct spufs_inode_info *ei = SPUFS_I(inode); + clear_inode(inode); + if (ei->i_ctx) + put_spu_context(ei->i_ctx); + if (ei->i_gang) + put_spu_gang(ei->i_gang); +} + +static void spufs_prune_dir(struct dentry *dir) +{ + struct dentry *dentry, *tmp; + + mutex_lock(&d_inode(dir)->i_mutex); + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { + spin_lock(&dentry->d_lock); + if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) { + dget_dlock(dentry); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + simple_unlink(d_inode(dir), dentry); + /* XXX: what was dcache_lock protecting here? 
Other + * filesystems (IB, configfs) release dcache_lock + * before unlink */ + dput(dentry); + } else { + spin_unlock(&dentry->d_lock); + } + } + shrink_dcache_parent(dir); + mutex_unlock(&d_inode(dir)->i_mutex); +} + +/* Caller must hold parent->i_mutex */ +static int spufs_rmdir(struct inode *parent, struct dentry *dir) +{ + /* remove all entries */ + int res; + spufs_prune_dir(dir); + d_drop(dir); + res = simple_rmdir(parent, dir); + /* We have to give up the mm_struct */ + spu_forget(SPUFS_I(d_inode(dir))->i_ctx); + return res; +} + +static int spufs_fill_dir(struct dentry *dir, + const struct spufs_tree_descr *files, umode_t mode, + struct spu_context *ctx) +{ + while (files->name && files->name[0]) { + int ret; + struct dentry *dentry = d_alloc_name(dir, files->name); + if (!dentry) + return -ENOMEM; + ret = spufs_new_file(dir->d_sb, dentry, files->ops, + files->mode & mode, files->size, ctx); + if (ret) + return ret; + files++; + } + return 0; +} + +static int spufs_dir_close(struct inode *inode, struct file *file) +{ + struct spu_context *ctx; + struct inode *parent; + struct dentry *dir; + int ret; + + dir = file->f_path.dentry; + parent = d_inode(dir->d_parent); + ctx = SPUFS_I(d_inode(dir))->i_ctx; + + mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT); + ret = spufs_rmdir(parent, dir); + mutex_unlock(&parent->i_mutex); + WARN_ON(ret); + + return dcache_dir_close(inode, file); +} + +const struct file_operations spufs_context_fops = { + .open = dcache_dir_open, + .release = spufs_dir_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .iterate = dcache_readdir, + .fsync = noop_fsync, +}; +EXPORT_SYMBOL_GPL(spufs_context_fops); + +static int +spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, + umode_t mode) +{ + int ret; + struct inode *inode; + struct spu_context *ctx; + + inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); + if (!inode) + return -ENOSPC; + + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + inode->i_mode &= S_ISGID; + } + ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */ + SPUFS_I(inode)->i_ctx = ctx; + if (!ctx) { + iput(inode); + return -ENOSPC; + } + + ctx->flags = flags; + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + mutex_lock(&inode->i_mutex); + + dget(dentry); + inc_nlink(dir); + inc_nlink(inode); + + d_instantiate(dentry, inode); + + if (flags & SPU_CREATE_NOSCHED) + ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents, + mode, ctx); + else + ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx); + + if (!ret && spufs_get_sb_info(dir->i_sb)->debug) + ret = spufs_fill_dir(dentry, spufs_dir_debug_contents, + mode, ctx); + + if (ret) + spufs_rmdir(dir, dentry); + + mutex_unlock(&inode->i_mutex); + + return ret; +} + +static int spufs_context_open(struct path *path) +{ + int ret; + struct file *filp; + + ret = get_unused_fd_flags(0); + if (ret < 0) + return ret; + + filp = dentry_open(path, O_RDONLY, current_cred()); + if (IS_ERR(filp)) { + put_unused_fd(ret); + return PTR_ERR(filp); + } + + filp->f_op = &spufs_context_fops; + fd_install(ret, filp); + return ret; +} + +static struct spu_context * +spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, + struct file *filp) +{ + struct spu_context *tmp, *neighbor, *err; + int count, node; + int aff_supp; + + aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, + struct spu, cbe_list))->aff_list); + + if (!aff_supp) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_GANG) + 
return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_AFFINITY_MEM && + gang->aff_ref_ctx && + gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM) + return ERR_PTR(-EEXIST); + + if (gang->aff_flags & AFF_MERGED) + return ERR_PTR(-EBUSY); + + neighbor = NULL; + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (!filp || filp->f_op != &spufs_context_fops) + return ERR_PTR(-EINVAL); + + neighbor = get_spu_context( + SPUFS_I(file_inode(filp))->i_ctx); + + if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && + !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && + !list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) { + err = ERR_PTR(-EEXIST); + goto out_put_neighbor; + } + + if (gang != neighbor->gang) { + err = ERR_PTR(-EINVAL); + goto out_put_neighbor; + } + + count = 1; + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + count++; + if (list_empty(&neighbor->aff_list)) + count++; + + for (node = 0; node < MAX_NUMNODES; node++) { + if ((cbe_spu_info[node].n_spus - atomic_read( + &cbe_spu_info[node].reserved_spus)) >= count) + break; + } + + if (node == MAX_NUMNODES) { + err = ERR_PTR(-EEXIST); + goto out_put_neighbor; + } + } + + return neighbor; + +out_put_neighbor: + put_spu_context(neighbor); + return err; +} + +static void +spufs_set_affinity(unsigned int flags, struct spu_context *ctx, + struct spu_context *neighbor) +{ + if (flags & SPU_CREATE_AFFINITY_MEM) + ctx->gang->aff_ref_ctx = ctx; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (list_empty(&neighbor->aff_list)) { + list_add_tail(&neighbor->aff_list, + &ctx->gang->aff_list_head); + neighbor->aff_head = 1; + } + + if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head) + || list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) { + list_add(&ctx->aff_list, &neighbor->aff_list); + } else { + list_add_tail(&ctx->aff_list, &neighbor->aff_list); + if (neighbor->aff_head) { + neighbor->aff_head = 0; + ctx->aff_head = 1; + } + } + + if (!ctx->gang->aff_ref_ctx) + ctx->gang->aff_ref_ctx = ctx; + } +} + +static int +spufs_create_context(struct inode *inode, struct dentry *dentry, + struct vfsmount *mnt, int flags, umode_t mode, + struct file *aff_filp) +{ + int ret; + int affinity; + struct spu_gang *gang; + struct spu_context *neighbor; + struct path path = {.mnt = mnt, .dentry = dentry}; + + if ((flags & SPU_CREATE_NOSCHED) && + !capable(CAP_SYS_NICE)) + return -EPERM; + + if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE)) + == SPU_CREATE_ISOLATE) + return -EINVAL; + + if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) + return -ENODEV; + + gang = NULL; + neighbor = NULL; + affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); + if (affinity) { + gang = SPUFS_I(inode)->i_gang; + if (!gang) + return -EINVAL; + mutex_lock(&gang->aff_mutex); + neighbor = spufs_assert_affinity(flags, gang, aff_filp); + if (IS_ERR(neighbor)) { + ret = PTR_ERR(neighbor); + goto out_aff_unlock; + } + } + + ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); + if (ret) + goto out_aff_unlock; + + if (affinity) { + spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx, + neighbor); + if (neighbor) + put_spu_context(neighbor); + } + + ret = spufs_context_open(&path); + if (ret < 0) + WARN_ON(spufs_rmdir(inode, dentry)); + +out_aff_unlock: + if (affinity) + mutex_unlock(&gang->aff_mutex); + return ret; +} + +static int +spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int ret; + struct inode *inode; + struct spu_gang *gang; + 
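+ /* A gang directory carries a gang reference but no context of its + * own; contexts created inside it are scheduled onto SPUs as a group. */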
+ ret = -ENOSPC; + inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); + if (!inode) + goto out; + + ret = 0; + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + inode->i_mode &= S_ISGID; + } + gang = alloc_spu_gang(); + SPUFS_I(inode)->i_ctx = NULL; + SPUFS_I(inode)->i_gang = gang; + if (!gang) + goto out_iput; + + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + d_instantiate(dentry, inode); + inc_nlink(dir); + inc_nlink(d_inode(dentry)); + return ret; + +out_iput: + iput(inode); +out: + return ret; +} + +static int spufs_gang_open(struct path *path) +{ + int ret; + struct file *filp; + + ret = get_unused_fd_flags(0); + if (ret < 0) + return ret; + + /* + * get references for dget and mntget, will be released + * in error path of *_open(). + */ + filp = dentry_open(path, O_RDONLY, current_cred()); + if (IS_ERR(filp)) { + put_unused_fd(ret); + return PTR_ERR(filp); + } + + filp->f_op = &simple_dir_operations; + fd_install(ret, filp); + return ret; +} + +static int spufs_create_gang(struct inode *inode, + struct dentry *dentry, + struct vfsmount *mnt, umode_t mode) +{ + struct path path = {.mnt = mnt, .dentry = dentry}; + int ret; + + ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO); + if (!ret) { + ret = spufs_gang_open(&path); + if (ret < 0) { + int err = simple_rmdir(inode, dentry); + WARN_ON(err); + } + } + return ret; +} + + +static struct file_system_type spufs_type; + +long spufs_create(struct path *path, struct dentry *dentry, + unsigned int flags, umode_t mode, struct file *filp) +{ + struct inode *dir = d_inode(path->dentry); + int ret; + + /* check if we are on spufs */ + if (path->dentry->d_sb->s_type != &spufs_type) + return -EINVAL; + + /* don't accept undefined flags */ + if (flags & (~SPU_CREATE_FLAG_ALL)) + return -EINVAL; + + /* only threads can be underneath a gang */ + if (path->dentry != path->dentry->d_sb->s_root) + if ((flags & SPU_CREATE_GANG) || !SPUFS_I(dir)->i_gang) + return -EINVAL; + + mode &= ~current_umask(); + + if (flags & SPU_CREATE_GANG) + ret = spufs_create_gang(dir, dentry, path->mnt, mode); + else + ret = spufs_create_context(dir, dentry, path->mnt, flags, mode, + filp); + if (ret >= 0) + fsnotify_mkdir(dir, dentry); + + return ret; +} + +/* File system initialization */ +enum { + Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err, +}; + +static const match_table_t spufs_tokens = { + { Opt_uid, "uid=%d" }, + { Opt_gid, "gid=%d" }, + { Opt_mode, "mode=%o" }, + { Opt_debug, "debug" }, + { Opt_err, NULL }, +}; + +static int +spufs_parse_options(struct super_block *sb, char *options, struct inode *root) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + + while ((p = strsep(&options, ",")) != NULL) { + int token, option; + + if (!*p) + continue; + + token = match_token(p, spufs_tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return 0; + root->i_uid = make_kuid(current_user_ns(), option); + if (!uid_valid(root->i_uid)) + return 0; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + root->i_gid = make_kgid(current_user_ns(), option); + if (!gid_valid(root->i_gid)) + return 0; + break; + case Opt_mode: + if (match_octal(&args[0], &option)) + return 0; + root->i_mode = option | S_IFDIR; + break; + case Opt_debug: + spufs_get_sb_info(sb)->debug = 1; + break; + default: + return 0; + } + } + return 1; +} + +static void spufs_exit_isolated_loader(void) +{ + free_pages((unsigned long) isolated_loader, + get_order(isolated_loader_size)); +} + +static void 
+spufs_init_isolated_loader(void) +{ + struct device_node *dn; + const char *loader; + int size; + + dn = of_find_node_by_path("/spu-isolation"); + if (!dn) + return; + + loader = of_get_property(dn, "loader", &size); + if (!loader) + return; + + /* the loader must be aligned on a 16 byte boundary */ + isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size)); + if (!isolated_loader) + return; + + isolated_loader_size = size; + memcpy(isolated_loader, loader, size); + printk(KERN_INFO "spufs: SPU isolation mode enabled\n"); +} + +static int +spufs_create_root(struct super_block *sb, void *data) +{ + struct inode *inode; + int ret; + + ret = -ENODEV; + if (!spu_management_ops) + goto out; + + ret = -ENOMEM; + inode = spufs_new_inode(sb, S_IFDIR | 0775); + if (!inode) + goto out; + + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + SPUFS_I(inode)->i_ctx = NULL; + inc_nlink(inode); + + ret = -EINVAL; + if (!spufs_parse_options(sb, data, inode)) + goto out_iput; + + ret = -ENOMEM; + sb->s_root = d_make_root(inode); + if (!sb->s_root) + goto out; + + return 0; +out_iput: + iput(inode); +out: + return ret; +} + +static int +spufs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct spufs_sb_info *info; + static const struct super_operations s_ops = { + .alloc_inode = spufs_alloc_inode, + .destroy_inode = spufs_destroy_inode, + .statfs = simple_statfs, + .evict_inode = spufs_evict_inode, + .show_options = generic_show_options, + }; + + save_mount_options(sb, data); + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = SPUFS_MAGIC; + sb->s_op = &s_ops; + sb->s_fs_info = info; + + return spufs_create_root(sb, data); +} + +static struct dentry * +spufs_mount(struct file_system_type *fstype, int flags, + const char *name, void *data) +{ + return mount_single(fstype, flags, data, spufs_fill_super); +} + +static struct file_system_type spufs_type = { + .owner = THIS_MODULE, + .name = "spufs", + .mount = spufs_mount, + .kill_sb = kill_litter_super, +}; +MODULE_ALIAS_FS("spufs"); + +static int __init spufs_init(void) +{ + int ret; + + ret = -ENODEV; + if (!spu_management_ops) + goto out; + + ret = -ENOMEM; + spufs_inode_cache = kmem_cache_create("spufs_inode_cache", + sizeof(struct spufs_inode_info), 0, + SLAB_HWCACHE_ALIGN, spufs_init_once); + + if (!spufs_inode_cache) + goto out; + ret = spu_sched_init(); + if (ret) + goto out_cache; + ret = register_spu_syscalls(&spufs_calls); + if (ret) + goto out_sched; + ret = register_filesystem(&spufs_type); + if (ret) + goto out_syscalls; + + spufs_init_isolated_loader(); + + return 0; + +out_syscalls: + unregister_spu_syscalls(&spufs_calls); +out_sched: + spu_sched_exit(); +out_cache: + kmem_cache_destroy(spufs_inode_cache); +out: + return ret; +} +module_init(spufs_init); + +static void __exit spufs_exit(void) +{ + spu_sched_exit(); + spufs_exit_isolated_loader(); + unregister_spu_syscalls(&spufs_calls); + unregister_filesystem(&spufs_type); + kmem_cache_destroy(spufs_inode_cache); +} +module_exit(spufs_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>"); + diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c new file mode 100644 index 000000000..147069938 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -0,0 +1,183 @@ +/* + * 
SPU local store allocation routines + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/mmu.h> + +#include "spufs.h" + +static int spu_alloc_lscsa_std(struct spu_state *csa) +{ + struct spu_lscsa *lscsa; + unsigned char *p; + + lscsa = vzalloc(sizeof(struct spu_lscsa)); + if (!lscsa) + return -ENOMEM; + csa->lscsa = lscsa; + + /* Set LS pages reserved to allow for user-space mapping. */ + for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE) + SetPageReserved(vmalloc_to_page(p)); + + return 0; +} + +static void spu_free_lscsa_std(struct spu_state *csa) +{ + /* Clear reserved bit before vfree. */ + unsigned char *p; + + if (csa->lscsa == NULL) + return; + + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + ClearPageReserved(vmalloc_to_page(p)); + + vfree(csa->lscsa); +} + +#ifdef CONFIG_SPU_FS_64K_LS + +#define SPU_64K_PAGE_SHIFT 16 +#define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT) +#define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER) + +int spu_alloc_lscsa(struct spu_state *csa) +{ + struct page **pgarray; + unsigned char *p; + int i, j, n_4k; + + /* Check availability of 64K pages */ + if (!spu_64k_pages_available()) + goto fail; + + csa->use_big_pages = 1; + + pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n", + csa); + + /* First try to allocate our 64K pages. We need 5 of them + * with the current implementation. In the future, we should try + * to separate the lscsa from the actual local store image, thus + * allowing us to require only 4 64K pages per context. + */ + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) { + /* XXX This is likely to fail, we should use a special pool + * similar to what hugetlbfs does. + */ + csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL, + SPU_64K_PAGE_ORDER); + if (csa->lscsa_pages[i] == NULL) + goto fail; + } + + pr_debug(" success ! creating vmap...\n"); + + /* Now we need to create a vmalloc mapping of these for the kernel + * and SPU context switch code to use. Currently, we stick to a + * normal kernel vmalloc mapping, which in our case will be 4K + */ + n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES; + pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL); + if (pgarray == NULL) + goto fail; + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) + for (j = 0; j < SPU_64K_PAGE_COUNT; j++) + /* We assume all the struct page's are contiguous, + * which should hopefully be the case for an order 4 + * allocation.
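+ * (Pages from a single alloc_pages() call should have contiguous + * struct pages on common memory models, since a low-order block + * cannot cross a sparsemem section boundary.)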
+ */ + pgarray[i * SPU_64K_PAGE_COUNT + j] = + csa->lscsa_pages[i] + j; + csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL); + kfree(pgarray); + if (csa->lscsa == NULL) + goto fail; + + memset(csa->lscsa, 0, sizeof(struct spu_lscsa)); + + /* Set LS pages reserved to allow for user-space mapping. + * + * XXX isn't that a bit obsolete ? I think we should just + * make sure the page count is high enough. Anyway, won't harm + * for now + */ + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + SetPageReserved(vmalloc_to_page(p)); + + pr_debug(" all good !\n"); + + return 0; +fail: + pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n"); + spu_free_lscsa(csa); + return spu_alloc_lscsa_std(csa); +} + +void spu_free_lscsa(struct spu_state *csa) +{ + unsigned char *p; + int i; + + if (!csa->use_big_pages) { + spu_free_lscsa_std(csa); + return; + } + csa->use_big_pages = 0; + + if (csa->lscsa == NULL) + goto free_pages; + + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + ClearPageReserved(vmalloc_to_page(p)); + + vunmap(csa->lscsa); + csa->lscsa = NULL; + + free_pages: + + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) + if (csa->lscsa_pages[i]) + __free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER); +} + +#else /* CONFIG_SPU_FS_64K_LS */ + +int spu_alloc_lscsa(struct spu_state *csa) +{ + return spu_alloc_lscsa_std(csa); +} + +void spu_free_lscsa(struct spu_state *csa) +{ + spu_free_lscsa_std(csa); +} + +#endif /* !defined(CONFIG_SPU_FS_64K_LS) */ diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c new file mode 100644 index 000000000..4ddf769a6 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -0,0 +1,454 @@ +#define DEBUG + +#include <linux/wait.h> +#include <linux/ptrace.h> + +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/io.h> +#include <asm/unistd.h> + +#include "spufs.h" + +/* interrupt-level stop callback function. */ +void spufs_stop_callback(struct spu *spu, int irq) +{ + struct spu_context *ctx = spu->ctx; + + /* + * It should be impossible to preempt a context while an exception + * is being processed, since the context switch code is specially + * coded to deal with interrupts ... But, just in case, sanity check + * the context pointer. It is OK to return doing nothing since + * the exception will be regenerated when the context is resumed. + */ + if (ctx) { + /* Copy exception arguments into module specific structure */ + switch(irq) { + case 0 : + ctx->csa.class_0_pending = spu->class_0_pending; + ctx->csa.class_0_dar = spu->class_0_dar; + break; + case 1 : + ctx->csa.class_1_dsisr = spu->class_1_dsisr; + ctx->csa.class_1_dar = spu->class_1_dar; + break; + case 2 : + break; + } + + /* ensure that the exception status has hit memory before a + * thread waiting on the context's stop queue is woken */ + smp_wmb(); + + wake_up_all(&ctx->stop_wq); + } +} + +int spu_stopped(struct spu_context *ctx, u32 *stat) +{ + u64 dsisr; + u32 stopped; + + stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + +top: + *stat = ctx->ops->status_read(ctx); + if (*stat & stopped) { + /* + * If the spu hasn't finished stopping, we need to + * re-read the register to get the stopped value. 
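+ * A stop bit seen while SPU_STATUS_RUNNING is still set means the + * stop is in progress, so the status is re-read until RUNNING clears.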
+ */ + if (*stat & SPU_STATUS_RUNNING) + goto top; + return 1; + } + + if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) + return 1; + + dsisr = ctx->csa.class_1_dsisr; + if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) + return 1; + + if (ctx->csa.class_0_pending) + return 1; + + return 0; +} + +static int spu_setup_isolated(struct spu_context *ctx) +{ + int ret; + u64 __iomem *mfc_cntl; + u64 sr1; + u32 status; + unsigned long timeout; + const u32 status_loading = SPU_STATUS_RUNNING + | SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS; + + ret = -ENODEV; + if (!isolated_loader) + goto out; + + /* + * We need to exclude userspace access to the context. + * + * To protect against memory access we invalidate all ptes + * and make sure the pagefault handlers block on the mutex. + */ + spu_unmap_mappings(ctx); + + mfc_cntl = &ctx->spu->priv2->mfc_control_RW; + + /* purge the MFC DMA queue to ensure no spurious accesses before we + * enter kernel mode */ + timeout = jiffies + HZ; + out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST); + while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK) + != MFC_CNTL_PURGE_DMA_COMPLETE) { + if (time_after(jiffies, timeout)) { + printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n", + __func__); + ret = -EIO; + goto out; + } + cond_resched(); + } + + /* clear purge status */ + out_be64(mfc_cntl, 0); + + /* put the SPE in kernel mode to allow access to the loader */ + sr1 = spu_mfc_sr1_get(ctx->spu); + sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK; + spu_mfc_sr1_set(ctx->spu, sr1); + + /* start the loader */ + ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32); + ctx->ops->signal2_write(ctx, + (unsigned long)isolated_loader & 0xffffffff); + + ctx->ops->runcntl_write(ctx, + SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); + + ret = 0; + timeout = jiffies + HZ; + while (((status = ctx->ops->status_read(ctx)) & status_loading) == + status_loading) { + if (time_after(jiffies, timeout)) { + printk(KERN_ERR "%s: timeout waiting for loader\n", + __func__); + ret = -EIO; + goto out_drop_priv; + } + cond_resched(); + } + + if (!(status & SPU_STATUS_RUNNING)) { + /* If isolated LOAD has failed: run SPU, we will get a stop-and + * signal later. */ + pr_debug("%s: isolated LOAD failed\n", __func__); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + ret = -EACCES; + goto out_drop_priv; + } + + if (!(status & SPU_STATUS_ISOLATED_STATE)) { + /* This isn't allowed by the CBEA, but check anyway */ + pr_debug("%s: SPU fell out of isolated mode?\n", __func__); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP); + ret = -EINVAL; + goto out_drop_priv; + } + +out_drop_priv: + /* Finished accessing the loader. Drop kernel mode */ + sr1 |= MFC_STATE1_PROBLEM_STATE_MASK; + spu_mfc_sr1_set(ctx->spu, sr1); + +out: + return ret; +} + +static int spu_run_init(struct spu_context *ctx, u32 *npc) +{ + unsigned long runcntl = SPU_RUNCNTL_RUNNABLE; + int ret; + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + /* + * NOSCHED is synchronous scheduling with respect to the caller. + * The caller waits for the context to be loaded. + */ + if (ctx->flags & SPU_CREATE_NOSCHED) { + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + return ret; + } + } + + /* + * Apply special setup as required. 
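+ * For isolated contexts this means running the bootstrap loader + * (see spu_setup_isolated above) before any user code is started.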
+ */ + if (ctx->flags & SPU_CREATE_ISOLATE) { + if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) { + ret = spu_setup_isolated(ctx); + if (ret) + return ret; + } + + /* + * If userspace has set the runcntrl register (eg, to + * issue an isolated exit), we need to re-set it here + */ + runcntl = ctx->ops->runcntl_read(ctx) & + (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); + if (runcntl == 0) + runcntl = SPU_RUNCNTL_RUNNABLE; + } else { + unsigned long privcntl; + + if (test_thread_flag(TIF_SINGLESTEP)) + privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP; + else + privcntl = SPU_PRIVCNTL_MODE_NORMAL; + + ctx->ops->privcntl_write(ctx, privcntl); + ctx->ops->npc_write(ctx, *npc); + } + + ctx->ops->runcntl_write(ctx, runcntl); + + if (ctx->flags & SPU_CREATE_NOSCHED) { + spuctx_switch_state(ctx, SPU_UTIL_USER); + } else { + + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + return ret; + } else { + spuctx_switch_state(ctx, SPU_UTIL_USER); + } + } + + set_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags); + return 0; +} + +static int spu_run_fini(struct spu_context *ctx, u32 *npc, + u32 *status) +{ + int ret = 0; + + spu_del_from_rq(ctx); + + *status = ctx->ops->status_read(ctx); + *npc = ctx->ops->npc_read(ctx); + + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + clear_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags); + spu_switch_log_notify(NULL, ctx, SWITCH_LOG_EXIT, *status); + spu_release(ctx); + + if (signal_pending(current)) + ret = -ERESTARTSYS; + + return ret; +} + +/* + * SPU syscall restarting is tricky because we violate the basic + * assumption that the signal handler is running on the interrupted + * thread. Here instead, the handler runs on PowerPC user space code, + * while the syscall was called from the SPU. + * This means we can only do a very rough approximation of POSIX + * signal semantics. + */ +static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, + unsigned int *npc) +{ + int ret; + + switch (*spu_ret) { + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* + * Enter the regular syscall restarting for + * sys_spu_run, then restart the SPU syscall + * callback. + */ + *npc -= 8; + ret = -ERESTARTSYS; + break; + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* + * Restart block is too hard for now, just return -EINTR + * to the SPU. + * ERESTARTNOHAND comes from sys_pause, we also return + * -EINTR from there. + * Assume that we need to be restarted ourselves though. 
+ */ + *spu_ret = -EINTR; + ret = -ERESTARTSYS; + break; + default: + printk(KERN_WARNING "%s: unexpected return code %ld\n", + __func__, *spu_ret); + ret = 0; + } + return ret; +} + +static int spu_process_callback(struct spu_context *ctx) +{ + struct spu_syscall_block s; + u32 ls_pointer, npc; + void __iomem *ls; + long spu_ret; + int ret; + + /* get syscall block from local store */ + npc = ctx->ops->npc_read(ctx) & ~3; + ls = (void __iomem *)ctx->ops->get_ls(ctx); + ls_pointer = in_be32(ls + npc); + if (ls_pointer > (LS_SIZE - sizeof(s))) + return -EFAULT; + memcpy_fromio(&s, ls + ls_pointer, sizeof(s)); + + /* do actual syscall without pinning the spu */ + ret = 0; + spu_ret = -ENOSYS; + npc += 4; + + if (s.nr_ret < __NR_syscalls) { + spu_release(ctx); + /* do actual system call from here */ + spu_ret = spu_sys_callback(&s); + if (spu_ret <= -ERESTARTSYS) { + ret = spu_handle_restartsys(ctx, &spu_ret, &npc); + } + mutex_lock(&ctx->state_mutex); + if (ret == -ERESTARTSYS) + return ret; + } + + /* need to re-get the ls, as it may have changed when we released the + * spu */ + ls = (void __iomem *)ctx->ops->get_ls(ctx); + + /* write result, jump over indirect pointer */ + memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret)); + ctx->ops->npc_write(ctx, npc); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + return ret; +} + +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) +{ + int ret; + struct spu *spu; + u32 status; + + if (mutex_lock_interruptible(&ctx->run_mutex)) + return -ERESTARTSYS; + + ctx->event_return = 0; + + ret = spu_acquire(ctx); + if (ret) + goto out_unlock; + + spu_enable_spu(ctx); + + spu_update_sched_info(ctx); + + ret = spu_run_init(ctx, npc); + if (ret) { + spu_release(ctx); + goto out; + } + + do { + ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); + if (unlikely(ret)) { + /* + * This is nasty: we need the state_mutex for all the + * bookkeeping even if the syscall was interrupted by + * a signal. ewww. 
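+ *
+ * Editor's note, not part of the original source: when
+ * spufs_wait() is interrupted by a signal it returns without
+ * the state_mutex held (assuming the usual spufs_wait
+ * semantics), which is why the error path below re-takes the
+ * lock before breaking out:
+ *
+ * mutex_lock(&ctx->state_mutex);
+ * break;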
+ */
+ mutex_lock(&ctx->state_mutex);
+ break;
+ }
+ spu = ctx->spu;
+ if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
+ &ctx->sched_flags))) {
+ if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
+ spu_switch_notify(spu, ctx);
+ continue;
+ }
+ }
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
+ ret = spu_process_callback(ctx);
+ if (ret)
+ break;
+ status &= ~SPU_STATUS_STOPPED_BY_STOP;
+ }
+ ret = spufs_handle_class1(ctx);
+ if (ret)
+ break;
+
+ ret = spufs_handle_class0(ctx);
+ if (ret)
+ break;
+
+ if (signal_pending(current))
+ ret = -ERESTARTSYS;
+ } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
+ SPU_STATUS_STOPPED_BY_HALT |
+ SPU_STATUS_SINGLE_STEP)));
+
+ spu_disable_spu(ctx);
+ ret = spu_run_fini(ctx, npc, &status);
+ spu_yield(ctx);
+
+ if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
+ ctx->stats.libassist++;
+
+ if ((ret == 0) ||
+ ((ret == -ERESTARTSYS) &&
+ ((status & SPU_STATUS_STOPPED_BY_HALT) ||
+ (status & SPU_STATUS_SINGLE_STEP) ||
+ ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (status >> SPU_STOP_STATUS_SHIFT != 0x2104)))))
+ ret = status;
+
+ /* Note: we don't need to force_sig SIGTRAP on single-step
+ * since we have TIF_SINGLESTEP set, thus the kernel will do
+ * it upon return from the syscall anyway.
+ */
+ if (unlikely(status & SPU_STATUS_SINGLE_STEP))
+ ret = -ERESTARTSYS;
+
+ else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP)
+ && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) {
+ force_sig(SIGTRAP, current);
+ ret = -ERESTARTSYS;
+ }
+
+out:
+ *event = ctx->event_return;
+out_unlock:
+ mutex_unlock(&ctx->run_mutex);
+ return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
new file mode 100644
index 000000000..5c80a0f9d
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -0,0 +1,1167 @@
+/* sched.c - SPU scheduler.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * 2006-03-31 NUMA domains added.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/pid_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_priv1.h>
+#include "spufs.h"
+#define CREATE_TRACE_POINTS
+#include "sputrace.h"
+
+struct spu_prio_array {
+ DECLARE_BITMAP(bitmap, MAX_PRIO);
+ struct list_head runq[MAX_PRIO];
+ spinlock_t runq_lock;
+ int nr_waiting;
+};
+
+static unsigned long spu_avenrun[3];
+static struct spu_prio_array *spu_prio;
+static struct task_struct *spusched_task;
+static struct timer_list spusched_timer;
+static struct timer_list spuloadavg_timer;
+
+/*
+ * Frequency of the spu scheduler tick. By default we do one SPU scheduler
+ * tick for every 10 CPU scheduler ticks.
+ */
+#define SPUSCHED_TICK (10)
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is
+ * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs.
+ */
+#define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1)
+#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK))
+
+#define SCALE_PRIO(x, prio) \
+ max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE)
+
+/*
+ * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values:
+ * [800ms ... 100ms ... 5ms]
+ *
+ * The higher a thread's priority, the bigger timeslices
+ * it gets during one round of execution. But even the lowest
+ * priority thread gets MIN_TIMESLICE worth of execution time.
+ */
+void spu_set_timeslice(struct spu_context *ctx)
+{
+ if (ctx->prio < NORMAL_PRIO)
+ ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio);
+ else
+ ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio);
+}
+
+/*
+ * Update scheduling information from the owning thread.
+ */
+void __spu_update_sched_info(struct spu_context *ctx)
+{
+ /*
+ * assert that the context is not on the runqueue, so it is safe
+ * to change its scheduling parameters.
+ */
+ BUG_ON(!list_empty(&ctx->rq));
+
+ /*
+ * 32-bit assignments are atomic on powerpc, and we don't care about
+ * memory ordering here because retrieving the controlling thread is
+ * by definition racy.
+ */
+ ctx->tid = current->pid;
+
+ /*
+ * We do our own priority calculations, so we normally want
+ * ->static_prio to start with. Unfortunately this field
+ * contains junk for threads with a realtime scheduling
+ * policy so we have to look at ->prio in this case.
+ */
+ if (rt_prio(current->prio))
+ ctx->prio = current->prio;
+ else
+ ctx->prio = current->static_prio;
+ ctx->policy = current->policy;
+
+ /*
+ * TODO: the context may be loaded, so we may need to activate
+ * it again on a different node. But it shouldn't hurt anything
+ * to update its parameters, because we know that the scheduler
+ * is not actively looking at this field, since it is not on the
+ * runqueue. The context will be rescheduled on the proper node
+ * if it is timesliced or preempted.
+ */
+ cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current));
+
+ /* Save the current cpu id for spu interrupt routing.
*/ + ctx->last_ran = raw_smp_processor_id(); +} + +void spu_update_sched_info(struct spu_context *ctx) +{ + int node; + + if (ctx->state == SPU_STATE_RUNNABLE) { + node = ctx->spu->node; + + /* + * Take list_mutex to sync with find_victim(). + */ + mutex_lock(&cbe_spu_info[node].list_mutex); + __spu_update_sched_info(ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); + } else { + __spu_update_sched_info(ctx); + } +} + +static int __node_allowed(struct spu_context *ctx, int node) +{ + if (nr_cpus_node(node)) { + const struct cpumask *mask = cpumask_of_node(node); + + if (cpumask_intersects(mask, &ctx->cpus_allowed)) + return 1; + } + + return 0; +} + +static int node_allowed(struct spu_context *ctx, int node) +{ + int rval; + + spin_lock(&spu_prio->runq_lock); + rval = __node_allowed(ctx, node); + spin_unlock(&spu_prio->runq_lock); + + return rval; +} + +void do_notify_spus_active(void) +{ + int node; + + /* + * Wake up the active spu_contexts. + * + * When the awakened processes see their "notify_active" flag is set, + * they will call spu_switch_notify(). + */ + for_each_online_node(node) { + struct spu *spu; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state != SPU_FREE) { + struct spu_context *ctx = spu->ctx; + set_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags); + mb(); + wake_up_all(&ctx->stop_wq); + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } +} + +/** + * spu_bind_context - bind spu context to physical spu + * @spu: physical spu to bind to + * @ctx: context to bind + */ +static void spu_bind_context(struct spu *spu, struct spu_context *ctx) +{ + spu_context_trace(spu_bind_context__enter, ctx, spu); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (ctx->flags & SPU_CREATE_NOSCHED) + atomic_inc(&cbe_spu_info[spu->node].reserved_spus); + + ctx->stats.slb_flt_base = spu->stats.slb_flt; + ctx->stats.class2_intr_base = spu->stats.class2_intr; + + spu_associate_mm(spu, ctx->owner); + + spin_lock_irq(&spu->register_lock); + spu->ctx = ctx; + spu->flags = 0; + ctx->spu = spu; + ctx->ops = &spu_hw_ops; + spu->pid = current->pid; + spu->tgid = current->tgid; + spu->ibox_callback = spufs_ibox_callback; + spu->wbox_callback = spufs_wbox_callback; + spu->stop_callback = spufs_stop_callback; + spu->mfc_callback = spufs_mfc_callback; + spin_unlock_irq(&spu->register_lock); + + spu_unmap_mappings(ctx); + + spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0); + spu_restore(&ctx->csa, spu); + spu->timestamp = jiffies; + spu_switch_notify(spu, ctx); + ctx->state = SPU_STATE_RUNNABLE; + + spuctx_switch_state(ctx, SPU_UTIL_USER); +} + +/* + * Must be used with the list_mutex held. 
+ */ +static inline int sched_spu(struct spu *spu) +{ + BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); + + return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); +} + +static void aff_merge_remaining_ctxs(struct spu_gang *gang) +{ + struct spu_context *ctx; + + list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { + if (list_empty(&ctx->aff_list)) + list_add(&ctx->aff_list, &gang->aff_list_head); + } + gang->aff_flags |= AFF_MERGED; +} + +static void aff_set_offsets(struct spu_gang *gang) +{ + struct spu_context *ctx; + int offset; + + offset = -1; + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset--; + } + + offset = 0; + list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset++; + } + + gang->aff_flags |= AFF_OFFSETS_SET; +} + +static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, + int group_size, int lowest_offset) +{ + struct spu *spu; + int node, n; + + /* + * TODO: A better algorithm could be used to find a good spu to be + * used as reference location for the ctxs chain. + */ + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + /* + * "available_spus" counts how many spus are not potentially + * going to be used by other affinity gangs whose reference + * context is already in place. Although this code seeks to + * avoid having affinity gangs with a summed amount of + * contexts bigger than the amount of spus in the node, + * this may happen sporadically. In this case, available_spus + * becomes negative, which is harmless. + */ + int available_spus; + + node = (node < MAX_NUMNODES) ? 
node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ available_spus = 0;
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset
+ && spu->ctx->gang->aff_ref_spu)
+ available_spus -= spu->ctx->gang->contexts;
+ available_spus++;
+ }
+ if (available_spus < ctx->gang->contexts) {
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ continue;
+ }
+
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if ((!mem_aff || spu->has_mem_affinity) &&
+ sched_spu(spu)) {
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ return spu;
+ }
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+ return NULL;
+}
+
+static void aff_set_ref_point_location(struct spu_gang *gang)
+{
+ int mem_aff, gs, lowest_offset;
+ struct spu_context *ctx;
+ struct spu *tmp;
+
+ mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
+ lowest_offset = 0;
+ gs = 0;
+
+ list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+ gs++;
+
+ list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+ aff_list) {
+ if (&ctx->aff_list == &gang->aff_list_head)
+ break;
+ lowest_offset = ctx->aff_offset;
+ }
+
+ gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs,
+ lowest_offset);
+}
+
+static struct spu *ctx_location(struct spu *ref, int offset, int node)
+{
+ struct spu *spu;
+
+ spu = NULL;
+ if (offset >= 0) {
+ list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+ BUG_ON(spu->node != node);
+ if (offset == 0)
+ break;
+ if (sched_spu(spu))
+ offset--;
+ }
+ } else {
+ list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+ BUG_ON(spu->node != node);
+ if (offset == 0)
+ break;
+ if (sched_spu(spu))
+ offset++;
+ }
+ }
+
+ return spu;
+}
+
+/*
+ * has_affinity is called each time a context is going to be scheduled.
+ * It returns whether the context's gang has an affinity reference spu
+ * set up, computing one first if necessary.
+ */
+static int has_affinity(struct spu_context *ctx)
+{
+ struct spu_gang *gang = ctx->gang;
+
+ if (list_empty(&ctx->aff_list))
+ return 0;
+
+ if (atomic_read(&ctx->gang->aff_sched_count) == 0)
+ ctx->gang->aff_ref_spu = NULL;
+
+ if (!gang->aff_ref_spu) {
+ if (!(gang->aff_flags & AFF_MERGED))
+ aff_merge_remaining_ctxs(gang);
+ if (!(gang->aff_flags & AFF_OFFSETS_SET))
+ aff_set_offsets(gang);
+ aff_set_ref_point_location(gang);
+ }
+
+ return gang->aff_ref_spu != NULL;
+}
+
+/**
+ * spu_unbind_context - unbind spu context from physical spu
+ * @spu: physical spu to unbind from
+ * @ctx: context to unbind
+ */
+static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
+{
+ u32 status;
+
+ spu_context_trace(spu_unbind_context__enter, ctx, spu);
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ if (spu->ctx->flags & SPU_CREATE_NOSCHED)
+ atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
+
+ if (ctx->gang)
+ /*
+ * If ctx->gang->aff_sched_count is positive, SPU affinity is
+ * being considered in this gang.
Using atomic_dec_if_positive
+ * allows us to skip an explicit check for affinity in this gang.
+ */
+ atomic_dec_if_positive(&ctx->gang->aff_sched_count);
+
+ spu_switch_notify(spu, NULL);
+ spu_unmap_mappings(ctx);
+ spu_save(&ctx->csa, spu);
+ spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0);
+
+ spin_lock_irq(&spu->register_lock);
+ spu->timestamp = jiffies;
+ ctx->state = SPU_STATE_SAVED;
+ spu->ibox_callback = NULL;
+ spu->wbox_callback = NULL;
+ spu->stop_callback = NULL;
+ spu->mfc_callback = NULL;
+ spu->pid = 0;
+ spu->tgid = 0;
+ ctx->ops = &spu_backing_ops;
+ spu->flags = 0;
+ spu->ctx = NULL;
+ spin_unlock_irq(&spu->register_lock);
+
+ spu_associate_mm(spu, NULL);
+
+ ctx->stats.slb_flt +=
+ (spu->stats.slb_flt - ctx->stats.slb_flt_base);
+ ctx->stats.class2_intr +=
+ (spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+ /* This maps the underlying spu state to idle */
+ spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+ ctx->spu = NULL;
+
+ if (spu_stopped(ctx, &status))
+ wake_up_all(&ctx->stop_wq);
+}
+
+/**
+ * spu_add_to_rq - add a context to the runqueue
+ * @ctx: context to add
+ */
+static void __spu_add_to_rq(struct spu_context *ctx)
+{
+ /*
+ * Unfortunately this code path can be called from multiple threads
+ * on behalf of a single context due to the way the problem state
+ * mmap support works.
+ *
+ * Fortunately we need to wake up all these threads at the same time
+ * and can simply skip the runqueue addition for all but the first
+ * thread getting into this codepath.
+ *
+ * It's still quite hacky, and long-term we should proxy all other
+ * threads through the owner thread so that spu_run is in control
+ * of all the scheduling activity for a given context.
+ */
+ if (list_empty(&ctx->rq)) {
+ list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
+ set_bit(ctx->prio, spu_prio->bitmap);
+ if (!spu_prio->nr_waiting++)
+ mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+ }
+}
+
+static void spu_add_to_rq(struct spu_context *ctx)
+{
+ spin_lock(&spu_prio->runq_lock);
+ __spu_add_to_rq(ctx);
+ spin_unlock(&spu_prio->runq_lock);
+}
+
+static void __spu_del_from_rq(struct spu_context *ctx)
+{
+ int prio = ctx->prio;
+
+ if (!list_empty(&ctx->rq)) {
+ if (!--spu_prio->nr_waiting)
+ del_timer(&spusched_timer);
+ list_del_init(&ctx->rq);
+
+ if (list_empty(&spu_prio->runq[prio]))
+ clear_bit(prio, spu_prio->bitmap);
+ }
+}
+
+void spu_del_from_rq(struct spu_context *ctx)
+{
+ spin_lock(&spu_prio->runq_lock);
+ __spu_del_from_rq(ctx);
+ spin_unlock(&spu_prio->runq_lock);
+}
+
+static void spu_prio_wait(struct spu_context *ctx)
+{
+ DEFINE_WAIT(wait);
+
+ /*
+ * The caller must explicitly wait for a context to be loaded
+ * if the nosched flag is set. If NOSCHED is not set, the caller
+ * queues the context and waits for an spu event or error.
+ */
+ BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED));
+
+ spin_lock(&spu_prio->runq_lock);
+ prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
+ if (!signal_pending(current)) {
+ __spu_add_to_rq(ctx);
+ spin_unlock(&spu_prio->runq_lock);
+ mutex_unlock(&ctx->state_mutex);
+ schedule();
+ mutex_lock(&ctx->state_mutex);
+ spin_lock(&spu_prio->runq_lock);
+ __spu_del_from_rq(ctx);
+ }
+ spin_unlock(&spu_prio->runq_lock);
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&ctx->stop_wq, &wait);
+}
+
+static struct spu *spu_get_idle(struct spu_context *ctx)
+{
+ struct spu *spu, *aff_ref_spu;
+ int node, n;
+
+ spu_context_nospu_trace(spu_get_idle__enter, ctx);
+
+ if (ctx->gang) {
+ mutex_lock(&ctx->gang->aff_mutex);
+ if (has_affinity(ctx)) {
+ aff_ref_spu = ctx->gang->aff_ref_spu;
+ atomic_inc(&ctx->gang->aff_sched_count);
+ mutex_unlock(&ctx->gang->aff_mutex);
+ node = aff_ref_spu->node;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ spu = ctx_location(aff_ref_spu, ctx->aff_offset, node);
+ if (spu && spu->alloc_state == SPU_FREE)
+ goto found;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ atomic_dec(&ctx->gang->aff_sched_count);
+ goto not_found;
+ }
+ mutex_unlock(&ctx->gang->aff_mutex);
+ }
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if (spu->alloc_state == SPU_FREE)
+ goto found;
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+
+ not_found:
+ spu_context_nospu_trace(spu_get_idle__not_found, ctx);
+ return NULL;
+
+ found:
+ spu->alloc_state = SPU_USED;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ spu_context_trace(spu_get_idle__found, ctx, spu);
+ spu_init_channels(spu);
+ return spu;
+}
+
+/**
+ * find_victim - find a lower priority context to preempt
+ * @ctx: candidate context for running
+ *
+ * Returns the freed physical spu to run the new context on.
+ */
+static struct spu *find_victim(struct spu_context *ctx)
+{
+ struct spu_context *victim = NULL;
+ struct spu *spu;
+ int node, n;
+
+ spu_context_nospu_trace(spu_find_victim__enter, ctx);
+
+ /*
+ * Look for a possible preemption candidate on the local node first.
+ * If there is no candidate look at the other nodes. This isn't
+ * exactly fair, but so far the whole spu scheduler tries to keep
+ * a strong node affinity. We might want to fine-tune this in
+ * the future.
+ */
+ restart:
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ struct spu_context *tmp = spu->ctx;
+
+ if (tmp && tmp->prio > ctx->prio &&
+ !(tmp->flags & SPU_CREATE_NOSCHED) &&
+ (!victim || tmp->prio > victim->prio)) {
+ victim = spu->ctx;
+ }
+ }
+ if (victim)
+ get_spu_context(victim);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ if (victim) {
+ /*
+ * This nests ctx->state_mutex, but we always lock
+ * higher priority contexts before lower priority
+ * ones, so this is safe until we introduce
+ * priority inheritance schemes.
+ *
+ * XXX if the highest priority context is locked,
+ * this can loop a long time. Might be better to
+ * look at another context or give up after X retries.
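+ *
+ * Editor's sketch, not part of the original source: the
+ * bounded variant suggested above could look like the
+ * following, where retries and MAX_VICTIM_RETRIES are
+ * hypothetical additions:
+ *
+ * if (!mutex_trylock(&victim->state_mutex)) {
+ * put_spu_context(victim);
+ * if (++retries > MAX_VICTIM_RETRIES)
+ * return NULL;
+ * victim = NULL;
+ * goto restart;
+ * }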
+ */
+ if (!mutex_trylock(&victim->state_mutex)) {
+ put_spu_context(victim);
+ victim = NULL;
+ goto restart;
+ }
+
+ spu = victim->spu;
+ if (!spu || victim->prio <= ctx->prio) {
+ /*
+ * This race can happen because we've dropped
+ * the active list mutex. Not a problem, just
+ * restart the search.
+ */
+ mutex_unlock(&victim->state_mutex);
+ put_spu_context(victim);
+ victim = NULL;
+ goto restart;
+ }
+
+ spu_context_trace(__spu_deactivate__unload, ctx, spu);
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ cbe_spu_info[node].nr_active--;
+ spu_unbind_context(spu, victim);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ victim->stats.invol_ctx_switch++;
+ spu->stats.invol_ctx_switch++;
+ if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags))
+ spu_add_to_rq(victim);
+
+ mutex_unlock(&victim->state_mutex);
+ put_spu_context(victim);
+
+ return spu;
+ }
+ }
+
+ return NULL;
+}
+
+static void __spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+ int node = spu->node;
+ int success = 0;
+
+ spu_set_timeslice(ctx);
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ if (spu->ctx == NULL) {
+ spu_bind_context(spu, ctx);
+ cbe_spu_info[node].nr_active++;
+ spu->alloc_state = SPU_USED;
+ success = 1;
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ if (success)
+ wake_up_all(&ctx->run_wq);
+ else
+ spu_add_to_rq(ctx);
+}
+
+static void spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+ /* not a candidate for interruptible because it's called either
+ from the scheduler thread or from spu_deactivate */
+ mutex_lock(&ctx->state_mutex);
+ if (ctx->state == SPU_STATE_SAVED)
+ __spu_schedule(spu, ctx);
+ spu_release(ctx);
+}
+
+/**
+ * spu_unschedule - remove a context from a spu, and possibly release it.
+ * @spu: The SPU to unschedule from
+ * @ctx: The context currently scheduled on the SPU
+ * @free_spu: Whether to free the SPU for other contexts
+ *
+ * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the
+ * SPU is made available for other contexts (i.e., may be returned by
+ * spu_get_idle). If this is zero, the caller is expected to schedule another
+ * context to this spu.
+ *
+ * Should be called with ctx->state_mutex held.
+ */
+static void spu_unschedule(struct spu *spu, struct spu_context *ctx,
+ int free_spu)
+{
+ int node = spu->node;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ cbe_spu_info[node].nr_active--;
+ if (free_spu)
+ spu->alloc_state = SPU_FREE;
+ spu_unbind_context(spu, ctx);
+ ctx->stats.invol_ctx_switch++;
+ spu->stats.invol_ctx_switch++;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+}
+
+/**
+ * spu_activate - find a free spu for a context and execute it
+ * @ctx: spu context to schedule
+ * @flags: flags (currently ignored)
+ *
+ * Tries to find a free spu to run @ctx. If no free spu is available
+ * add the context to the runqueue so it gets woken up once an spu
+ * is available.
+ */
+int spu_activate(struct spu_context *ctx, unsigned long flags)
+{
+ struct spu *spu;
+
+ /*
+ * If there are multiple threads waiting for a single context
+ * only one actually binds the context while the others will
+ * only be able to acquire the state_mutex once the context
+ * already is in runnable state.
+ */
+ if (ctx->spu)
+ return 0;
+
+spu_activate_top:
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
+ spu = spu_get_idle(ctx);
+ /*
+ * If this is a realtime thread we try to get it running by
+ * preempting a lower priority thread.
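+ *
+ * Editor's note, not part of the original source: the overall
+ * placement policy here is, simplified (the NOSCHED case below
+ * waits instead of queueing):
+ *
+ * spu = spu_get_idle(ctx);
+ * if (!spu && rt_prio(ctx->prio))
+ * spu = find_victim(ctx);
+ * if (!spu)
+ * spu_add_to_rq(ctx);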
+ */
+ if (!spu && rt_prio(ctx->prio))
+ spu = find_victim(ctx);
+ if (spu) {
+ unsigned long runcntl;
+
+ runcntl = ctx->ops->runcntl_read(ctx);
+ __spu_schedule(spu, ctx);
+ if (runcntl & SPU_RUNCNTL_RUNNABLE)
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+
+ return 0;
+ }
+
+ if (ctx->flags & SPU_CREATE_NOSCHED) {
+ spu_prio_wait(ctx);
+ goto spu_activate_top;
+ }
+
+ spu_add_to_rq(ctx);
+
+ return 0;
+}
+
+/**
+ * grab_runnable_context - try to find a runnable context
+ *
+ * Remove the highest priority context on the runqueue and return it
+ * to the caller. Returns %NULL if no runnable context was found.
+ */
+static struct spu_context *grab_runnable_context(int prio, int node)
+{
+ struct spu_context *ctx;
+ int best;
+
+ spin_lock(&spu_prio->runq_lock);
+ best = find_first_bit(spu_prio->bitmap, prio);
+ while (best < prio) {
+ struct list_head *rq = &spu_prio->runq[best];
+
+ list_for_each_entry(ctx, rq, rq) {
+ /* XXX(hch): check for affinity here as well */
+ if (__node_allowed(ctx, node)) {
+ __spu_del_from_rq(ctx);
+ goto found;
+ }
+ }
+ best++;
+ }
+ ctx = NULL;
+ found:
+ spin_unlock(&spu_prio->runq_lock);
+ return ctx;
+}
+
+static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_context *new = NULL;
+
+ if (spu) {
+ new = grab_runnable_context(max_prio, spu->node);
+ if (new || force) {
+ spu_unschedule(spu, ctx, new == NULL);
+ if (new) {
+ if (new->flags & SPU_CREATE_NOSCHED)
+ wake_up(&new->stop_wq);
+ else {
+ spu_release(ctx);
+ spu_schedule(spu, new);
+ /* this one can't easily be made
+ interruptible */
+ mutex_lock(&ctx->state_mutex);
+ }
+ }
+ }
+ }
+
+ return new != NULL;
+}
+
+/**
+ * spu_deactivate - unbind a context from its physical spu
+ * @ctx: spu context to unbind
+ *
+ * Unbind @ctx from the physical spu it is running on and schedule
+ * the highest priority context to run on the freed physical spu.
+ */
+void spu_deactivate(struct spu_context *ctx)
+{
+ spu_context_nospu_trace(spu_deactivate__enter, ctx);
+ __spu_deactivate(ctx, 1, MAX_PRIO);
+}
+
+/**
+ * spu_yield - yield a physical spu if others are waiting
+ * @ctx: spu context to yield
+ *
+ * Check if there is a higher priority context waiting and if yes
+ * unbind @ctx from the physical spu and schedule the highest
+ * priority context to run on the freed physical spu instead.
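+ *
+ * Editor's note, not part of the original source: this is the
+ * non-forced flavour of deactivation, i.e. roughly
+ *
+ * __spu_deactivate(ctx, 0, MAX_PRIO);
+ *
+ * so the context keeps its spu unless another runnable context is
+ * actually waiting; NOSCHED contexts never yield.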
+ */ +void spu_yield(struct spu_context *ctx) +{ + spu_context_nospu_trace(spu_yield__enter, ctx); + if (!(ctx->flags & SPU_CREATE_NOSCHED)) { + mutex_lock(&ctx->state_mutex); + __spu_deactivate(ctx, 0, MAX_PRIO); + mutex_unlock(&ctx->state_mutex); + } +} + +static noinline void spusched_tick(struct spu_context *ctx) +{ + struct spu_context *new = NULL; + struct spu *spu = NULL; + + if (spu_acquire(ctx)) + BUG(); /* a kernel thread never has signals pending */ + + if (ctx->state != SPU_STATE_RUNNABLE) + goto out; + if (ctx->flags & SPU_CREATE_NOSCHED) + goto out; + if (ctx->policy == SCHED_FIFO) + goto out; + + if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + goto out; + + spu = ctx->spu; + + spu_context_trace(spusched_tick__preempt, ctx, spu); + + new = grab_runnable_context(ctx->prio + 1, spu->node); + if (new) { + spu_unschedule(spu, ctx, 0); + if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + spu_add_to_rq(ctx); + } else { + spu_context_nospu_trace(spusched_tick__newslice, ctx); + if (!ctx->time_slice) + ctx->time_slice++; + } +out: + spu_release(ctx); + + if (new) + spu_schedule(spu, new); +} + +/** + * count_active_contexts - count nr of active tasks + * + * Return the number of tasks currently running or waiting to run. + * + * Note that we don't take runq_lock / list_mutex here. Reading + * a single 32bit value is atomic on powerpc, and we don't care + * about memory ordering issues here. + */ +static unsigned long count_active_contexts(void) +{ + int nr_active = 0, node; + + for (node = 0; node < MAX_NUMNODES; node++) + nr_active += cbe_spu_info[node].nr_active; + nr_active += spu_prio->nr_waiting; + + return nr_active; +} + +/** + * spu_calc_load - update the avenrun load estimates. + * + * No locking against reading these values from userspace, as for + * the CPU loadavg code. + */ +static void spu_calc_load(void) +{ + unsigned long active_tasks; /* fixed-point */ + + active_tasks = count_active_contexts() * FIXED_1; + CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks); + CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks); + CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); +} + +static void spusched_wake(unsigned long data) +{ + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + wake_up_process(spusched_task); +} + +static void spuloadavg_wake(unsigned long data) +{ + mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ); + spu_calc_load(); +} + +static int spusched_thread(void *unused) +{ + struct spu *spu; + int node; + + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + for (node = 0; node < MAX_NUMNODES; node++) { + struct mutex *mtx = &cbe_spu_info[node].list_mutex; + + mutex_lock(mtx); + list_for_each_entry(spu, &cbe_spu_info[node].spus, + cbe_list) { + struct spu_context *ctx = spu->ctx; + + if (ctx) { + get_spu_context(ctx); + mutex_unlock(mtx); + spusched_tick(ctx); + mutex_lock(mtx); + put_spu_context(ctx); + } + } + mutex_unlock(mtx); + } + } + + return 0; +} + +void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state) +{ + unsigned long long curtime; + signed long long delta; + struct spu *spu; + enum spu_utilization_state old_state; + int node; + + curtime = ktime_get_ns(); + delta = curtime - ctx->stats.tstamp; + + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + WARN_ON(delta < 0); + + spu = ctx->spu; + old_state = ctx->stats.util_state; + ctx->stats.util_state = new_state; + ctx->stats.tstamp = curtime; + + /* + * Update the physical SPU utilization statistics. 
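+ *
+ * Editor's note, not part of the original source: the elapsed time
+ * is charged to the state being left, on both the context and the
+ * physical spu, and busy_spus tracks how many SPUs in the node are
+ * currently in SPU_UTIL_USER, roughly:
+ *
+ * spu->stats.times[old_state] += delta;
+ * if (new_state == SPU_UTIL_USER)
+ * atomic_inc(&cbe_spu_info[node].busy_spus);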
+ */ + if (spu) { + ctx->stats.times[old_state] += delta; + spu->stats.times[old_state] += delta; + spu->stats.util_state = new_state; + spu->stats.tstamp = curtime; + node = spu->node; + if (old_state == SPU_UTIL_USER) + atomic_dec(&cbe_spu_info[node].busy_spus); + if (new_state == SPU_UTIL_USER) + atomic_inc(&cbe_spu_info[node].busy_spus); + } +} + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +static int show_spu_loadavg(struct seq_file *s, void *private) +{ + int a, b, c; + + a = spu_avenrun[0] + (FIXED_1/200); + b = spu_avenrun[1] + (FIXED_1/200); + c = spu_avenrun[2] + (FIXED_1/200); + + /* + * Note that last_pid doesn't really make much sense for the + * SPU loadavg (it even seems very odd on the CPU side...), + * but we include it here to have a 100% compatible interface. + */ + seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + count_active_contexts(), + atomic_read(&nr_spu_contexts), + task_active_pid_ns(current)->last_pid); + return 0; +} + +static int spu_loadavg_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_spu_loadavg, NULL); +} + +static const struct file_operations spu_loadavg_fops = { + .open = spu_loadavg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int __init spu_sched_init(void) +{ + struct proc_dir_entry *entry; + int err = -ENOMEM, i; + + spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); + if (!spu_prio) + goto out; + + for (i = 0; i < MAX_PRIO; i++) { + INIT_LIST_HEAD(&spu_prio->runq[i]); + __clear_bit(i, spu_prio->bitmap); + } + spin_lock_init(&spu_prio->runq_lock); + + setup_timer(&spusched_timer, spusched_wake, 0); + setup_timer(&spuloadavg_timer, spuloadavg_wake, 0); + + spusched_task = kthread_run(spusched_thread, NULL, "spusched"); + if (IS_ERR(spusched_task)) { + err = PTR_ERR(spusched_task); + goto out_free_spu_prio; + } + + mod_timer(&spuloadavg_timer, 0); + + entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops); + if (!entry) + goto out_stop_kthread; + + pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n", + SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE); + return 0; + + out_stop_kthread: + kthread_stop(spusched_task); + out_free_spu_prio: + kfree(spu_prio); + out: + return err; +} + +void spu_sched_exit(void) +{ + struct spu *spu; + int node; + + remove_proc_entry("spu_loadavg", NULL); + + del_timer_sync(&spusched_timer); + del_timer_sync(&spuloadavg_timer); + kthread_stop(spusched_task); + + for (node = 0; node < MAX_NUMNODES; node++) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->alloc_state != SPU_FREE) + spu->alloc_state = SPU_FREE; + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + kfree(spu_prio); +} diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c new file mode 100644 index 000000000..72c905f1e --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c @@ -0,0 +1,336 @@ +/* + * spu_restore.c + * + * (C) Copyright IBM Corp. 
2005 + * + * SPU-side context restore sequence outlined in + * Synergistic Processor Element Book IV + * + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +#ifndef LS_SIZE +#define LS_SIZE 0x40000 /* 256K (in bytes) */ +#endif + +typedef unsigned int u32; +typedef unsigned long long u64; + +#include <spu_intrinsics.h> +#include <asm/spu_csa.h> +#include "spu_utils.h" + +#define BR_INSTR 0x327fff80 /* br -4 */ +#define NOP_INSTR 0x40200000 /* nop */ +#define HEQ_INSTR 0x7b000000 /* heq $0, $0 */ +#define STOP_INSTR 0x00000000 /* stop 0x0 */ +#define ILLEGAL_INSTR 0x00800000 /* illegal instr */ +#define RESTORE_COMPLETE 0x00003ffc /* stop 0x3ffc */ + +static inline void fetch_regs_from_mem(addr64 lscsa_ea) +{ + unsigned int ls = (unsigned int)®s_spill[0]; + unsigned int size = sizeof(regs_spill); + unsigned int tag_id = 0; + unsigned int cmd = 0x40; /* GET */ + + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void restore_upper_240kb(addr64 lscsa_ea) +{ + unsigned int ls = 16384; + unsigned int list = (unsigned int)&dma_list[0]; + unsigned int size = sizeof(dma_list); + unsigned int tag_id = 0; + unsigned int cmd = 0x44; /* GETL */ + + /* Restore, Step 4: + * Enqueue the GETL command (tag 0) to the MFC SPU command + * queue to transfer the upper 240 kb of LS from CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, list); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void restore_decr(void) +{ + unsigned int offset; + unsigned int decr_running; + unsigned int decr; + + /* Restore, Step 6(moved): + * If the LSCSA "decrementer running" flag is set + * then write the SPU_WrDec channel with the + * decrementer value from LSCSA. + */ + offset = LSCSA_QW_OFFSET(decr_status); + decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; + if (decr_running) { + offset = LSCSA_QW_OFFSET(decr); + decr = regs_spill[offset].slot[0]; + spu_writech(SPU_WrDec, decr); + } +} + +static inline void write_ppu_mb(void) +{ + unsigned int offset; + unsigned int data; + + /* Restore, Step 11: + * Write the MFC_WrOut_MB channel with the PPU_MB + * data from LSCSA. + */ + offset = LSCSA_QW_OFFSET(ppu_mb); + data = regs_spill[offset].slot[0]; + spu_writech(SPU_WrOutMbox, data); +} + +static inline void write_ppuint_mb(void) +{ + unsigned int offset; + unsigned int data; + + /* Restore, Step 12: + * Write the MFC_WrInt_MB channel with the PPUINT_MB + * data from LSCSA. 
+ */ + offset = LSCSA_QW_OFFSET(ppuint_mb); + data = regs_spill[offset].slot[0]; + spu_writech(SPU_WrOutIntrMbox, data); +} + +static inline void restore_fpcr(void) +{ + unsigned int offset; + vector unsigned int fpcr; + + /* Restore, Step 13: + * Restore the floating-point status and control + * register from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(fpcr); + fpcr = regs_spill[offset].v; + spu_mtfpscr(fpcr); +} + +static inline void restore_srr0(void) +{ + unsigned int offset; + unsigned int srr0; + + /* Restore, Step 14: + * Restore the SPU SRR0 data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(srr0); + srr0 = regs_spill[offset].slot[0]; + spu_writech(SPU_WrSRR0, srr0); +} + +static inline void restore_event_mask(void) +{ + unsigned int offset; + unsigned int event_mask; + + /* Restore, Step 15: + * Restore the SPU_RdEventMsk data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(event_mask); + event_mask = regs_spill[offset].slot[0]; + spu_writech(SPU_WrEventMask, event_mask); +} + +static inline void restore_tag_mask(void) +{ + unsigned int offset; + unsigned int tag_mask; + + /* Restore, Step 16: + * Restore the SPU_RdTagMsk data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(tag_mask); + tag_mask = regs_spill[offset].slot[0]; + spu_writech(MFC_WrTagMask, tag_mask); +} + +static inline void restore_complete(void) +{ + extern void exit_fini(void); + unsigned int *exit_instrs = (unsigned int *)exit_fini; + unsigned int offset; + unsigned int stopped_status; + unsigned int stopped_code; + + /* Restore, Step 18: + * Issue a stop-and-signal instruction with + * "good context restore" signal value. + * + * Restore, Step 19: + * There may be additional instructions placed + * here by the PPE Sequence for SPU Context + * Restore in order to restore the correct + * "stopped state". + * + * This step is handled here by analyzing the + * LSCSA.stopped_status and then modifying the + * exit() function to behave appropriately. + */ + + offset = LSCSA_QW_OFFSET(stopped_status); + stopped_status = regs_spill[offset].slot[0]; + stopped_code = regs_spill[offset].slot[1]; + + switch (stopped_status) { + case SPU_STOPPED_STATUS_P_I: + /* SPU_Status[P,I]=1. Add illegal instruction + * followed by stop-and-signal instruction after + * end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_P_H: + /* SPU_Status[P,H]=1. Add 'heq $0, $0' followed + * by stop-and-signal instruction after end of + * restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = HEQ_INSTR; + exit_instrs[2] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_S_P: + /* SPU_Status[S,P]=1. Add nop instruction + * followed by 'br -4' after end of restore + * code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = STOP_INSTR | stopped_code; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_S_I: + /* SPU_Status[S,I]=1. Add illegal instruction + * followed by 'br -4' after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_I: + /* SPU_Status[I]=1. Add illegal instruction followed + * by infinite loop after end of restore sequence. 
+ */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_S: + /* SPU_Status[S]=1. Add two 'nop' instructions. */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = NOP_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_H: + /* SPU_Status[H]=1. Add 'heq $0, $0' instruction + * after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = HEQ_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_P: + /* SPU_Status[P]=1. Add stop-and-signal instruction + * after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_R: + /* SPU_Status[I,S,H,P,R]=0. Add infinite loop. */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = NOP_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + default: + /* SPU_Status[R]=1. No additional instructions. */ + break; + } + spu_sync(); +} + +/** + * main - entry point for SPU-side context restore. + * + * This code deviates from the documented sequence in the + * following aspects: + * + * 1. The EA for LSCSA is passed from PPE in the + * signal notification channels. + * 2. The register spill area is pulled by SPU + * into LS, rather than pushed by PPE. + * 3. All 128 registers are restored by exit(). + * 4. The exit() function is modified at run + * time in order to properly restore the + * SPU_Status register. + */ +int main() +{ + addr64 lscsa_ea; + + lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1); + lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2); + fetch_regs_from_mem(lscsa_ea); + + set_event_mask(); /* Step 1. */ + set_tag_mask(); /* Step 2. */ + build_dma_list(lscsa_ea); /* Step 3. */ + restore_upper_240kb(lscsa_ea); /* Step 4. */ + /* Step 5: done by 'exit'. */ + enqueue_putllc(lscsa_ea); /* Step 7. */ + set_tag_update(); /* Step 8. */ + read_tag_status(); /* Step 9. */ + restore_decr(); /* moved Step 6. */ + read_llar_status(); /* Step 10. */ + write_ppu_mb(); /* Step 11. */ + write_ppuint_mb(); /* Step 12. */ + restore_fpcr(); /* Step 13. */ + restore_srr0(); /* Step 14. */ + restore_event_mask(); /* Step 15. */ + restore_tag_mask(); /* Step 16. */ + /* Step 17. done by 'exit'. */ + restore_complete(); /* Step 18. */ + + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S new file mode 100644 index 000000000..2905949de --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S @@ -0,0 +1,116 @@ +/* + * crt0_r.S: Entry function for SPU-side context restore. + * + * Copyright (C) 2005 IBM + * + * Entry and exit function for SPU-side of the context restore + * sequence. Sets up an initial stack frame, then branches to + * 'main'. On return, restores all 128 registers from the LSCSA + * and exits. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/spu_csa.h>
+
+.data
+.align 7
+.globl regs_spill
+regs_spill:
+.space SIZEOF_SPU_SPILL_REGS, 0x0
+
+.text
+.global _start
+_start:
+ /* Initialize the stack pointer to point to 16368
+ * (16kb-16). The back chain pointer is initialized
+ * to NULL.
+ */
+ il $0, 0
+ il $SP, 16368
+ stqd $0, 0($SP)
+
+ /* Allocate a minimum stack frame for the called main.
+ * This is needed so that main has a place to save the
+ * link register when it calls another function.
+ */
+ stqd $SP, -160($SP)
+ ai $SP, $SP, -160
+
+ /* Call the program's main function. */
+ brsl $0, main
+
+.global exit
+.global _exit
+exit:
+_exit:
+ /* SPU Context Restore, Step 5: Restore the remaining 112 GPRs. */
+ ila $3, regs_spill + 256
+restore_regs:
+ lqr $4, restore_reg_insts
+restore_reg_loop:
+ ai $4, $4, 4
+ .balignl 16, 0x40200000
+restore_reg_insts: /* must be quad-word aligned. */
+ lqd $16, 0($3)
+ lqd $17, 16($3)
+ lqd $18, 32($3)
+ lqd $19, 48($3)
+ andi $5, $4, 0x7F
+ stqr $4, restore_reg_insts
+ ai $3, $3, 64
+ brnz $5, restore_reg_loop
+
+ /* SPU Context Restore Step 17: Restore the first 16 GPRs. */
+ lqa $0, regs_spill + 0
+ lqa $1, regs_spill + 16
+ lqa $2, regs_spill + 32
+ lqa $3, regs_spill + 48
+ lqa $4, regs_spill + 64
+ lqa $5, regs_spill + 80
+ lqa $6, regs_spill + 96
+ lqa $7, regs_spill + 112
+ lqa $8, regs_spill + 128
+ lqa $9, regs_spill + 144
+ lqa $10, regs_spill + 160
+ lqa $11, regs_spill + 176
+ lqa $12, regs_spill + 192
+ lqa $13, regs_spill + 208
+ lqa $14, regs_spill + 224
+ lqa $15, regs_spill + 240
+
+ /* Under normal circumstances, the 'exit' function
+ * terminates with 'stop SPU_RESTORE_COMPLETE',
+ * indicating that the SPU-side restore code has
+ * completed.
+ *
+ * However it is possible that instructions immediately
+ * following the 'stop 0x3ffc' have been modified at run
+ * time so as to recreate the exact SPU_Status settings
+ * from the application, e.g. illegal instruction, halt,
+ * etc.
+ */
+.global exit_fini
+.global _exit_fini
+exit_fini:
+_exit_fini:
+ stop SPU_RESTORE_COMPLETE
+ stop 0
+ stop 0
+ stop 0
+
+ /* Pad the size of this crt0.o to be a multiple of 16 bytes. */
+.balignl 16, 0x0
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
new file mode 100644
index 000000000..f383b027e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
@@ -0,0 +1,935 @@
+/*
+ * spu_restore_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_restore.c.
+ * Do not edit!
+ */ +static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { +0x40800000, +0x409ff801, +0x24000080, +0x24fd8081, +0x1cd80081, +0x33001180, +0x42034003, +0x33800284, +0x1c010204, +0x40200000, +0x40200000, +0x40200000, +0x34000190, +0x34004191, +0x34008192, +0x3400c193, +0x141fc205, +0x23fffd84, +0x1c100183, +0x217ffa85, +0x3080b000, +0x3080b201, +0x3080b402, +0x3080b603, +0x3080b804, +0x3080ba05, +0x3080bc06, +0x3080be07, +0x3080c008, +0x3080c209, +0x3080c40a, +0x3080c60b, +0x3080c80c, +0x3080ca0d, +0x3080cc0e, +0x3080ce0f, +0x00003ffc, +0x00000000, +0x00000000, +0x00000000, +0x01a00182, +0x3ec00083, +0xb0a14103, +0x01a00204, +0x3ec10083, +0x4202c002, +0xb0a14203, +0x21a00802, +0x3fbf028a, +0x3f20050a, +0x3fbe0502, +0x3fe30102, +0x21a00882, +0x3f82028b, +0x3fe3058b, +0x3fbf0584, +0x3f200204, +0x3fbe0204, +0x3fe30204, +0x04000203, +0x21a00903, +0x40848002, +0x21a00982, +0x40800003, +0x21a00a03, +0x40802002, +0x21a00a82, +0x21a00083, +0x40800082, +0x21a00b02, +0x10002612, +0x42a00003, +0x42074006, +0x1800c204, +0x40a00008, +0x40800789, +0x1c010305, +0x34000302, +0x1cffc489, +0x3ec00303, +0x3ec00287, +0xb0408403, +0x24000302, +0x34000282, +0x1c020306, +0xb0408207, +0x18020204, +0x24000282, +0x217ffa09, +0x04000402, +0x21a00802, +0x3fbe0504, +0x3fe30204, +0x21a00884, +0x42074002, +0x21a00902, +0x40803c03, +0x21a00983, +0x04000485, +0x21a00a05, +0x40802202, +0x21a00a82, +0x21a00805, +0x21a00884, +0x3fbf0582, +0x3f200102, +0x3fbe0102, +0x3fe30102, +0x21a00902, +0x40804003, +0x21a00983, +0x21a00a05, +0x40805a02, +0x21a00a82, +0x40800083, +0x21a00b83, +0x01a00c02, +0x30809c03, +0x34000182, +0x14004102, +0x21002082, +0x01a00d82, +0x3080a003, +0x34000182, +0x21a00e02, +0x3080a203, +0x34000182, +0x21a00f02, +0x3080a403, +0x34000182, +0x77400100, +0x3080a603, +0x34000182, +0x21a00702, +0x3080a803, +0x34000182, +0x21a00082, +0x3080aa03, +0x34000182, +0x21a00b02, +0x4020007f, +0x3080ae02, +0x42004805, +0x3080ac04, +0x34000103, +0x34000202, +0x1cffc183, +0x3b810106, +0x0f608184, +0x42013802, +0x5c020183, +0x38810102, +0x3b810102, +0x21000e83, +0x4020007f, +0x35000100, +0x00000470, +0x000002f8, +0x00000430, +0x00000360, +0x000002f8, +0x000003c8, +0x000004a8, +0x00000298, +0x00000360, +0x00200000, +0x409ffe02, +0x30801203, +0x40800208, +0x3ec40084, +0x40800407, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x38820282, +0x41004003, +0xb0408189, +0x28820282, +0x3881c282, +0xb0408304, +0x2881c282, +0x00400000, +0x40800003, +0x35000000, +0x30809e03, +0x34000182, +0x21a00382, +0x4020007f, +0x327fde00, +0x409ffe02, +0x30801203, +0x40800206, +0x3ec40084, +0x40800407, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x38818282, +0x41004003, +0xb040818a, +0x10005b0b, +0x41201003, +0x28818282, +0x3881c282, +0xb0408184, +0x41193f83, +0x60ffc003, +0x2881c282, +0x38820282, +0xb0408189, +0x28820282, +0x327fef80, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40086, +0x4120100b, +0x10005b14, +0x40800404, +0x3ac1c289, +0x40800608, +0xb060c106, +0x3ac10286, +0x3ac2028a, +0x20801203, +0x3881c282, +0x41193f83, +0x60ffc003, +0xb0408589, +0x2881c282, +0x38810282, +0xb0408586, +0x28810282, +0x38820282, +0xb040818a, +0x28820282, +0x4020007f, +0x327fe280, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40084, +0x40800408, +0x10005b14, +0x40800609, +0x3ac1c28a, +0x3ac2028b, +0xb060c104, +0x3ac24284, +0x20801203, +0x41201003, +0x3881c282, +0xb040830a, +0x2881c282, +0x38820282, +0xb040818b, +0x41193f83, +0x60ffc003, +0x28820282, +0x38824282, +0xb0408184, +0x28824282, +0x4020007f, 
+0x327fd580, +0x409ffe02, +0x1000658e, +0x40800206, +0x30801203, +0x40800407, +0x3ec40084, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x413d8003, +0x38818282, +0x4020007f, +0x327fd800, +0x409ffe03, +0x30801202, +0x40800207, +0x3ec40084, +0x10005b09, +0x3ac1c288, +0xb0408184, +0x4020007f, +0x4020007f, +0x20801202, +0x3881c282, +0xb0408308, +0x2881c282, +0x327fc680, +0x409ffe02, +0x1000588b, +0x40800208, +0x30801203, +0x40800407, +0x3ec40084, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x413d8003, +0x38820282, +0x327fbd80, +0x00200000, +0x00000da0, +0x00000000, +0x00000000, +0x00000000, +0x00000d90, +0x00000000, +0x00000000, +0x00000000, +0x00000db0, +0x00000000, +0x00000000, +0x00000000, +0x00000dc0, +0x00000000, +0x00000000, +0x00000000, +0x00000d80, +0x00000000, +0x00000000, +0x00000000, +0x00000df0, +0x00000000, +0x00000000, +0x00000000, +0x00000de0, +0x00000000, +0x00000000, +0x00000000, +0x00000dd0, +0x00000000, +0x00000000, +0x00000000, +0x00000e04, +0x00000000, +0x00000000, +0x00000000, +0x00000e00, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, 
+0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, 
+0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +}; diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c b/arch/powerpc/platforms/cell/spufs/spu_save.c new file mode 100644 index 000000000..ae95cc170 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save.c @@ -0,0 +1,195 @@ +/* + * spu_save.c + * + * (C) Copyright IBM Corp. 2005 + * + * SPU-side context save sequence outlined in + * Synergistic Processor Element Book IV + * + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +#ifndef LS_SIZE +#define LS_SIZE 0x40000 /* 256K (in bytes) */ +#endif + +typedef unsigned int u32; +typedef unsigned long long u64; + +#include <spu_intrinsics.h> +#include <asm/spu_csa.h> +#include "spu_utils.h" + +static inline void save_event_mask(void) +{ + unsigned int offset; + + /* Save, Step 2: + * Read the SPU_RdEventMsk channel and save to the LSCSA. + */ + offset = LSCSA_QW_OFFSET(event_mask); + regs_spill[offset].slot[0] = spu_readch(SPU_RdEventMask); +} + +static inline void save_tag_mask(void) +{ + unsigned int offset; + + /* Save, Step 3: + * Read the SPU_RdTagMsk channel and save to the LSCSA. 
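+ * + * The LSCSA_QW_OFFSET() macro used here (defined in spu_utils.h, later in this patch) reduces a field of struct spu_lscsa to a quadword index into regs_spill[]. A freestanding sketch of the same offsetof-style arithmetic, using a hypothetical stand-in struct: + * + * struct lscsa_like { unsigned int gprs[128][4]; unsigned int event_mask[4]; }; + * #define QW_OFFSET(s, f) (((char *)&((s *)0)->f - (char *)&((s *)0)->gprs[0][0]) >> 4) + * // QW_OFFSET(struct lscsa_like, event_mask) evaluates to 128: the first + * // quadword slot after the 128 saved general-purpose registers (the same + * // classic null-pointer offsetof trick the real header uses via 'dummy').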
+ */ + offset = LSCSA_QW_OFFSET(tag_mask); + regs_spill[offset].slot[0] = spu_readch(MFC_RdTagMask); +} + +static inline void save_upper_240kb(addr64 lscsa_ea) +{ + unsigned int ls = 16384; + unsigned int list = (unsigned int)&dma_list[0]; + unsigned int size = sizeof(dma_list); + unsigned int tag_id = 0; + unsigned int cmd = 0x24; /* PUTL */ + + /* Save, Step 7: + * Enqueue the PUTL command (tag 0) to the MFC SPU command + * queue to transfer the remaining 240 kb of LS to CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, list); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void save_fpcr(void) +{ + // vector unsigned int fpcr; + unsigned int offset; + + /* Save, Step 9: + * Issue the floating-point status and control register + * read instruction, and save to the LSCSA. + */ + offset = LSCSA_QW_OFFSET(fpcr); + regs_spill[offset].v = spu_mffpscr(); +} + +static inline void save_decr(void) +{ + unsigned int offset; + + /* Save, Step 10: + * Read and save the SPU_RdDec channel data to + * the LSCSA. + */ + offset = LSCSA_QW_OFFSET(decr); + regs_spill[offset].slot[0] = spu_readch(SPU_RdDec); +} + +static inline void save_srr0(void) +{ + unsigned int offset; + + /* Save, Step 11: + * Read and save the SPU_WSRR0 channel data to + * the LSCSA. + */ + offset = LSCSA_QW_OFFSET(srr0); + regs_spill[offset].slot[0] = spu_readch(SPU_RdSRR0); +} + +static inline void spill_regs_to_mem(addr64 lscsa_ea) +{ + unsigned int ls = (unsigned int)®s_spill[0]; + unsigned int size = sizeof(regs_spill); + unsigned int tag_id = 0; + unsigned int cmd = 0x20; /* PUT */ + + /* Save, Step 13: + * Enqueue a PUT command (tag 0) to send the LSCSA + * to the CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void enqueue_sync(addr64 lscsa_ea) +{ + unsigned int tag_id = 0; + unsigned int cmd = 0xCC; + + /* Save, Step 14: + * Enqueue an MFC_SYNC command (tag 0). + */ + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void save_complete(void) +{ + /* Save, Step 18: + * Issue a stop-and-signal instruction indicating + * "save complete". Note: This function will not + * return!! + */ + spu_stop(SPU_SAVE_COMPLETE); +} + +/** + * main - entry point for SPU-side context save. + * + * This code deviates from the documented sequence as follows: + * + * 1. The EA for LSCSA is passed from PPE in the + * signal notification channels. + * 2. All 128 registers are saved by crt0.o. + */ +int main() +{ + addr64 lscsa_ea; + + lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1); + lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2); + + /* Step 1: done by exit(). */ + save_event_mask(); /* Step 2. */ + save_tag_mask(); /* Step 3. */ + set_event_mask(); /* Step 4. */ + set_tag_mask(); /* Step 5. */ + build_dma_list(lscsa_ea); /* Step 6. */ + save_upper_240kb(lscsa_ea); /* Step 7. */ + /* Step 8: done by exit(). */ + save_fpcr(); /* Step 9. */ + save_decr(); /* Step 10. */ + save_srr0(); /* Step 11. */ + enqueue_putllc(lscsa_ea); /* Step 12. */ + spill_regs_to_mem(lscsa_ea); /* Step 13. */ + enqueue_sync(lscsa_ea); /* Step 14. */ + set_tag_update(); /* Step 15. */ + read_tag_status(); /* Step 16. */ + read_llar_status(); /* Step 17. */ + save_complete(); /* Step 18. 
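The 64-bit LSCSA effective address used throughout main() arrives as two 32-bit halves through the signal-notification channels and is reassembled via the addr64 union from spu_utils.h. A small sketch of that split, assuming the big-endian layout of the PPE/SPE environment: + * + * union addr64_like { unsigned long long ull; unsigned int ui[2]; } a; + * unsigned long long ea = 0x0000123400005678ULL; // illustrative EA + * a.ui[0] = (unsigned int)(ea >> 32); // high word, as read from SigNotify1 + * a.ui[1] = (unsigned int)ea; // low word, as read from SigNotify2 + * // On a big-endian host, a.ull now equals ea again.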
*/ + + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S new file mode 100644 index 000000000..6659d6a66 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S @@ -0,0 +1,102 @@ +/* + * crt0_s.S: Entry function for SPU-side context save. + * + * Copyright (C) 2005 IBM + * + * Entry function for SPU-side of the context save sequence. + * Saves all 128 GPRs, sets up an initial stack frame, then + * branches to 'main'. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <asm/spu_csa.h> + +.data +.align 7 +.globl regs_spill +regs_spill: +.space SIZEOF_SPU_SPILL_REGS, 0x0 + +.text +.global _start +_start: + /* SPU Context Save Step 1: Save the first 16 GPRs. */ + stqa $0, regs_spill + 0 + stqa $1, regs_spill + 16 + stqa $2, regs_spill + 32 + stqa $3, regs_spill + 48 + stqa $4, regs_spill + 64 + stqa $5, regs_spill + 80 + stqa $6, regs_spill + 96 + stqa $7, regs_spill + 112 + stqa $8, regs_spill + 128 + stqa $9, regs_spill + 144 + stqa $10, regs_spill + 160 + stqa $11, regs_spill + 176 + stqa $12, regs_spill + 192 + stqa $13, regs_spill + 208 + stqa $14, regs_spill + 224 + stqa $15, regs_spill + 240 + + /* SPU Context Save, Step 8: Save the remaining 112 GPRs. */ + ila $3, regs_spill + 256 +save_regs: + lqr $4, save_reg_insts +save_reg_loop: + ai $4, $4, 4 + .balignl 16, 0x40200000 +save_reg_insts: /* must be quad-word aligned. */ + stqd $16, 0($3) + stqd $17, 16($3) + stqd $18, 32($3) + stqd $19, 48($3) + andi $5, $4, 0x7F + stqr $4, save_reg_insts + ai $3, $3, 64 + brnz $5, save_reg_loop + + /* Initialize the stack pointer to point to 16368 + * (16kb-16). The back chain pointer is initialized + * to NULL. + */ + il $0, 0 + il $SP, 16368 + stqd $0, 0($SP) + + /* Allocate a minimum stack frame for the called main. + * This is needed so that main has a place to save the + * link register when it calls another function. + */ + stqd $SP, -160($SP) + ai $SP, $SP, -160 + + /* Call the program's main function. */ + brsl $0, main + + /* In this case main should not return; if it does + * there has been an error in the sequence. Execute + * stop-and-signal with code=0. + */ +.global exit +.global _exit +exit: +_exit: + stop 0x0 + + /* Pad the size of this crt0.o to be multiple of 16 bytes. */ +.balignl 16, 0x0 + diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped new file mode 100644 index 000000000..b9f81ac8a --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped @@ -0,0 +1,743 @@ +/* + * spu_save_dump.h: Copyright (C) 2005 IBM. + * Hex-dump auto generated from spu_save.c. + * Do not edit! 
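+ * + * The array below is the SPU-side save program from spu_save.c, linked with spu_save_crt0.S and expressed as big-endian 32-bit words. The exact tooling is not part of this import, but one plausible generator is a trivial filter over the raw SPU binary image: + * + * #include <stdio.h> + * int main(void) // read the raw image on stdin, print initializer entries + * { + * int b0, b1, b2, b3; + * while ((b0 = getchar()) != EOF && (b1 = getchar()) != EOF && + * (b2 = getchar()) != EOF && (b3 = getchar()) != EOF) + * printf("0x%02x%02x%02x%02x,\n", b0, b1, b2, b3); + * return 0; + * }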
+ */ +static unsigned int spu_save_code[] __attribute__((__aligned__(128))) = { +0x20805000, +0x20805201, +0x20805402, +0x20805603, +0x20805804, +0x20805a05, +0x20805c06, +0x20805e07, +0x20806008, +0x20806209, +0x2080640a, +0x2080660b, +0x2080680c, +0x20806a0d, +0x20806c0e, +0x20806e0f, +0x4201c003, +0x33800184, +0x1c010204, +0x40200000, +0x24000190, +0x24004191, +0x24008192, +0x2400c193, +0x141fc205, +0x23fffd84, +0x1c100183, +0x217ffb85, +0x40800000, +0x409ff801, +0x24000080, +0x24fd8081, +0x1cd80081, +0x33000180, +0x00000000, +0x00000000, +0x01a00182, +0x3ec00083, +0xb1c38103, +0x01a00204, +0x3ec10082, +0x4201400d, +0xb1c38202, +0x01a00583, +0x34218682, +0x3ed80684, +0xb0408184, +0x24218682, +0x01a00603, +0x00200000, +0x34214682, +0x3ed40684, +0xb0408184, +0x40800003, +0x24214682, +0x21a00083, +0x40800082, +0x21a00b02, +0x4020007f, +0x1000251e, +0x42a00002, +0x32800008, +0x4205c00c, +0x00200000, +0x40a0000b, +0x3f82070f, +0x4080020a, +0x40800709, +0x3fe3078f, +0x3fbf0783, +0x3f200183, +0x3fbe0183, +0x3fe30187, +0x18008387, +0x4205c002, +0x3ac30404, +0x1cffc489, +0x00200000, +0x18008403, +0x38830402, +0x4cffc486, +0x3ac28185, +0xb0408584, +0x28830402, +0x1c020408, +0x38828182, +0xb0408385, +0x1802c387, +0x28828182, +0x217ff886, +0x04000582, +0x32800007, +0x21a00802, +0x3fbf0705, +0x3f200285, +0x3fbe0285, +0x3fe30285, +0x21a00885, +0x04000603, +0x21a00903, +0x40803c02, +0x21a00982, +0x04000386, +0x21a00a06, +0x40801202, +0x21a00a82, +0x73000003, +0x24200683, +0x01a00404, +0x00200000, +0x34204682, +0x3ec40683, +0xb0408203, +0x24204682, +0x01a00783, +0x00200000, +0x3421c682, +0x3edc0684, +0xb0408184, +0x2421c682, +0x21a00806, +0x21a00885, +0x3fbf0784, +0x3f200204, +0x3fbe0204, +0x3fe30204, +0x21a00904, +0x40804002, +0x21a00982, +0x21a00a06, +0x40805a02, +0x21a00a82, +0x04000683, +0x21a00803, +0x21a00885, +0x21a00904, +0x40848002, +0x21a00982, +0x21a00a06, +0x40801002, +0x21a00a82, +0x21a00a06, +0x40806602, +0x00200000, +0x35800009, +0x21a00a82, +0x40800083, +0x21a00b83, +0x01a00c02, +0x01a00d83, +0x00003ffb, +0x40800003, +0x4020007f, +0x35000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, 
+0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, 
+0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +}; diff --git a/arch/powerpc/platforms/cell/spufs/spu_utils.h b/arch/powerpc/platforms/cell/spufs/spu_utils.h new file mode 100644 index 000000000..58359feb6 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_utils.h @@ -0,0 +1,160 @@ +/* + * utils.h: Utilities for SPU-side of the context switch operation. + * + * (C) Copyright IBM 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _SPU_CONTEXT_UTILS_H_ +#define _SPU_CONTEXT_UTILS_H_ + +/* + * 64-bit safe EA. + */ +typedef union { + unsigned long long ull; + unsigned int ui[2]; +} addr64; + +/* + * 128-bit register template. + */ +typedef union { + unsigned int slot[4]; + vector unsigned int v; +} spu_reg128v; + +/* + * DMA list structure. + */ +struct dma_list_elem { + unsigned int size; + unsigned int ea_low; +}; + +/* + * Declare storage for 8-byte aligned DMA list. + */ +struct dma_list_elem dma_list[15] __attribute__ ((aligned(8))); + +/* + * External definition for storage + * declared in crt0. + */ +extern spu_reg128v regs_spill[NR_SPU_SPILL_REGS]; + +/* + * Compute LSCSA byte offset for a given field. + */ +static struct spu_lscsa *dummy = (struct spu_lscsa *)0; +#define LSCSA_BYTE_OFFSET(_field) \ + ((char *)(&(dummy->_field)) - (char *)(&(dummy->gprs[0].slot[0]))) +#define LSCSA_QW_OFFSET(_field) (LSCSA_BYTE_OFFSET(_field) >> 4) + +static inline void set_event_mask(void) +{ + unsigned int event_mask = 0; + + /* Save, Step 4: + * Restore, Step 1: + * Set the SPU_RdEventMsk channel to zero to mask + * all events. + */ + spu_writech(SPU_WrEventMask, event_mask); +} + +static inline void set_tag_mask(void) +{ + unsigned int tag_mask = 1; + + /* Save, Step 5: + * Restore, Step 2: + * Set the SPU_WrTagMsk channel to '01' to unmask + * only tag group 0. + */ + spu_writech(MFC_WrTagMask, tag_mask); +} + +static inline void build_dma_list(addr64 lscsa_ea) +{ + unsigned int ea_low; + int i; + + /* Save, Step 6: + * Restore, Step 3: + * Update the effective address for the CSA in the + * pre-canned DMA-list in local storage. + */ + ea_low = lscsa_ea.ui[1]; + ea_low += LSCSA_BYTE_OFFSET(ls[16384]); + + for (i = 0; i < 15; i++, ea_low += 16384) { + dma_list[i].size = 16384; + dma_list[i].ea_low = ea_low; + } +} + +static inline void enqueue_putllc(addr64 lscsa_ea) +{ + unsigned int ls = 0; + unsigned int size = 128; + unsigned int tag_id = 0; + unsigned int cmd = 0xB4; /* PUTLLC */ + + /* Save, Step 12: + * Restore, Step 7: + * Send a PUTLLC (tag 0) command to the MFC using + * an effective address in the CSA in order to + * remove any possible lock-line reservation. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void set_tag_update(void) +{ + unsigned int update_any = 1; + + /* Save, Step 15: + * Restore, Step 8: + * Write the MFC_TagUpdate channel with '01'. + */ + spu_writech(MFC_WrTagUpdate, update_any); +} + +static inline void read_tag_status(void) +{ + /* Save, Step 16: + * Restore, Step 9: + * Read the MFC_TagStat channel data. + */ + spu_readch(MFC_RdTagStat); +} + +static inline void read_llar_status(void) +{ + /* Save, Step 17: + * Restore, Step 10: + * Read the MFC_AtomicStat channel data. 
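+ * + * Tying the pieces together: build_dma_list() above pre-computes the scatter list that spu_save.c hands to the PUTL command in save_upper_240kb(); 15 elements of 16 KB each cover local store from offset 16 KB up to LS_SIZE (256 KB). The same loop in a freestanding sketch, with stand-in types: + * + * struct dle { unsigned int size, ea_low; }; // mirrors dma_list_elem + * static void fill_list(struct dle *l, unsigned int ea_low) + * { + * int i; + * for (i = 0; i < 15; i++, ea_low += 16384) { + * l[i].size = 16384; // one 16 KB chunk per element + * l[i].ea_low = ea_low; // low EA word; the high word goes in MFC_EAH + * } + * }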
+ */ + spu_readch(MFC_RdAtomicStat); +} + +#endif /* _SPU_CONTEXT_UTILS_H_ */ diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h new file mode 100644 index 000000000..bcfd6f063 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -0,0 +1,376 @@ +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef SPUFS_H +#define SPUFS_H + +#include <linux/kref.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/cpumask.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/spu_info.h> + +#define SPUFS_PS_MAP_SIZE 0x20000 +#define SPUFS_MFC_MAP_SIZE 0x1000 +#define SPUFS_CNTL_MAP_SIZE 0x1000 +#define SPUFS_SIGNAL_MAP_SIZE PAGE_SIZE +#define SPUFS_MSS_MAP_SIZE 0x1000 + +/* The magic number for our file system */ +enum { + SPUFS_MAGIC = 0x23c9b64e, +}; + +struct spu_context_ops; +struct spu_gang; + +/* ctx->sched_flags */ +enum { + SPU_SCHED_NOTIFY_ACTIVE, + SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ + SPU_SCHED_SPU_RUN, /* context is within spu_run */ +}; + +enum { + SWITCH_LOG_BUFSIZE = 4096, +}; + +enum { + SWITCH_LOG_START, + SWITCH_LOG_STOP, + SWITCH_LOG_EXIT, +}; + +struct switch_log { + wait_queue_head_t wait; + unsigned long head; + unsigned long tail; + struct switch_log_entry { + struct timespec tstamp; + s32 spu_id; + u32 type; + u32 val; + u64 timebase; + } log[]; +}; + +struct spu_context { + struct spu *spu; /* pointer to a physical SPU */ + struct spu_state csa; /* SPU context save area. */ + spinlock_t mmio_lock; /* protects mmio access */ + struct address_space *local_store; /* local store mapping. */ + struct address_space *mfc; /* 'mfc' area mappings. */ + struct address_space *cntl; /* 'control' area mappings. */ + struct address_space *signal1; /* 'signal1' area mappings. */ + struct address_space *signal2; /* 'signal2' area mappings. */ + struct address_space *mss; /* 'mss' area mappings. */ + struct address_space *psmap; /* 'psmap' area mappings. 
*/ + struct mutex mapping_lock; + u64 object_id; /* user space pointer for oprofile */ + + enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; + struct mutex state_mutex; + struct mutex run_mutex; + + struct mm_struct *owner; + + struct kref kref; + wait_queue_head_t ibox_wq; + wait_queue_head_t wbox_wq; + wait_queue_head_t stop_wq; + wait_queue_head_t mfc_wq; + wait_queue_head_t run_wq; + struct fasync_struct *ibox_fasync; + struct fasync_struct *wbox_fasync; + struct fasync_struct *mfc_fasync; + u32 tagwait; + struct spu_context_ops *ops; + struct work_struct reap_work; + unsigned long flags; + unsigned long event_return; + + struct list_head gang_list; + struct spu_gang *gang; + struct kref *prof_priv_kref; + void ( * prof_priv_release) (struct kref *kref); + + /* owner thread */ + pid_t tid; + + /* scheduler fields */ + struct list_head rq; + unsigned int time_slice; + unsigned long sched_flags; + cpumask_t cpus_allowed; + int policy; + int prio; + int last_ran; + + /* statistics */ + struct { + /* updates protected by ctx->state_mutex */ + enum spu_utilization_state util_state; + unsigned long long tstamp; /* time of last state switch */ + unsigned long long times[SPU_UTIL_MAX]; + unsigned long long vol_ctx_switch; + unsigned long long invol_ctx_switch; + unsigned long long min_flt; + unsigned long long maj_flt; + unsigned long long hash_flt; + unsigned long long slb_flt; + unsigned long long slb_flt_base; /* # at last ctx switch */ + unsigned long long class2_intr; + unsigned long long class2_intr_base; /* # at last ctx switch */ + unsigned long long libassist; + } stats; + + /* context switch log */ + struct switch_log *switch_log; + + struct list_head aff_list; + int aff_head; + int aff_offset; +}; + +struct spu_gang { + struct list_head list; + struct mutex mutex; + struct kref kref; + int contexts; + + struct spu_context *aff_ref_ctx; + struct list_head aff_list_head; + struct mutex aff_mutex; + int aff_flags; + struct spu *aff_ref_spu; + atomic_t aff_sched_count; +}; + +/* Flag bits for spu_gang aff_flags */ +#define AFF_OFFSETS_SET 1 +#define AFF_MERGED 2 + +struct mfc_dma_command { + int32_t pad; /* reserved */ + uint32_t lsa; /* local storage address */ + uint64_t ea; /* effective address */ + uint16_t size; /* transfer size */ + uint16_t tag; /* command tag */ + uint16_t class; /* class ID */ + uint16_t cmd; /* command opcode */ +}; + + +/* SPU context query/set operations. 
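+ * + * Two implementations of the operations table declared next exist, spu_hw_ops and spu_backing_ops: the first acts on live SPU registers, the second on the saved CSA image of a descheduled context. The dispatch pattern, reduced to a sketch with hypothetical names: + * + * struct ctx; + * struct ctx_ops { unsigned int (*status_read)(struct ctx *c); }; + * struct ctx { const struct ctx_ops *ops; }; // swapped on state change + * static unsigned int ctx_status(struct ctx *c) + * { + * return c->ops->status_read(c); // hw- or backing-implementation + * }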
*/ +struct spu_context_ops { + int (*mbox_read) (struct spu_context * ctx, u32 * data); + u32(*mbox_stat_read) (struct spu_context * ctx); + unsigned int (*mbox_stat_poll)(struct spu_context *ctx, + unsigned int events); + int (*ibox_read) (struct spu_context * ctx, u32 * data); + int (*wbox_write) (struct spu_context * ctx, u32 data); + u32(*signal1_read) (struct spu_context * ctx); + void (*signal1_write) (struct spu_context * ctx, u32 data); + u32(*signal2_read) (struct spu_context * ctx); + void (*signal2_write) (struct spu_context * ctx, u32 data); + void (*signal1_type_set) (struct spu_context * ctx, u64 val); + u64(*signal1_type_get) (struct spu_context * ctx); + void (*signal2_type_set) (struct spu_context * ctx, u64 val); + u64(*signal2_type_get) (struct spu_context * ctx); + u32(*npc_read) (struct spu_context * ctx); + void (*npc_write) (struct spu_context * ctx, u32 data); + u32(*status_read) (struct spu_context * ctx); + char*(*get_ls) (struct spu_context * ctx); + void (*privcntl_write) (struct spu_context *ctx, u64 data); + u32 (*runcntl_read) (struct spu_context * ctx); + void (*runcntl_write) (struct spu_context * ctx, u32 data); + void (*runcntl_stop) (struct spu_context * ctx); + void (*master_start) (struct spu_context * ctx); + void (*master_stop) (struct spu_context * ctx); + int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode); + u32 (*read_mfc_tagstatus)(struct spu_context * ctx); + u32 (*get_mfc_free_elements)(struct spu_context *ctx); + int (*send_mfc_command)(struct spu_context * ctx, + struct mfc_dma_command * cmd); + void (*dma_info_read) (struct spu_context * ctx, + struct spu_dma_info * info); + void (*proxydma_info_read) (struct spu_context * ctx, + struct spu_proxydma_info * info); + void (*restart_dma)(struct spu_context *ctx); +}; + +extern struct spu_context_ops spu_hw_ops; +extern struct spu_context_ops spu_backing_ops; + +struct spufs_inode_info { + struct spu_context *i_ctx; + struct spu_gang *i_gang; + struct inode vfs_inode; + int i_openers; +}; +#define SPUFS_I(inode) \ + container_of(inode, struct spufs_inode_info, vfs_inode) + +struct spufs_tree_descr { + const char *name; + const struct file_operations *ops; + umode_t mode; + size_t size; +}; + +extern const struct spufs_tree_descr spufs_dir_contents[]; +extern const struct spufs_tree_descr spufs_dir_nosched_contents[]; +extern const struct spufs_tree_descr spufs_dir_debug_contents[]; + +/* system call implementation */ +extern struct spufs_calls spufs_calls; +struct coredump_params; +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); +long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags, + umode_t mode, struct file *filp); +/* ELF coredump callbacks for writing SPU ELF notes */ +extern int spufs_coredump_extra_notes_size(void); +extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm); + +extern const struct file_operations spufs_context_fops; + +/* gang management */ +struct spu_gang *alloc_spu_gang(void); +struct spu_gang *get_spu_gang(struct spu_gang *gang); +int put_spu_gang(struct spu_gang *gang); +void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx); +void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); + +/* fault handling */ +int spufs_handle_class1(struct spu_context *ctx); +int spufs_handle_class0(struct spu_context *ctx); + +/* affinity */ +struct spu *affinity_check(struct spu_context *ctx); + +/* context management */ +extern atomic_t nr_spu_contexts; +static inline int 
__must_check spu_acquire(struct spu_context *ctx) +{ + return mutex_lock_interruptible(&ctx->state_mutex); +} + +static inline void spu_release(struct spu_context *ctx) +{ + mutex_unlock(&ctx->state_mutex); +} + +struct spu_context * alloc_spu_context(struct spu_gang *gang); +void destroy_spu_context(struct kref *kref); +struct spu_context * get_spu_context(struct spu_context *ctx); +int put_spu_context(struct spu_context *ctx); +void spu_unmap_mappings(struct spu_context *ctx); + +void spu_forget(struct spu_context *ctx); +int __must_check spu_acquire_saved(struct spu_context *ctx); +void spu_release_saved(struct spu_context *ctx); + +int spu_stopped(struct spu_context *ctx, u32 * stat); +void spu_del_from_rq(struct spu_context *ctx); +int spu_activate(struct spu_context *ctx, unsigned long flags); +void spu_deactivate(struct spu_context *ctx); +void spu_yield(struct spu_context *ctx); +void spu_switch_notify(struct spu *spu, struct spu_context *ctx); +void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, + u32 type, u32 val); +void spu_set_timeslice(struct spu_context *ctx); +void spu_update_sched_info(struct spu_context *ctx); +void __spu_update_sched_info(struct spu_context *ctx); +int __init spu_sched_init(void); +void spu_sched_exit(void); + +extern char *isolated_loader; + +/* + * spufs_wait + * Same as wait_event_interruptible(), except that here + * we need to call spu_release(ctx) before sleeping, and + * then spu_acquire(ctx) when awoken. + * + * Returns with state_mutex re-acquired when successful or + * with -ERESTARTSYS and the state_mutex dropped when interrupted. + */ + +#define spufs_wait(wq, condition) \ +({ \ + int __ret = 0; \ + DEFINE_WAIT(__wait); \ + for (;;) { \ + prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + spu_release(ctx); \ + if (signal_pending(current)) { \ + __ret = -ERESTARTSYS; \ + break; \ + } \ + schedule(); \ + __ret = spu_acquire(ctx); \ + if (__ret) \ + break; \ + } \ + finish_wait(&(wq), &__wait); \ + __ret; \ +}) + +size_t spu_wbox_write(struct spu_context *ctx, u32 data); +size_t spu_ibox_read(struct spu_context *ctx, u32 *data); + +/* irq callback funcs. 
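+ * + * A sketch of how a caller is expected to use spufs_wait() above, given its locking contract (it returns 0 with state_mutex re-held, or -ERESTARTSYS with the mutex already dropped); 'some_event' is illustrative: + * + * ret = spu_acquire(ctx); + * if (!ret) { + * ret = spufs_wait(ctx->ibox_wq, some_event(ctx)); + * if (!ret) // condition met, state_mutex held again + * spu_release(ctx); + * } // on -ERESTARTSYS the mutex is already released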
*/ +void spufs_ibox_callback(struct spu *spu); +void spufs_wbox_callback(struct spu *spu); +void spufs_stop_callback(struct spu *spu, int irq); +void spufs_mfc_callback(struct spu *spu); +void spufs_dma_callback(struct spu *spu, int type); + +extern struct spu_coredump_calls spufs_coredump_calls; +struct spufs_coredump_reader { + char *name; + ssize_t (*read)(struct spu_context *ctx, + char __user *buffer, size_t size, loff_t *pos); + u64 (*get)(struct spu_context *ctx); + size_t size; +}; +extern const struct spufs_coredump_reader spufs_coredump_read[]; +extern int spufs_coredump_num_notes; + +extern int spu_init_csa(struct spu_state *csa); +extern void spu_fini_csa(struct spu_state *csa); +extern int spu_save(struct spu_state *prev, struct spu *spu); +extern int spu_restore(struct spu_state *new, struct spu *spu); +extern int spu_switch(struct spu_state *prev, struct spu_state *new, + struct spu *spu); +extern int spu_alloc_lscsa(struct spu_state *csa); +extern void spu_free_lscsa(struct spu_state *csa); + +extern void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state); + +#endif diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.h b/arch/powerpc/platforms/cell/spufs/sputrace.h new file mode 100644 index 000000000..db2656aa4 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/sputrace.h @@ -0,0 +1,39 @@ +#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SPUFS_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM spufs + +TRACE_EVENT(spufs_context, + TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name), + TP_ARGS(ctx, spu, name), + + TP_STRUCT__entry( + __field(const char *, name) + __field(int, owner_tid) + __field(int, number) + ), + + TP_fast_assign( + __entry->name = name; + __entry->owner_tid = ctx->tid; + __entry->number = spu ? spu->number : -1; + ), + + TP_printk("%s (ctxthread = %d, spu = %d)", + __entry->name, __entry->owner_tid, __entry->number) +); + +#define spu_context_trace(name, ctx, spu) \ + trace_spufs_context(ctx, spu, __stringify(name)) +#define spu_context_nospu_trace(name, ctx) \ + trace_spufs_context(ctx, NULL, __stringify(name)) + +#endif /* _TRACE_SPUFS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE sputrace +#include <trace/define_trace.h> diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c new file mode 100644 index 000000000..dde35551e --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -0,0 +1,2222 @@ +/* + * spu_switch.c + * + * (C) Copyright IBM Corp. 2005 + * + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * Host-side part of SPU context switch sequence outlined in + * Synergistic Processor Element, Book IV. + * + * A fully preemptive switch of an SPE is very expensive in terms + * of time and system resources. SPE Book IV indicates that SPE + * allocation should follow a "serially reusable device" model, + * in which the SPE is assigned a task until it completes. When + * this is not possible, this sequence may be used to preemptively + * save, and then later (optionally) restore the context of a + * program executing on an SPE. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/hardirq.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> + +#include <asm/io.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/spu_csa.h> +#include <asm/mmu_context.h> + +#include "spufs.h" + +#include "spu_save_dump.h" +#include "spu_restore_dump.h" + +#if 0 +#define POLL_WHILE_TRUE(_c) { \ + do { \ + } while (_c); \ + } +#else +#define RELAX_SPIN_COUNT 1000 +#define POLL_WHILE_TRUE(_c) { \ + do { \ + int _i; \ + for (_i=0; _i<RELAX_SPIN_COUNT && (_c); _i++) { \ + cpu_relax(); \ + } \ + if (unlikely(_c)) yield(); \ + else break; \ + } while (_c); \ + } +#endif /* debug */ + +#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c)) + +static inline void acquire_spu_lock(struct spu *spu) +{ + /* Save, Step 1: + * Restore, Step 1: + * Acquire SPU-specific mutual exclusion lock. + * TBD. + */ +} + +static inline void release_spu_lock(struct spu *spu) +{ + /* Restore, Step 76: + * Release SPU-specific mutual exclusion lock. + * TBD. + */ +} + +static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 isolate_state; + + /* Save, Step 2: + * Save, Step 6: + * If SPU_Status[E,L,IS] any field is '1', this + * SPU is in isolate state and cannot be context + * saved at this time. + */ + isolate_state = SPU_STATUS_ISOLATED_STATE | + SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS; + return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0; +} + +static inline void disable_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 3: + * Restore, Step 2: + * Save INT_Mask_class0 in CSA. + * Write INT_MASK_class0 with value of 0. + * Save INT_Mask_class1 in CSA. + * Write INT_MASK_class1 with value of 0. + * Save INT_Mask_class2 in CSA. + * Write INT_MASK_class2 with value of 0. + * Synchronize all three interrupts to be sure + * we no longer execute a handler on another CPU. + */ + spin_lock_irq(&spu->register_lock); + if (csa) { + csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0); + csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1); + csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2); + } + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, 0ul); + spu_int_mask_set(spu, 2, 0ul); + eieio(); + spin_unlock_irq(&spu->register_lock); + + /* + * This flag needs to be set before calling synchronize_irq so + * that the update will be visible to the relevant handlers + * via a simple load. + */ + set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); + clear_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags); + synchronize_irq(spu->irqs[0]); + synchronize_irq(spu->irqs[1]); + synchronize_irq(spu->irqs[2]); +} + +static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 4: + * Restore, Step 25. 
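+ * + * (The POLL_WHILE_TRUE/POLL_WHILE_FALSE macros defined above stand in for such a timer to a degree: each poll spins up to RELAX_SPIN_COUNT iterations with cpu_relax(), then calls yield() and retries, so a stuck condition degrades to polite rescheduling rather than a hard hang. One round, unrolled for illustration: + * + * int i; + * for (i = 0; i < 1000 && condition; i++) + * cpu_relax(); // be kind to the SMT sibling + * if (condition) + * yield(); // back off, then poll again.)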
+ * Set a software watchdog timer, which specifies the + * maximum allowable time for a context save sequence. + * + * For present, this implementation will not set a global + * watchdog timer, as virtualization & variable system load + * may cause unpredictable execution times. + */ +} + +static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 5: + * Restore, Step 3: + * Inhibit user-space access (if provided) to this + * SPU by unmapping the virtual pages assigned to + * the SPU memory-mapped I/O (MMIO) for problem + * state. TBD. + */ +} + +static inline void set_switch_pending(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 7: + * Restore, Step 5: + * Set a software context switch pending flag. + * Done above in Step 3 - disable_interrupts(). + */ +} + +static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 8: + * Suspend DMA and save MFC_CNTL. + */ + switch (in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) { + case MFC_CNTL_SUSPEND_IN_PROGRESS: + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == + MFC_CNTL_SUSPEND_COMPLETE); + /* fall through */ + case MFC_CNTL_SUSPEND_COMPLETE: + if (csa) + csa->priv2.mfc_control_RW = + in_be64(&priv2->mfc_control_RW) | + MFC_CNTL_SUSPEND_DMA_QUEUE; + break; + case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION: + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == + MFC_CNTL_SUSPEND_COMPLETE); + if (csa) + csa->priv2.mfc_control_RW = + in_be64(&priv2->mfc_control_RW) & + ~MFC_CNTL_SUSPEND_DMA_QUEUE & + ~MFC_CNTL_SUSPEND_MASK; + break; + } +} + +static inline void save_spu_runcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 9: + * Save SPU_Runcntl in the CSA. This value contains + * the "Application Desired State". + */ + csa->prob.spu_runcntl_RW = in_be32(&prob->spu_runcntl_RW); +} + +static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 10: + * Save MFC_SR1 in the CSA. + */ + csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu); +} + +static inline void save_spu_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 11: + * Read SPU_Status[R], and save to CSA. + */ + if ((in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) == 0) { + csa->prob.spu_status_R = in_be32(&prob->spu_status_R); + } else { + u32 stopped; + + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + stopped = + SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + if ((in_be32(&prob->spu_status_R) & stopped) == 0) + csa->prob.spu_status_R = SPU_STATUS_RUNNING; + else + csa->prob.spu_status_R = in_be32(&prob->spu_status_R); + } +} + +static inline void save_mfc_stopped_status(struct spu_state *csa, + struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + const u64 mask = MFC_CNTL_DECREMENTER_RUNNING | + MFC_CNTL_DMA_QUEUES_EMPTY; + + /* Save, Step 12: + * Read MFC_CNTL[Ds]. Update saved copy of + * CSA.MFC_CNTL[Ds]. + * + * update: do the same with MFC_CNTL[Q]. 
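+ * + * The read-modify-write below keeps every other previously saved MFC_CNTL bit intact and refreshes only Ds and Q from the live register; schematically, for saved copy s and hardware value h: + * + * s = (s & ~mask) | (h & mask); // splice the live Ds/Q bits into the CSA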
+ */ + csa->priv2.mfc_control_RW &= ~mask; + csa->priv2.mfc_control_RW |= in_be64(&priv2->mfc_control_RW) & mask; +} + +static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 13: + * Write MFC_CNTL[Dh] set to a '1' to halt + * the decrementer. + */ + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK); + eieio(); +} + +static inline void save_timebase(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 14: + * Read PPE Timebase High and Timebase low registers + * and save in CSA. TBD. + */ + csa->suspend_time = get_cycles(); +} + +static inline void remove_other_spu_access(struct spu_state *csa, + struct spu *spu) +{ + /* Save, Step 15: + * Remove other SPU access to this SPU by unmapping + * this SPU's pages from their address space. TBD. + */ +} + +static inline void do_mfc_mssync(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 16: + * Restore, Step 11. + * Write SPU_MSSync register. Poll SPU_MSSync[P] + * for a value of 0. + */ + out_be64(&prob->spc_mssync_RW, 1UL); + POLL_WHILE_TRUE(in_be64(&prob->spc_mssync_RW) & MS_SYNC_PENDING); +} + +static inline void issue_mfc_tlbie(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 17: + * Restore, Step 12. + * Restore, Step 48. + * Write TLB_Invalidate_Entry[IS,VPN,L,Lp]=0 register. + * Then issue a PPE sync instruction. + */ + spu_tlb_invalidate(spu); + mb(); +} + +static inline void handle_pending_interrupts(struct spu_state *csa, + struct spu *spu) +{ + /* Save, Step 18: + * Handle any pending interrupts from this SPU + * here. This is OS or hypervisor specific. One + * option is to re-enable interrupts to handle any + * pending interrupts, with the interrupt handlers + * recognizing the software Context Switch Pending + * flag, to ensure the SPU execution or MFC command + * queue is not restarted. TBD. + */ +} + +static inline void save_mfc_queues(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Save, Step 19: + * If MFC_Cntl[Se]=0 then save + * MFC command queues. + */ + if ((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DMA_QUEUES_EMPTY) == 0) { + for (i = 0; i < 8; i++) { + csa->priv2.puq[i].mfc_cq_data0_RW = + in_be64(&priv2->puq[i].mfc_cq_data0_RW); + csa->priv2.puq[i].mfc_cq_data1_RW = + in_be64(&priv2->puq[i].mfc_cq_data1_RW); + csa->priv2.puq[i].mfc_cq_data2_RW = + in_be64(&priv2->puq[i].mfc_cq_data2_RW); + csa->priv2.puq[i].mfc_cq_data3_RW = + in_be64(&priv2->puq[i].mfc_cq_data3_RW); + } + for (i = 0; i < 16; i++) { + csa->priv2.spuq[i].mfc_cq_data0_RW = + in_be64(&priv2->spuq[i].mfc_cq_data0_RW); + csa->priv2.spuq[i].mfc_cq_data1_RW = + in_be64(&priv2->spuq[i].mfc_cq_data1_RW); + csa->priv2.spuq[i].mfc_cq_data2_RW = + in_be64(&priv2->spuq[i].mfc_cq_data2_RW); + csa->priv2.spuq[i].mfc_cq_data3_RW = + in_be64(&priv2->spuq[i].mfc_cq_data3_RW); + } + } +} + +static inline void save_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 20: + * Save the PPU_QueryMask register + * in the CSA. + */ + csa->prob.dma_querymask_RW = in_be32(&prob->dma_querymask_RW); +} + +static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 21: + * Save the PPU_QueryType register + * in the CSA. 
+ */ + csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW); +} + +static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save the Prxy_TagStatus register in the CSA. + * + * It is unnecessary to restore dma_tagstatus_R, however, + * dma_tagstatus_R in the CSA is accessed via backing_ops, so + * we must save it. + */ + csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R); +} + +static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 22: + * Save the MFC_CSR_TSQ register + * in the LSCSA. + */ + csa->priv2.spu_tag_status_query_RW = + in_be64(&priv2->spu_tag_status_query_RW); +} + +static inline void save_mfc_csr_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 23: + * Save the MFC_CSR_CMD1 and MFC_CSR_CMD2 + * registers in the CSA. + */ + csa->priv2.spu_cmd_buf1_RW = in_be64(&priv2->spu_cmd_buf1_RW); + csa->priv2.spu_cmd_buf2_RW = in_be64(&priv2->spu_cmd_buf2_RW); +} + +static inline void save_mfc_csr_ato(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 24: + * Save the MFC_CSR_ATO register in + * the CSA. + */ + csa->priv2.spu_atomic_status_RW = in_be64(&priv2->spu_atomic_status_RW); +} + +static inline void save_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 25: + * Save the MFC_TCLASS_ID register in + * the CSA. + */ + csa->priv1.mfc_tclass_id_RW = spu_mfc_tclass_id_get(spu); +} + +static inline void set_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 26: + * Restore, Step 23. + * Write the MFC_TCLASS_ID register with + * the value 0x10000000. + */ + spu_mfc_tclass_id_set(spu, 0x10000000); + eieio(); +} + +static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 27: + * Restore, Step 14. + * Write MFC_CNTL[Pc]=1 (purge queue). + */ + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_PURGE_DMA_REQUEST | + MFC_CNTL_SUSPEND_MASK); + eieio(); +} + +static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 28: + * Poll MFC_CNTL[Ps] until value '11' is read + * (purge complete). + */ + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_PURGE_DMA_STATUS_MASK) == + MFC_CNTL_PURGE_DMA_COMPLETE); +} + +static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 30: + * Restore, Step 18: + * Write MFC_SR1 with MFC_SR1[D=0,S=1] and + * MFC_SR1[TL,R,Pr,T] set correctly for the + * OS specific environment. + * + * Implementation note: The SPU-side code + * for save/restore is privileged, so the + * MFC_SR1[Pr] bit is not set. + * + */ + spu_mfc_sr1_set(spu, (MFC_STATE1_MASTER_RUN_CONTROL_MASK | + MFC_STATE1_RELOCATE_MASK | + MFC_STATE1_BUS_TLBIE_MASK)); +} + +static inline void save_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 31: + * Save SPU_NPC in the CSA. + */ + csa->prob.spu_npc_RW = in_be32(&prob->spu_npc_RW); +} + +static inline void save_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 32: + * Save SPU_PrivCntl in the CSA. 
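+ * + * (The purge in Steps 27-28 above shows the recurring MFC handshake used throughout this file: write a command bit, then poll the paired status field until it signals completion. Its shape, with hypothetical read_reg()/write_reg() standing in for in_be64()/out_be64() on the mapped priv2 area: + * + * write_reg(MFC_CNTL, PURGE_REQUEST | SUSPEND_MASK); // kick the purge + * while ((read_reg(MFC_CNTL) & PURGE_STATUS_MASK) != PURGE_COMPLETE) + * cpu_relax(); // the real code wraps this in POLL_WHILE_FALSE.)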
+ */ + csa->priv2.spu_privcntl_RW = in_be64(&priv2->spu_privcntl_RW); +} + +static inline void reset_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 33: + * Restore, Step 16: + * Write SPU_PrivCntl[S,Le,A] fields reset to 0. + */ + out_be64(&priv2->spu_privcntl_RW, 0UL); + eieio(); +} + +static inline void save_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 34: + * Save SPU_LSLR in the CSA. + */ + csa->priv2.spu_lslr_RW = in_be64(&priv2->spu_lslr_RW); +} + +static inline void reset_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 35: + * Restore, Step 17. + * Reset SPU_LSLR. + */ + out_be64(&priv2->spu_lslr_RW, LS_ADDR_MASK); + eieio(); +} + +static inline void save_spu_cfg(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 36: + * Save SPU_Cfg in the CSA. + */ + csa->priv2.spu_cfg_RW = in_be64(&priv2->spu_cfg_RW); +} + +static inline void save_pm_trace(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 37: + * Save PM_Trace_Tag_Wait_Mask in the CSA. + * Not performed by this implementation. + */ +} + +static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 38: + * Save the RA_GROUP_ID register and the + * RA_ENABLE register in the CSA. + */ + csa->priv1.resource_allocation_groupID_RW = + spu_resource_allocation_groupID_get(spu); + csa->priv1.resource_allocation_enable_RW = + spu_resource_allocation_enable_get(spu); +} + +static inline void save_ppu_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 39: + * Save MB_Stat register in the CSA. + */ + csa->prob.mb_stat_R = in_be32(&prob->mb_stat_R); +} + +static inline void save_ppu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 40: + * Save the PPU_MB register in the CSA. + */ + csa->prob.pu_mb_R = in_be32(&prob->pu_mb_R); +} + +static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 41: + * Save the PPUINT_MB register in the CSA. + */ + csa->priv2.puint_mb_R = in_be64(&priv2->puint_mb_R); +} + +static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + int i; + + /* Save, Step 42: + */ + + /* Save CH 1, without channel count */ + out_be64(&priv2->spu_chnlcntptr_RW, 1); + csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); + + /* Save the following CH: [0,3,4,24,25,27] */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + csa->spu_chnldata_RW[idx] = in_be64(&priv2->spu_chnldata_RW); + csa->spu_chnlcnt_RW[idx] = in_be64(&priv2->spu_chnlcnt_RW); + out_be64(&priv2->spu_chnldata_RW, 0UL); + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); + } +} + +static inline void save_spu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Save, Step 43: + * Save SPU Read Mailbox Channel.
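+ * + * As in save_ch_part1() above, channel state is reached indirectly: the channel index is written to spu_chnlcntptr_RW, then data and count are read through the single spu_chnldata_RW/spu_chnlcnt_RW window. One access, sketched with a stand-in window struct: + * + * struct chnl_win { unsigned long long ptr, data, cnt; }; + * static unsigned long long read_chnl_data(struct chnl_win *w, unsigned long long idx) + * { + * w->ptr = idx; // select the channel + * return w->data; // read through the shared port + * }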
+ */
+ out_be64(&priv2->spu_chnlcntptr_RW, 29UL);
+ eieio();
+ csa->spu_chnlcnt_RW[29] = in_be64(&priv2->spu_chnlcnt_RW);
+ for (i = 0; i < 4; i++) {
+ csa->spu_mailbox_data[i] = in_be64(&priv2->spu_chnldata_RW);
+ }
+ out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+ eieio();
+}
+
+static inline void save_mfc_cmd(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 44:
+ * Save MFC_CMD Channel.
+ */
+ out_be64(&priv2->spu_chnlcntptr_RW, 21UL);
+ eieio();
+ csa->spu_chnlcnt_RW[21] = in_be64(&priv2->spu_chnlcnt_RW);
+ eieio();
+}
+
+static inline void reset_ch(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 ch_indices[4] = { 21UL, 23UL, 28UL, 30UL };
+ u64 ch_counts[4] = { 16UL, 1UL, 1UL, 1UL };
+ u64 idx;
+ int i;
+
+ /* Save, Step 45:
+ * Reset the following CH: [21, 23, 28, 30]
+ */
+ for (i = 0; i < 4; i++) {
+ idx = ch_indices[i];
+ out_be64(&priv2->spu_chnlcntptr_RW, idx);
+ eieio();
+ out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
+ eieio();
+ }
+}
+
+static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 46:
+ * Restore, Step 25.
+ * Write MFC_CNTL[Sc]=0 (resume queue processing).
+ */
+ out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
+}
+
+static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu,
+ unsigned int *code, int code_size)
+{
+ /* Save, Step 47:
+ * Restore, Step 30.
+ * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All
+ * register, then initialize SLB_VSID and SLB_ESID
+ * to provide access to SPU context save code and
+ * LSCSA.
+ *
+ * This implementation places both the context
+ * switch code and LSCSA in kernel address space.
+ *
+ * Further, this implementation assumes that
+ * MFC_SR1[R]=1 (in other words, that translation
+ * is enabled by the OS environment).
+ */
+ spu_invalidate_slbs(spu);
+ spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size);
+}
+
+static inline void set_switch_active(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 48:
+ * Restore, Step 23.
+ * Change the software context switch pending flag
+ * to context switch active. This implementation does
+ * not use a switch active flag.
+ *
+ * Now that we have saved the mfc in the csa, we can add in the
+ * restart command if an exception occurred.
+ */
+ if (test_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags))
+ csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+ clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+ mb();
+}
+
+static inline void enable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+ unsigned long class1_mask = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+ CLASS1_ENABLE_STORAGE_FAULT_INTR;
+
+ /* Save, Step 49:
+ * Restore, Step 22:
+ * Reset and then enable interrupts, as
+ * needed by OS.
+ *
+ * This implementation enables only class1
+ * (translation) interrupts.
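+ *
+ * Class 1 covers segment and storage (page) faults; it stays
+ * enabled because the save sequence DMAs local storage and the
+ * save code through kernel mappings, and those transfers may
+ * take translation faults that need servicing.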
+ */ + spin_lock_irq(&spu->register_lock); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, class1_mask); + spu_int_mask_set(spu, 2, 0ul); + spin_unlock_irq(&spu->register_lock); +} + +static inline int send_mfc_dma(struct spu *spu, unsigned long ea, + unsigned int ls_offset, unsigned int size, + unsigned int tag, unsigned int rclass, + unsigned int cmd) +{ + struct spu_problem __iomem *prob = spu->problem; + union mfc_tag_size_class_cmd command; + unsigned int transfer_size; + volatile unsigned int status = 0x0; + + while (size > 0) { + transfer_size = + (size > MFC_MAX_DMA_SIZE) ? MFC_MAX_DMA_SIZE : size; + command.u.mfc_size = transfer_size; + command.u.mfc_tag = tag; + command.u.mfc_rclassid = rclass; + command.u.mfc_cmd = cmd; + do { + out_be32(&prob->mfc_lsa_W, ls_offset); + out_be64(&prob->mfc_ea_W, ea); + out_be64(&prob->mfc_union_W.all64, command.all64); + status = + in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32); + if (unlikely(status & 0x2)) { + cpu_relax(); + } + } while (status & 0x3); + size -= transfer_size; + ea += transfer_size; + ls_offset += transfer_size; + } + return 0; +} + +static inline void save_ls_16kb(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&csa->lscsa->ls[0]; + unsigned int ls_offset = 0x0; + unsigned int size = 16384; + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_PUT_CMD; + + /* Save, Step 50: + * Issue a DMA command to copy the first 16K bytes + * of local storage to the CSA. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void set_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 51: + * Restore, Step 31. + * Write SPU_NPC[IE]=0 and SPU_NPC[LSA] to entry + * point address of context save code in local + * storage. + * + * This implementation uses SPU-side save/restore + * programs with entry points at LSA of 0. + */ + out_be32(&prob->spu_npc_RW, 0); + eieio(); +} + +static inline void set_signot1(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + union { + u64 ull; + u32 ui[2]; + } addr64; + + /* Save, Step 52: + * Restore, Step 32: + * Write SPU_Sig_Notify_1 register with upper 32-bits + * of the CSA.LSCSA effective address. + */ + addr64.ull = (u64) csa->lscsa; + out_be32(&prob->signal_notify1, addr64.ui[0]); + eieio(); +} + +static inline void set_signot2(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + union { + u64 ull; + u32 ui[2]; + } addr64; + + /* Save, Step 53: + * Restore, Step 33: + * Write SPU_Sig_Notify_2 register with lower 32-bits + * of the CSA.LSCSA effective address. + */ + addr64.ull = (u64) csa->lscsa; + out_be32(&prob->signal_notify2, addr64.ui[1]); + eieio(); +} + +static inline void send_save_code(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&spu_save_code[0]; + unsigned int ls_offset = 0x0; + unsigned int size = sizeof(spu_save_code); + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GETFS_CMD; + + /* Save, Step 54: + * Issue a DMA command to copy context save code + * to local storage and start SPU. 
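+ *
+ * (Judging by its suffix, MFC_GETFS_CMD is a fenced 'get' that
+ * also starts the SPU: the fence orders it behind the earlier
+ * tag group 0 transfer, and execution begins at the SPU_NPC
+ * written in Step 51 once the code has landed in local storage.)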
+ */
+ send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void set_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 55:
+ * Restore, Step 38.
+ * Write PPU_QueryMask=1 (enable Tag Group 0)
+ * and issue eieio instruction.
+ */
+ out_be32(&prob->dma_querymask_RW, MFC_TAGID_TO_TAGMASK(0));
+ eieio();
+}
+
+static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ u32 mask = MFC_TAGID_TO_TAGMASK(0);
+ unsigned long flags;
+
+ /* Save, Step 56:
+ * Restore, Step 39.
+ * Restore, Step 46.
+ * Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete)
+ * or write PPU_QueryType[TS]=01 and wait for Tag Group
+ * Complete Interrupt. Write INT_Stat_Class0 or
+ * INT_Stat_Class2 with value of 'handled'.
+ */
+ POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask);
+
+ local_irq_save(flags);
+ spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+ spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+ local_irq_restore(flags);
+}
+
+static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ unsigned long flags;
+
+ /* Save, Step 57:
+ * Restore, Step 40.
+ * Poll until SPU_Status[R]=0 or wait for SPU Class 0
+ * or SPU Class 2 interrupt. Write INT_Stat_class0
+ * or INT_Stat_class2 with value of handled.
+ */
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+ local_irq_save(flags);
+ spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+ spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+ local_irq_restore(flags);
+}
+
+static inline int check_save_status(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ u32 complete;
+
+ /* Save, Step 54:
+ * If SPU_Status[P]=1 and SPU_Status[SC] = "success",
+ * context save succeeded, otherwise context save
+ * failed.
+ */
+ complete = ((SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
+ SPU_STATUS_STOPPED_BY_STOP);
+ return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0;
+}
+
+static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 4:
+ * If required, notify the "using application" that
+ * the SPU task has been terminated. TBD.
+ */
+}
+
+static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
+ struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 7:
+ * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend
+ * the queue and halt the decrementer.
+ */
+ out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
+ MFC_CNTL_DECREMENTER_HALTED);
+ eieio();
+}
+
+static inline void wait_suspend_mfc_complete(struct spu_state *csa,
+ struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 8:
+ * Restore, Step 47.
+ * Poll MFC_CNTL[Ss] until 11 is returned.
+ */
+ POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+ MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+ MFC_CNTL_SUSPEND_COMPLETE);
+}
+
+static inline int suspend_spe(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 9:
+ * If SPU_Status[R]=1, stop SPU execution
+ * and wait for stop to complete.
+ *
+ * Returns 1 if SPU_Status[R]=1 on entry.
+ * 0 otherwise + */ + if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) { + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_EXIT_STATUS) { + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if ((in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_LOAD_STATUS) + || (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_STATE)) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + out_be32(&prob->spu_runcntl_RW, 0x2); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_WAITING_FOR_CHANNEL) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + return 1; + } + return 0; +} + +static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 10: + * If SPU_Status[R]=0 and SPU_Status[E,L,IS]=1, + * release SPU from isolate state. + */ + if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) { + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_EXIT_STATUS) { + spu_mfc_sr1_set(spu, + MFC_STATE1_MASTER_RUN_CONTROL_MASK); + eieio(); + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if ((in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_LOAD_STATUS) + || (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_STATE)) { + spu_mfc_sr1_set(spu, + MFC_STATE1_MASTER_RUN_CONTROL_MASK); + eieio(); + out_be32(&prob->spu_runcntl_RW, 0x2); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + } +} + +static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx; + int i; + + /* Restore, Step 20: + */ + + /* Reset CH 1 */ + out_be64(&priv2->spu_chnlcntptr_RW, 1); + out_be64(&priv2->spu_chnldata_RW, 0UL); + + /* Reset the following CH: [0,3,4,24,25,27] */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnldata_RW, 0UL); + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); + } +} + +static inline void reset_ch_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[5] = { 21UL, 23UL, 28UL, 29UL, 30UL }; + u64 ch_counts[5] = { 16UL, 1UL, 1UL, 0UL, 1UL }; + u64 idx; + int i; + + /* Restore, Step 21: + * Reset the following CH: [21, 23, 28, 29, 30] + */ + for (i = 0; i < 5; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void setup_spu_status_part1(struct spu_state *csa, + struct spu *spu) +{ + u32 status_P = SPU_STATUS_STOPPED_BY_STOP; + u32 status_I = SPU_STATUS_INVALID_INSTR; + u32 status_H = SPU_STATUS_STOPPED_BY_HALT; + u32 status_S = SPU_STATUS_SINGLE_STEP; + u32 status_S_I = SPU_STATUS_SINGLE_STEP | SPU_STATUS_INVALID_INSTR; + u32 status_S_P = SPU_STATUS_SINGLE_STEP | SPU_STATUS_STOPPED_BY_STOP; + u32 status_P_H = SPU_STATUS_STOPPED_BY_HALT |SPU_STATUS_STOPPED_BY_STOP; + u32 status_P_I = SPU_STATUS_STOPPED_BY_STOP |SPU_STATUS_INVALID_INSTR; + u32 status_code; + + /* Restore, Step 27: 
+ * If the CSA.SPU_Status[I,S,H,P]=1 then add the correct
+ * instruction sequence to the end of the SPU based restore
+ * code (after the "context restored" stop and signal) to
+ * restore the correct SPU status.
+ *
+ * NOTE: Rather than modifying the SPU executable, we
+ * instead add a new 'stopped_status' field to the
+ * LSCSA. The SPU-side restore reads this field and
+ * takes the appropriate action when exiting.
+ */
+
+ status_code =
+ (csa->prob.spu_status_R >> SPU_STOP_STATUS_SHIFT) & 0xFFFF;
+ if ((csa->prob.spu_status_R & status_P_I) == status_P_I) {
+
+ /* SPU_Status[P,I]=1 - Illegal Instruction followed
+ * by Stop and Signal instruction, followed by 'br -4'.
+ *
+ */
+ csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_I;
+ csa->lscsa->stopped_status.slot[1] = status_code;
+
+ } else if ((csa->prob.spu_status_R & status_P_H) == status_P_H) {
+
+ /* SPU_Status[P,H]=1 - Halt Conditional, followed
+ * by Stop and Signal instruction, followed by
+ * 'br -4'.
+ */
+ csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_H;
+ csa->lscsa->stopped_status.slot[1] = status_code;
+
+ } else if ((csa->prob.spu_status_R & status_S_P) == status_S_P) {
+
+ /* SPU_Status[S,P]=1 - Stop and Signal instruction
+ * followed by 'br -4'.
+ */
+ csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_P;
+ csa->lscsa->stopped_status.slot[1] = status_code;
+
+ } else if ((csa->prob.spu_status_R & status_S_I) == status_S_I) {
+
+ /* SPU_Status[S,I]=1 - Illegal instruction followed
+ * by 'br -4'.
+ */
+ csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_I;
+ csa->lscsa->stopped_status.slot[1] = status_code;
+
+ } else if ((csa->prob.spu_status_R & status_P) == status_P) {
+
+ /* SPU_Status[P]=1 - Stop and Signal instruction
+ * followed by 'br -4'.
+ */
+ csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P;
+ csa->lscsa->stopped_status.slot[1] = status_code;
+
+ } else if ((csa->prob.spu_status_R & status_H) == status_H) {
+
+ /* SPU_Status[H]=1 - Halt Conditional, followed
+ * by 'br -4'.
+ */
+ csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_H;
+
+ } else if ((csa->prob.spu_status_R & status_S) == status_S) {
+
+ /* SPU_Status[S]=1 - Two nop instructions.
+ */
+ csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S;
+
+ } else if ((csa->prob.spu_status_R & status_I) == status_I) {
+
+ /* SPU_Status[I]=1 - Illegal instruction followed
+ * by 'br -4'.
+ */
+ csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_I;
+
+ }
+}
+
+static inline void setup_spu_status_part2(struct spu_state *csa,
+ struct spu *spu)
+{
+ u32 mask;
+
+ /* Restore, Step 28:
+ * If the CSA.SPU_Status[I,S,H,P,R]=0 then
+ * add a 'br *' instruction to the end of
+ * the SPU based restore code.
+ *
+ * NOTE: Rather than modifying the SPU executable, we
+ * instead add a new 'stopped_status' field to the
+ * LSCSA. The SPU-side restore reads this field and
+ * takes the appropriate action when exiting.
+ */
+ mask = SPU_STATUS_INVALID_INSTR |
+ SPU_STATUS_SINGLE_STEP |
+ SPU_STATUS_STOPPED_BY_HALT |
+ SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING;
+ if (!(csa->prob.spu_status_R & mask)) {
+ csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_R;
+ }
+}
+
+static inline void restore_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 29:
+ * Restore RA_GROUP_ID register and the
+ * RA_ENABLE register from the CSA.
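+ * (On implementations with resource allocation management,
+ * these registers steer the SPE's share of memory and I/O
+ * bandwidth.)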
+ */ + spu_resource_allocation_groupID_set(spu, + csa->priv1.resource_allocation_groupID_RW); + spu_resource_allocation_enable_set(spu, + csa->priv1.resource_allocation_enable_RW); +} + +static inline void send_restore_code(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&spu_restore_code[0]; + unsigned int ls_offset = 0x0; + unsigned int size = sizeof(spu_restore_code); + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GETFS_CMD; + + /* Restore, Step 37: + * Issue MFC DMA command to copy context + * restore code to local storage. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void setup_decr(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 34: + * If CSA.MFC_CNTL[Ds]=1 (decrementer was + * running) then adjust decrementer, set + * decrementer running status in LSCSA, + * and set decrementer "wrapped" status + * in LSCSA. + */ + if (csa->priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) { + cycles_t resume_time = get_cycles(); + cycles_t delta_time = resume_time - csa->suspend_time; + + csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING; + if (csa->lscsa->decr.slot[0] < delta_time) { + csa->lscsa->decr_status.slot[0] |= + SPU_DECR_STATUS_WRAPPED; + } + + csa->lscsa->decr.slot[0] -= delta_time; + } else { + csa->lscsa->decr_status.slot[0] = 0; + } +} + +static inline void setup_ppu_mb(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 35: + * Copy the CSA.PU_MB data into the LSCSA. + */ + csa->lscsa->ppu_mb.slot[0] = csa->prob.pu_mb_R; +} + +static inline void setup_ppuint_mb(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 36: + * Copy the CSA.PUINT_MB data into the LSCSA. + */ + csa->lscsa->ppuint_mb.slot[0] = csa->priv2.puint_mb_R; +} + +static inline int check_restore_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 complete; + + /* Restore, Step 40: + * If SPU_Status[P]=1 and SPU_Status[SC] = "success", + * context restore succeeded, otherwise context restore + * failed. + */ + complete = ((SPU_RESTORE_COMPLETE << SPU_STOP_STATUS_SHIFT) | + SPU_STATUS_STOPPED_BY_STOP); + return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0; +} + +static inline void restore_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 41: + * Restore SPU_PrivCntl from the CSA. + */ + out_be64(&priv2->spu_privcntl_RW, csa->priv2.spu_privcntl_RW); + eieio(); +} + +static inline void restore_status_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask; + + /* Restore, Step 42: + * If any CSA.SPU_Status[I,S,H,P]=1, then + * restore the error or single step state. + */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + if (csa->prob.spu_status_R & mask) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } +} + +static inline void restore_status_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask; + + /* Restore, Step 43: + * If all CSA.SPU_Status[I,S,H,P,R]=0 then write + * SPU_RunCntl[R0R1]='01', wait for SPU_Status[R]=1, + * then write '00' to SPU_RunCntl[R0R1] and wait + * for SPU_Status[R]=0. 
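+ * (This run/stop pulse applies only to a context saved in the
+ * plain 'not running, no stop condition' state; the I, S, H
+ * and P cases were already handled in Step 42 above.)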
+ */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING; + if (!(csa->prob.spu_status_R & mask)) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_FALSE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } +} + +static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&csa->lscsa->ls[0]; + unsigned int ls_offset = 0x0; + unsigned int size = 16384; + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GET_CMD; + + /* Restore, Step 44: + * Issue a DMA command to restore the first + * 16kb of local storage from CSA. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 47. + * Write MFC_Cntl[Sc,Sm]='1','0' to suspend + * the queue. + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + eieio(); +} + +static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 49: + * Write INT_MASK_class0 with value of 0. + * Write INT_MASK_class1 with value of 0. + * Write INT_MASK_class2 with value of 0. + * Write INT_STAT_class0 with value of -1. + * Write INT_STAT_class1 with value of -1. + * Write INT_STAT_class2 with value of -1. + */ + spin_lock_irq(&spu->register_lock); + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, 0ul); + spu_int_mask_set(spu, 2, 0ul); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); + spin_unlock_irq(&spu->register_lock); +} + +static inline void restore_mfc_queues(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Restore, Step 50: + * If MFC_Cntl[Se]!=0 then restore + * MFC command queues. + */ + if ((csa->priv2.mfc_control_RW & MFC_CNTL_DMA_QUEUES_EMPTY_MASK) == 0) { + for (i = 0; i < 8; i++) { + out_be64(&priv2->puq[i].mfc_cq_data0_RW, + csa->priv2.puq[i].mfc_cq_data0_RW); + out_be64(&priv2->puq[i].mfc_cq_data1_RW, + csa->priv2.puq[i].mfc_cq_data1_RW); + out_be64(&priv2->puq[i].mfc_cq_data2_RW, + csa->priv2.puq[i].mfc_cq_data2_RW); + out_be64(&priv2->puq[i].mfc_cq_data3_RW, + csa->priv2.puq[i].mfc_cq_data3_RW); + } + for (i = 0; i < 16; i++) { + out_be64(&priv2->spuq[i].mfc_cq_data0_RW, + csa->priv2.spuq[i].mfc_cq_data0_RW); + out_be64(&priv2->spuq[i].mfc_cq_data1_RW, + csa->priv2.spuq[i].mfc_cq_data1_RW); + out_be64(&priv2->spuq[i].mfc_cq_data2_RW, + csa->priv2.spuq[i].mfc_cq_data2_RW); + out_be64(&priv2->spuq[i].mfc_cq_data3_RW, + csa->priv2.spuq[i].mfc_cq_data3_RW); + } + } + eieio(); +} + +static inline void restore_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 51: + * Restore the PPU_QueryMask register from CSA. + */ + out_be32(&prob->dma_querymask_RW, csa->prob.dma_querymask_RW); + eieio(); +} + +static inline void restore_ppu_querytype(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 52: + * Restore the PPU_QueryType register from CSA. 
+ */ + out_be32(&prob->dma_querytype_RW, csa->prob.dma_querytype_RW); + eieio(); +} + +static inline void restore_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 53: + * Restore the MFC_CSR_TSQ register from CSA. + */ + out_be64(&priv2->spu_tag_status_query_RW, + csa->priv2.spu_tag_status_query_RW); + eieio(); +} + +static inline void restore_mfc_csr_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 54: + * Restore the MFC_CSR_CMD1 and MFC_CSR_CMD2 + * registers from CSA. + */ + out_be64(&priv2->spu_cmd_buf1_RW, csa->priv2.spu_cmd_buf1_RW); + out_be64(&priv2->spu_cmd_buf2_RW, csa->priv2.spu_cmd_buf2_RW); + eieio(); +} + +static inline void restore_mfc_csr_ato(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 55: + * Restore the MFC_CSR_ATO register from CSA. + */ + out_be64(&priv2->spu_atomic_status_RW, csa->priv2.spu_atomic_status_RW); +} + +static inline void restore_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 56: + * Restore the MFC_TCLASS_ID register from CSA. + */ + spu_mfc_tclass_id_set(spu, csa->priv1.mfc_tclass_id_RW); + eieio(); +} + +static inline void set_llr_event(struct spu_state *csa, struct spu *spu) +{ + u64 ch0_cnt, ch0_data; + u64 ch1_data; + + /* Restore, Step 57: + * Set the Lock Line Reservation Lost Event by: + * 1. OR CSA.SPU_Event_Status with bit 21 (Lr) set to 1. + * 2. If CSA.SPU_Channel_0_Count=0 and + * CSA.SPU_Wr_Event_Mask[Lr]=1 and + * CSA.SPU_Event_Status[Lr]=0 then set + * CSA.SPU_Event_Status_Count=1. + */ + ch0_cnt = csa->spu_chnlcnt_RW[0]; + ch0_data = csa->spu_chnldata_RW[0]; + ch1_data = csa->spu_chnldata_RW[1]; + csa->spu_chnldata_RW[0] |= MFC_LLR_LOST_EVENT; + if ((ch0_cnt == 0) && !(ch0_data & MFC_LLR_LOST_EVENT) && + (ch1_data & MFC_LLR_LOST_EVENT)) { + csa->spu_chnlcnt_RW[0] = 1; + } +} + +static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 58: + * If the status of the CSA software decrementer + * "wrapped" flag is set, OR in a '1' to + * CSA.SPU_Event_Status[Tm]. + */ + if (!(csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED)) + return; + + if ((csa->spu_chnlcnt_RW[0] == 0) && + (csa->spu_chnldata_RW[1] & 0x20) && + !(csa->spu_chnldata_RW[0] & 0x20)) + csa->spu_chnlcnt_RW[0] = 1; + + csa->spu_chnldata_RW[0] |= 0x20; +} + +static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + int i; + + /* Restore, Step 59: + * Restore the following CH: [0,3,4,24,25,27] + */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[idx]); + out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[idx]); + eieio(); + } +} + +static inline void restore_ch_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[3] = { 9UL, 21UL, 23UL }; + u64 ch_counts[3] = { 1UL, 16UL, 1UL }; + u64 idx; + int i; + + /* Restore, Step 60: + * Restore the following CH: [9,21,23]. 
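+ * Channel 21 (MFC_Cmd) gets its saved free-slot count back
+ * below, while channels 9 (MFC_WrMSSyncReq) and 23
+ * (MFC_WrTagUpdate) are simply reset to a count of one.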
+ */ + ch_counts[0] = 1UL; + ch_counts[1] = csa->spu_chnlcnt_RW[21]; + ch_counts[2] = 1UL; + for (i = 0; i < 3; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void restore_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 61: + * Restore the SPU_LSLR register from CSA. + */ + out_be64(&priv2->spu_lslr_RW, csa->priv2.spu_lslr_RW); + eieio(); +} + +static inline void restore_spu_cfg(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 62: + * Restore the SPU_Cfg register from CSA. + */ + out_be64(&priv2->spu_cfg_RW, csa->priv2.spu_cfg_RW); + eieio(); +} + +static inline void restore_pm_trace(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 63: + * Restore PM_Trace_Tag_Wait_Mask from CSA. + * Not performed by this implementation. + */ +} + +static inline void restore_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 64: + * Restore SPU_NPC from CSA. + */ + out_be32(&prob->spu_npc_RW, csa->prob.spu_npc_RW); + eieio(); +} + +static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Restore, Step 65: + * Restore MFC_RdSPU_MB from CSA. + */ + out_be64(&priv2->spu_chnlcntptr_RW, 29UL); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[29]); + for (i = 0; i < 4; i++) { + out_be64(&priv2->spu_chnldata_RW, csa->spu_mailbox_data[i]); + } + eieio(); +} + +static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 dummy = 0; + + /* Restore, Step 66: + * If CSA.MB_Stat[P]=0 (mailbox empty) then + * read from the PPU_MB register. + */ + if ((csa->prob.mb_stat_R & 0xFF) == 0) { + dummy = in_be32(&prob->pu_mb_R); + eieio(); + } +} + +static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 dummy = 0UL; + + /* Restore, Step 66: + * If CSA.MB_Stat[I]=0 (mailbox empty) then + * read from the PPUINT_MB register. + */ + if ((csa->prob.mb_stat_R & 0xFF0000) == 0) { + dummy = in_be64(&priv2->puint_mb_R); + eieio(); + spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); + eieio(); + } +} + +static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 69: + * Restore the MFC_SR1 register from CSA. + */ + spu_mfc_sr1_set(spu, csa->priv1.mfc_sr1_RW); + eieio(); +} + +static inline void set_int_route(struct spu_state *csa, struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + spu_cpu_affinity_set(spu, ctx->last_ran); +} + +static inline void restore_other_spu_access(struct spu_state *csa, + struct spu *spu) +{ + /* Restore, Step 70: + * Restore other SPU mappings to this SPU. TBD. + */ +} + +static inline void restore_spu_runcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 71: + * If CSA.SPU_Status[R]=1 then write + * SPU_RunCntl[R0R1]='01'. 
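+ * That is, a context that was running when it was saved is
+ * set running again; a context that was stopped stays stopped.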
+ */
+ if (csa->prob.spu_status_R & SPU_STATUS_RUNNING) {
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+ eieio();
+ }
+}
+
+static inline void restore_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 72:
+ * Restore the MFC_CNTL register from the CSA.
+ */
+ out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW);
+ eieio();
+
+ /*
+ * The queue is put back into the same state that was evident prior to
+ * the context switch. The suspend flag is added to the saved state in
+ * the csa, if the operational state was suspending or suspended. In
+ * this case, the code that suspended the mfc is responsible for
+ * continuing it. Note that SPE faults do not change the operational
+ * state of the spu.
+ */
+}
+
+static inline void enable_user_access(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 73:
+ * Enable user-space access (if provided) to this
+ * SPU by mapping the virtual pages assigned to
+ * the SPU memory-mapped I/O (MMIO) for problem
+ * state. TBD.
+ */
+}
+
+static inline void reset_switch_active(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 74:
+ * Reset the "context switch active" flag.
+ * Not performed by this implementation.
+ */
+}
+
+static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 75:
+ * Re-enable SPU interrupts.
+ */
+ spin_lock_irq(&spu->register_lock);
+ spu_int_mask_set(spu, 0, csa->priv1.int_mask_class0_RW);
+ spu_int_mask_set(spu, 1, csa->priv1.int_mask_class1_RW);
+ spu_int_mask_set(spu, 2, csa->priv1.int_mask_class2_RW);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+static int quiesce_spu(struct spu_state *prev, struct spu *spu)
+{
+ /*
+ * Combined steps 2-18 of SPU context save sequence, which
+ * quiesce the SPU state (disable SPU execution, MFC command
+ * queues, decrementer, SPU interrupts, etc.).
+ *
+ * Returns 0 on success.
+ * 2 if failed step 2.
+ * 6 if failed step 6.
+ */
+
+ if (check_spu_isolate(prev, spu)) { /* Step 2. */
+ return 2;
+ }
+ disable_interrupts(prev, spu); /* Step 3. */
+ set_watchdog_timer(prev, spu); /* Step 4. */
+ inhibit_user_access(prev, spu); /* Step 5. */
+ if (check_spu_isolate(prev, spu)) { /* Step 6. */
+ return 6;
+ }
+ set_switch_pending(prev, spu); /* Step 7. */
+ save_mfc_cntl(prev, spu); /* Step 8. */
+ save_spu_runcntl(prev, spu); /* Step 9. */
+ save_mfc_sr1(prev, spu); /* Step 10. */
+ save_spu_status(prev, spu); /* Step 11. */
+ save_mfc_stopped_status(prev, spu); /* Step 12. */
+ halt_mfc_decr(prev, spu); /* Step 13. */
+ save_timebase(prev, spu); /* Step 14. */
+ remove_other_spu_access(prev, spu); /* Step 15. */
+ do_mfc_mssync(prev, spu); /* Step 16. */
+ issue_mfc_tlbie(prev, spu); /* Step 17. */
+ handle_pending_interrupts(prev, spu); /* Step 18. */
+
+ return 0;
+}
+
+static void save_csa(struct spu_state *prev, struct spu *spu)
+{
+ /*
+ * Combine steps 19-44 of SPU context save sequence, which
+ * save regions of the privileged & problem state areas.
+ */
+
+ save_mfc_queues(prev, spu); /* Step 19. */
+ save_ppu_querymask(prev, spu); /* Step 20. */
+ save_ppu_querytype(prev, spu); /* Step 21. */
+ save_ppu_tagstatus(prev, spu); /* NEW. */
+ save_mfc_csr_tsq(prev, spu); /* Step 22. */
+ save_mfc_csr_cmd(prev, spu); /* Step 23. */
+ save_mfc_csr_ato(prev, spu); /* Step 24. */
+ save_mfc_tclass_id(prev, spu); /* Step 25. */
+ set_mfc_tclass_id(prev, spu); /* Step 26. */
+ save_mfc_cmd(prev, spu); /* Step 26a - moved from 44.
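+ * (presumably so the MFC_Cmd channel count
+ * is captured before the queue purge in
+ * Step 27 can disturb it)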
*/ + purge_mfc_queue(prev, spu); /* Step 27. */ + wait_purge_complete(prev, spu); /* Step 28. */ + setup_mfc_sr1(prev, spu); /* Step 30. */ + save_spu_npc(prev, spu); /* Step 31. */ + save_spu_privcntl(prev, spu); /* Step 32. */ + reset_spu_privcntl(prev, spu); /* Step 33. */ + save_spu_lslr(prev, spu); /* Step 34. */ + reset_spu_lslr(prev, spu); /* Step 35. */ + save_spu_cfg(prev, spu); /* Step 36. */ + save_pm_trace(prev, spu); /* Step 37. */ + save_mfc_rag(prev, spu); /* Step 38. */ + save_ppu_mb_stat(prev, spu); /* Step 39. */ + save_ppu_mb(prev, spu); /* Step 40. */ + save_ppuint_mb(prev, spu); /* Step 41. */ + save_ch_part1(prev, spu); /* Step 42. */ + save_spu_mb(prev, spu); /* Step 43. */ + reset_ch(prev, spu); /* Step 45. */ +} + +static void save_lscsa(struct spu_state *prev, struct spu *spu) +{ + /* + * Perform steps 46-57 of SPU context save sequence, + * which save regions of the local store and register + * file. + */ + + resume_mfc_queue(prev, spu); /* Step 46. */ + /* Step 47. */ + setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code)); + set_switch_active(prev, spu); /* Step 48. */ + enable_interrupts(prev, spu); /* Step 49. */ + save_ls_16kb(prev, spu); /* Step 50. */ + set_spu_npc(prev, spu); /* Step 51. */ + set_signot1(prev, spu); /* Step 52. */ + set_signot2(prev, spu); /* Step 53. */ + send_save_code(prev, spu); /* Step 54. */ + set_ppu_querymask(prev, spu); /* Step 55. */ + wait_tag_complete(prev, spu); /* Step 56. */ + wait_spu_stopped(prev, spu); /* Step 57. */ +} + +static void force_spu_isolate_exit(struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Stop SPE execution and wait for completion. */ + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + iobarrier_rw(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); + + /* Restart SPE master runcntl. */ + spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK); + iobarrier_w(); + + /* Initiate isolate exit request and wait for completion. */ + out_be64(&priv2->spu_privcntl_RW, 4LL); + iobarrier_w(); + out_be32(&prob->spu_runcntl_RW, 2); + iobarrier_rw(); + POLL_WHILE_FALSE((in_be32(&prob->spu_status_R) + & SPU_STATUS_STOPPED_BY_STOP)); + + /* Reset load request to normal. */ + out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL); + iobarrier_w(); +} + +/** + * stop_spu_isolate + * Check SPU run-control state and force isolated + * exit function as necessary. + */ +static void stop_spu_isolate(struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) { + /* The SPU is in isolated state; the only way + * to get it out is to perform an isolated + * exit (clean) operation. + */ + force_spu_isolate_exit(spu); + } +} + +static void harvest(struct spu_state *prev, struct spu *spu) +{ + /* + * Perform steps 2-25 of SPU context restore sequence, + * which resets an SPU either after a failed save, or + * when using SPU for first time. + */ + + disable_interrupts(prev, spu); /* Step 2. */ + inhibit_user_access(prev, spu); /* Step 3. */ + terminate_spu_app(prev, spu); /* Step 4. */ + set_switch_pending(prev, spu); /* Step 5. */ + stop_spu_isolate(spu); /* NEW. */ + remove_other_spu_access(prev, spu); /* Step 6. */ + suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */ + wait_suspend_mfc_complete(prev, spu); /* Step 8. */ + if (!suspend_spe(prev, spu)) /* Step 9. */ + clear_spu_status(prev, spu); /* Step 10. 
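+ * (Step 10 applies only when
+ * Step 9 found the SPE already
+ * stopped.)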
*/ + do_mfc_mssync(prev, spu); /* Step 11. */ + issue_mfc_tlbie(prev, spu); /* Step 12. */ + handle_pending_interrupts(prev, spu); /* Step 13. */ + purge_mfc_queue(prev, spu); /* Step 14. */ + wait_purge_complete(prev, spu); /* Step 15. */ + reset_spu_privcntl(prev, spu); /* Step 16. */ + reset_spu_lslr(prev, spu); /* Step 17. */ + setup_mfc_sr1(prev, spu); /* Step 18. */ + spu_invalidate_slbs(spu); /* Step 19. */ + reset_ch_part1(prev, spu); /* Step 20. */ + reset_ch_part2(prev, spu); /* Step 21. */ + enable_interrupts(prev, spu); /* Step 22. */ + set_switch_active(prev, spu); /* Step 23. */ + set_mfc_tclass_id(prev, spu); /* Step 24. */ + resume_mfc_queue(prev, spu); /* Step 25. */ +} + +static void restore_lscsa(struct spu_state *next, struct spu *spu) +{ + /* + * Perform steps 26-40 of SPU context restore sequence, + * which restores regions of the local store and register + * file. + */ + + set_watchdog_timer(next, spu); /* Step 26. */ + setup_spu_status_part1(next, spu); /* Step 27. */ + setup_spu_status_part2(next, spu); /* Step 28. */ + restore_mfc_rag(next, spu); /* Step 29. */ + /* Step 30. */ + setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code)); + set_spu_npc(next, spu); /* Step 31. */ + set_signot1(next, spu); /* Step 32. */ + set_signot2(next, spu); /* Step 33. */ + setup_decr(next, spu); /* Step 34. */ + setup_ppu_mb(next, spu); /* Step 35. */ + setup_ppuint_mb(next, spu); /* Step 36. */ + send_restore_code(next, spu); /* Step 37. */ + set_ppu_querymask(next, spu); /* Step 38. */ + wait_tag_complete(next, spu); /* Step 39. */ + wait_spu_stopped(next, spu); /* Step 40. */ +} + +static void restore_csa(struct spu_state *next, struct spu *spu) +{ + /* + * Combine steps 41-76 of SPU context restore sequence, which + * restore regions of the privileged & problem state areas. + */ + + restore_spu_privcntl(next, spu); /* Step 41. */ + restore_status_part1(next, spu); /* Step 42. */ + restore_status_part2(next, spu); /* Step 43. */ + restore_ls_16kb(next, spu); /* Step 44. */ + wait_tag_complete(next, spu); /* Step 45. */ + suspend_mfc(next, spu); /* Step 46. */ + wait_suspend_mfc_complete(next, spu); /* Step 47. */ + issue_mfc_tlbie(next, spu); /* Step 48. */ + clear_interrupts(next, spu); /* Step 49. */ + restore_mfc_queues(next, spu); /* Step 50. */ + restore_ppu_querymask(next, spu); /* Step 51. */ + restore_ppu_querytype(next, spu); /* Step 52. */ + restore_mfc_csr_tsq(next, spu); /* Step 53. */ + restore_mfc_csr_cmd(next, spu); /* Step 54. */ + restore_mfc_csr_ato(next, spu); /* Step 55. */ + restore_mfc_tclass_id(next, spu); /* Step 56. */ + set_llr_event(next, spu); /* Step 57. */ + restore_decr_wrapped(next, spu); /* Step 58. */ + restore_ch_part1(next, spu); /* Step 59. */ + restore_ch_part2(next, spu); /* Step 60. */ + restore_spu_lslr(next, spu); /* Step 61. */ + restore_spu_cfg(next, spu); /* Step 62. */ + restore_pm_trace(next, spu); /* Step 63. */ + restore_spu_npc(next, spu); /* Step 64. */ + restore_spu_mb(next, spu); /* Step 65. */ + check_ppu_mb_stat(next, spu); /* Step 66. */ + check_ppuint_mb_stat(next, spu); /* Step 67. */ + spu_invalidate_slbs(spu); /* Modified Step 68. */ + restore_mfc_sr1(next, spu); /* Step 69. */ + set_int_route(next, spu); /* NEW */ + restore_other_spu_access(next, spu); /* Step 70. */ + restore_spu_runcntl(next, spu); /* Step 71. */ + restore_mfc_cntl(next, spu); /* Step 72. */ + enable_user_access(next, spu); /* Step 73. */ + reset_switch_active(next, spu); /* Step 74. */ + reenable_interrupts(next, spu); /* Step 75. 
*/
+}
+
+static int __do_spu_save(struct spu_state *prev, struct spu *spu)
+{
+ int rc;
+
+ /*
+ * SPU context save can be broken into three phases:
+ *
+ * (a) quiesce [steps 2-16].
+ * (b) save of CSA, performed by PPE [steps 17-42]
+ * (c) save of LSCSA, mostly performed by SPU [steps 43-52].
+ *
+ * Returns 0 on success.
+ * 2,6 if failed to quiesce SPU
+ * 53 if SPU-side of save failed.
+ */
+
+ rc = quiesce_spu(prev, spu); /* Steps 2-16. */
+ switch (rc) {
+ default:
+ case 2:
+ case 6:
+ harvest(prev, spu);
+ return rc;
+ case 0:
+ break;
+ }
+ save_csa(prev, spu); /* Steps 17-43. */
+ save_lscsa(prev, spu); /* Steps 44-53. */
+ return check_save_status(prev, spu); /* Step 54. */
+}
+
+static int __do_spu_restore(struct spu_state *next, struct spu *spu)
+{
+ int rc;
+
+ /*
+ * SPU context restore can be broken into three phases:
+ *
+ * (a) harvest (or reset) SPU [steps 2-24].
+ * (b) restore LSCSA [steps 25-40], mostly performed by SPU.
+ * (c) restore CSA [steps 41-76], performed by PPE.
+ *
+ * The 'harvest' step is not performed here, but rather
+ * as needed below.
+ */
+
+ restore_lscsa(next, spu); /* Steps 24-39. */
+ rc = check_restore_status(next, spu); /* Step 40. */
+ switch (rc) {
+ default:
+ /* Failed. Return now. */
+ return rc;
+ case 0:
+ /* Fall through to next step. */
+ break;
+ }
+ restore_csa(next, spu);
+
+ return 0;
+}
+
+/**
+ * spu_save - SPU context save, with locking.
+ * @prev: pointer to SPU context save area, to be saved.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Acquire locks, perform the save operation then return.
+ */
+int spu_save(struct spu_state *prev, struct spu *spu)
+{
+ int rc;
+
+ acquire_spu_lock(spu); /* Step 1. */
+ rc = __do_spu_save(prev, spu); /* Steps 2-53. */
+ release_spu_lock(spu);
+ if (rc != 0 && rc != 2 && rc != 6) {
+ panic("%s failed on SPU[%d], rc=%d.\n",
+ __func__, spu->number, rc);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(spu_save);
+
+/**
+ * spu_restore - SPU context restore, with harvest and locking.
+ * @new: pointer to SPU context save area, to be restored.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Perform harvest + restore, as we may not be coming
+ * from a previous successful save operation, and the
+ * hardware state is unknown.
+ */
+int spu_restore(struct spu_state *new, struct spu *spu)
+{
+ int rc;
+
+ acquire_spu_lock(spu);
+ harvest(NULL, spu);
+ spu->slb_replace = 0;
+ rc = __do_spu_restore(new, spu);
+ release_spu_lock(spu);
+ if (rc) {
+ panic("%s failed on SPU[%d] rc=%d.\n",
+ __func__, spu->number, rc);
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(spu_restore);
+
+static void init_prob(struct spu_state *csa)
+{
+ csa->spu_chnlcnt_RW[9] = 1;
+ csa->spu_chnlcnt_RW[21] = 16;
+ csa->spu_chnlcnt_RW[23] = 1;
+ csa->spu_chnlcnt_RW[28] = 1;
+ csa->spu_chnlcnt_RW[30] = 1;
+ csa->prob.spu_runcntl_RW = SPU_RUNCNTL_STOP;
+ csa->prob.mb_stat_R = 0x000400;
+}
+
+static void init_priv1(struct spu_state *csa)
+{
+ /* Enable decode, relocate, tlbie response, master runcntl. */
+ csa->priv1.mfc_sr1_RW = MFC_STATE1_LOCAL_STORAGE_DECODE_MASK |
+ MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+ MFC_STATE1_PROBLEM_STATE_MASK |
+ MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK;
+
+ /* Enable OS-specific set of interrupts.
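+ * Class 0 carries error conditions, class 1 carries translation
+ * (segment and storage) faults, and class 2 carries stop, halt,
+ * mailbox and tag-group completion events, matching the masks
+ * set below.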
*/
+ csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR |
+ CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR |
+ CLASS0_ENABLE_SPU_ERROR_INTR;
+ csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+ CLASS1_ENABLE_STORAGE_FAULT_INTR;
+ csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR |
+ CLASS2_ENABLE_SPU_HALT_INTR |
+ CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR;
+}
+
+static void init_priv2(struct spu_state *csa)
+{
+ csa->priv2.spu_lslr_RW = LS_ADDR_MASK;
+ csa->priv2.mfc_control_RW = MFC_CNTL_RESUME_DMA_QUEUE |
+ MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION |
+ MFC_CNTL_DMA_QUEUES_EMPTY_MASK;
+}
+
+/**
+ * spu_init_csa - allocate and initialize an SPU context save area.
+ *
+ * Allocate and initialize the contents of an SPU context save area.
+ * This includes enabling address translation, interrupt masks, etc.,
+ * as appropriate for the given OS environment.
+ *
+ * Note that storage for the 'lscsa' is allocated separately,
+ * as it is by far the largest of the context save regions,
+ * and may need to be pinned or otherwise specially aligned.
+ */
+int spu_init_csa(struct spu_state *csa)
+{
+ int rc;
+
+ if (!csa)
+ return -EINVAL;
+ memset(csa, 0, sizeof(struct spu_state));
+
+ rc = spu_alloc_lscsa(csa);
+ if (rc)
+ return rc;
+
+ spin_lock_init(&csa->register_lock);
+
+ init_prob(csa);
+ init_priv1(csa);
+ init_priv2(csa);
+
+ return 0;
+}
+
+void spu_fini_csa(struct spu_state *csa)
+{
+ spu_free_lscsa(csa);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
new file mode 100644
index 000000000..a87200a53
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -0,0 +1,88 @@
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/export.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+/**
+ * sys_spu_run - run code loaded into an SPU
+ *
+ * @unpc: next program counter for the SPU
+ * @ustatus: status of the SPU
+ *
+ * This system call transfers the control of execution of a
+ * user space thread to an SPU. It will return when the
+ * SPU has finished executing or when it hits an error
+ * condition and it will be interrupted if a signal needs
+ * to be delivered to a handler in user space.
+ *
+ * The next program counter is set to the passed value
+ * before the SPU starts fetching code and the user space
+ * pointer gets updated with the new value when returning
+ * from kernel space.
+ *
+ * The status value returned from spu_run reflects the
+ * value of the spu_status register after the SPU has stopped.
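+ *
+ * A minimal, illustrative caller (assuming an SPU context fd
+ * obtained from spu_create and a program already loaded into
+ * local storage at address 0) would be:
+ *
+ *   u32 npc = 0, status = 0;
+ *   int ret = syscall(__NR_spu_run, fd, &npc, &status);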
+ * + */ +static long do_spu_run(struct file *filp, + __u32 __user *unpc, + __u32 __user *ustatus) +{ + long ret; + struct spufs_inode_info *i; + u32 npc, status; + + ret = -EFAULT; + if (get_user(npc, unpc)) + goto out; + + /* check if this file was created by spu_create */ + ret = -EINVAL; + if (filp->f_op != &spufs_context_fops) + goto out; + + i = SPUFS_I(file_inode(filp)); + ret = spufs_run_spu(i->i_ctx, &npc, &status); + + if (put_user(npc, unpc)) + ret = -EFAULT; + + if (ustatus && put_user(status, ustatus)) + ret = -EFAULT; +out: + return ret; +} + +static long do_spu_create(const char __user *pathname, unsigned int flags, + umode_t mode, struct file *neighbor) +{ + struct path path; + struct dentry *dentry; + int ret; + + dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY); + ret = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + ret = spufs_create(&path, dentry, flags, mode, neighbor); + done_path_create(&path, dentry); + } + + return ret; +} + +struct spufs_calls spufs_calls = { + .create_thread = do_spu_create, + .spu_run = do_spu_run, + .notify_spus_active = do_notify_spus_active, + .owner = THIS_MODULE, +#ifdef CONFIG_COREDUMP + .coredump_extra_notes_size = spufs_coredump_extra_notes_size, + .coredump_extra_notes_write = spufs_coredump_extra_notes_write, +#endif +}; diff --git a/arch/powerpc/platforms/chrp/Kconfig b/arch/powerpc/platforms/chrp/Kconfig new file mode 100644 index 000000000..d3cdab582 --- /dev/null +++ b/arch/powerpc/platforms/chrp/Kconfig @@ -0,0 +1,15 @@ +config PPC_CHRP + bool "Common Hardware Reference Platform (CHRP) based machines" + depends on 6xx + select HAVE_PCSPKR_PLATFORM + select MPIC + select PPC_I8259 + select PPC_INDIRECT_PCI + select PPC_RTAS + select PPC_RTAS_DAEMON + select RTAS_ERROR_LOGGING + select PPC_MPC106 + select PPC_UDBG_16550 + select PPC_NATIVE + select PCI + default y diff --git a/arch/powerpc/platforms/chrp/Makefile b/arch/powerpc/platforms/chrp/Makefile new file mode 100644 index 000000000..4b3bfadc7 --- /dev/null +++ b/arch/powerpc/platforms/chrp/Makefile @@ -0,0 +1,3 @@ +obj-y += setup.o time.o pegasos_eth.o pci.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_NVRAM) += nvram.o diff --git a/arch/powerpc/platforms/chrp/chrp.h b/arch/powerpc/platforms/chrp/chrp.h new file mode 100644 index 000000000..63f0aee4c --- /dev/null +++ b/arch/powerpc/platforms/chrp/chrp.h @@ -0,0 +1,11 @@ +/* + * Declarations of CHRP platform-specific things. + */ + +extern void chrp_nvram_init(void); +extern void chrp_get_rtc_time(struct rtc_time *); +extern int chrp_set_rtc_time(struct rtc_time *); +extern long chrp_time_init(void); + +extern void chrp_find_bridges(void); +extern void chrp_event_scan(unsigned long); diff --git a/arch/powerpc/platforms/chrp/gg2.h b/arch/powerpc/platforms/chrp/gg2.h new file mode 100644 index 000000000..341ae55b9 --- /dev/null +++ b/arch/powerpc/platforms/chrp/gg2.h @@ -0,0 +1,61 @@ +/* + * include/asm-ppc/gg2.h -- VLSI VAS96011/12 `Golden Gate 2' register definitions + * + * Copyright (C) 1997 Geert Uytterhoeven + * + * This file is based on the following documentation: + * + * The VAS96011/12 Chipset, Data Book, Edition 1.0 + * VLSI Technology, Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. 
+ */ + +#ifndef _ASMPPC_GG2_H +#define _ASMPPC_GG2_H + + /* + * Memory Map (CHRP mode) + */ + +#define GG2_PCI_MEM_BASE 0xc0000000 /* Peripheral memory space */ +#define GG2_ISA_MEM_BASE 0xf7000000 /* Peripheral memory alias */ +#define GG2_ISA_IO_BASE 0xf8000000 /* Peripheral I/O space */ +#define GG2_PCI_CONFIG_BASE 0xfec00000 /* PCI configuration space */ +#define GG2_INT_ACK_SPECIAL 0xfec80000 /* Interrupt acknowledge and */ + /* special PCI cycles */ +#define GG2_ROM_BASE0 0xff000000 /* ROM bank 0 */ +#define GG2_ROM_BASE1 0xff800000 /* ROM bank 1 */ + + + /* + * GG2 specific PCI Registers + */ + +extern void __iomem *gg2_pci_config_base; /* kernel virtual address */ + +#define GG2_PCI_BUSNO 0x40 /* Bus number */ +#define GG2_PCI_SUBBUSNO 0x41 /* Subordinate bus number */ +#define GG2_PCI_DISCCTR 0x42 /* Disconnect counter */ +#define GG2_PCI_PPC_CTRL 0x50 /* PowerPC interface control register */ +#define GG2_PCI_ADDR_MAP 0x5c /* Address map */ +#define GG2_PCI_PCI_CTRL 0x60 /* PCI interface control register */ +#define GG2_PCI_ROM_CTRL 0x70 /* ROM interface control register */ +#define GG2_PCI_ROM_TIME 0x74 /* ROM timing */ +#define GG2_PCI_CC_CTRL 0x80 /* Cache controller control register */ +#define GG2_PCI_DRAM_BANK0 0x90 /* Control register for DRAM bank #0 */ +#define GG2_PCI_DRAM_BANK1 0x94 /* Control register for DRAM bank #1 */ +#define GG2_PCI_DRAM_BANK2 0x98 /* Control register for DRAM bank #2 */ +#define GG2_PCI_DRAM_BANK3 0x9c /* Control register for DRAM bank #3 */ +#define GG2_PCI_DRAM_BANK4 0xa0 /* Control register for DRAM bank #4 */ +#define GG2_PCI_DRAM_BANK5 0xa4 /* Control register for DRAM bank #5 */ +#define GG2_PCI_DRAM_TIME0 0xb0 /* Timing parameters set #0 */ +#define GG2_PCI_DRAM_TIME1 0xb4 /* Timing parameters set #1 */ +#define GG2_PCI_DRAM_CTRL 0xc0 /* DRAM control */ +#define GG2_PCI_ERR_CTRL 0xd0 /* Error control register */ +#define GG2_PCI_ERR_STATUS 0xd4 /* Error status register */ + /* Cleared when read */ + +#endif /* _ASMPPC_GG2_H */ diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c new file mode 100644 index 000000000..9ef8cc337 --- /dev/null +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -0,0 +1,91 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * /dev/nvram driver for PPC + * + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <asm/uaccess.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/rtas.h> +#include "chrp.h" + +static unsigned int nvram_size; +static unsigned char nvram_buf[4]; +static DEFINE_SPINLOCK(nvram_lock); + +static unsigned char chrp_nvram_read(int addr) +{ + unsigned int done; + unsigned long flags; + unsigned char ret; + + if (addr >= nvram_size) { + printk(KERN_DEBUG "%s: read addr %d > nvram_size %u\n", + current->comm, addr, nvram_size); + return 0xff; + } + spin_lock_irqsave(&nvram_lock, flags); + if ((rtas_call(rtas_token("nvram-fetch"), 3, 2, &done, addr, + __pa(nvram_buf), 1) != 0) || 1 != done) + ret = 0xff; + else + ret = nvram_buf[0]; + spin_unlock_irqrestore(&nvram_lock, flags); + + return ret; +} + +static void chrp_nvram_write(int addr, unsigned char val) +{ + unsigned int done; + unsigned long flags; + + if (addr >= nvram_size) { + printk(KERN_DEBUG "%s: write addr %d > nvram_size %u\n", + current->comm, addr, nvram_size); + return; + } + spin_lock_irqsave(&nvram_lock, flags); + nvram_buf[0] = val; + if ((rtas_call(rtas_token("nvram-store"), 3, 2, &done, addr, + __pa(nvram_buf), 1) != 0) || 1 != done) + printk(KERN_DEBUG "rtas IO error storing 0x%02x at %d", val, addr); + spin_unlock_irqrestore(&nvram_lock, flags); +} + +void __init chrp_nvram_init(void) +{ + struct device_node *nvram; + const __be32 *nbytes_p; + unsigned int proplen; + + nvram = of_find_node_by_type(NULL, "nvram"); + if (nvram == NULL) + return; + + nbytes_p = of_get_property(nvram, "#bytes", &proplen); + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) { + of_node_put(nvram); + return; + } + + nvram_size = be32_to_cpup(nbytes_p); + + printk(KERN_INFO "CHRP nvram contains %u bytes\n", nvram_size); + of_node_put(nvram); + + ppc_md.nvram_read_val = chrp_nvram_read; + ppc_md.nvram_write_val = chrp_nvram_write; + + return; +} diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c new file mode 100644 index 000000000..1b87e198f --- /dev/null +++ b/arch/powerpc/platforms/chrp/pci.c @@ -0,0 +1,379 @@ +/* + * CHRP pci routines. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/hydra.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/sections.h> +#include <asm/pci-bridge.h> +#include <asm/grackle.h> +#include <asm/rtas.h> + +#include "chrp.h" +#include "gg2.h" + +/* LongTrail */ +void __iomem *gg2_pci_config_base; + +/* + * The VLSI Golden Gate II has only 512K of PCI configuration space, so we + * limit the bus number to 3 bits + */ + +int gg2_read_config(struct pci_bus *bus, unsigned int devfn, int off, + int len, u32 *val) +{ + volatile void __iomem *cfg_data; + struct pci_controller *hose = pci_bus_to_host(bus); + + if (bus->number > 7) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that off is + * suitably aligned and that len is 1, 2 or 4. 
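+ * The config-space address below packs bus<<16 | devfn<<8 | off,
+ * so the 512K window covers 8 buses of 256 devfns each; hence
+ * the bus->number > 7 check above.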
+ */ + cfg_data = hose->cfg_data + ((bus->number<<16) | (devfn<<8) | off); + switch (len) { + case 1: + *val = in_8(cfg_data); + break; + case 2: + *val = in_le16(cfg_data); + break; + default: + *val = in_le32(cfg_data); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +int gg2_write_config(struct pci_bus *bus, unsigned int devfn, int off, + int len, u32 val) +{ + volatile void __iomem *cfg_data; + struct pci_controller *hose = pci_bus_to_host(bus); + + if (bus->number > 7) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that off is + * suitably aligned and that len is 1, 2 or 4. + */ + cfg_data = hose->cfg_data + ((bus->number<<16) | (devfn<<8) | off); + switch (len) { + case 1: + out_8(cfg_data, val); + break; + case 2: + out_le16(cfg_data, val); + break; + default: + out_le32(cfg_data, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops gg2_pci_ops = +{ + .read = gg2_read_config, + .write = gg2_write_config, +}; + +/* + * Access functions for PCI config space using RTAS calls. + */ +int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 *val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8) + | (((bus->number - hose->first_busno) & 0xff) << 16) + | (hose->global_number << 24); + int ret = -1; + int rval; + + rval = rtas_call(rtas_token("read-pci-config"), 2, 2, &ret, addr, len); + *val = ret; + return rval? PCIBIOS_DEVICE_NOT_FOUND: PCIBIOS_SUCCESSFUL; +} + +int rtas_write_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8) + | (((bus->number - hose->first_busno) & 0xff) << 16) + | (hose->global_number << 24); + int rval; + + rval = rtas_call(rtas_token("write-pci-config"), 3, 1, NULL, + addr, len, val); + return rval? 
PCIBIOS_DEVICE_NOT_FOUND: PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops rtas_pci_ops =
+{
+ .read = rtas_read_config,
+ .write = rtas_write_config,
+};
+
+volatile struct Hydra __iomem *Hydra = NULL;
+
+int __init
+hydra_init(void)
+{
+ struct device_node *np;
+ struct resource r;
+
+ np = of_find_node_by_name(NULL, "mac-io");
+ if (np == NULL || of_address_to_resource(np, 0, &r)) {
+ of_node_put(np);
+ return 0;
+ }
+ of_node_put(np);
+ Hydra = ioremap(r.start, resource_size(&r));
+ printk("Hydra Mac I/O at %llx\n", (unsigned long long)r.start);
+ printk("Hydra Feature_Control was %x",
+ in_le32(&Hydra->Feature_Control));
+ out_le32(&Hydra->Feature_Control, (HYDRA_FC_SCC_CELL_EN |
+ HYDRA_FC_SCSI_CELL_EN |
+ HYDRA_FC_SCCA_ENABLE |
+ HYDRA_FC_SCCB_ENABLE |
+ HYDRA_FC_ARB_BYPASS |
+ HYDRA_FC_MPIC_ENABLE |
+ HYDRA_FC_SLOW_SCC_PCLK |
+ HYDRA_FC_MPIC_IS_MASTER));
+ printk(", now %x\n", in_le32(&Hydra->Feature_Control));
+ return 1;
+}
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+static void __init
+setup_python(struct pci_controller *hose, struct device_node *dev)
+{
+ u32 __iomem *reg;
+ u32 val;
+ struct resource r;
+
+ if (of_address_to_resource(dev, 0, &r)) {
+ printk(KERN_ERR "No address for Python PCI controller\n");
+ return;
+ }
+
+ /* Clear the magic go-slow bit */
+ reg = ioremap(r.start + 0xf6000, 0x40);
+ BUG_ON(!reg);
+ val = in_be32(&reg[12]);
+ if (val & PRG_CL_RESET_VALID) {
+ out_be32(&reg[12], val & ~PRG_CL_RESET_VALID);
+ in_be32(&reg[12]);
+ }
+ iounmap(reg);
+
+ setup_indirect_pci(hose, r.start + 0xf8000, r.start + 0xf8010, 0);
+}
+
+/* Marvell Discovery II based Pegasos 2 */
+static void __init setup_peg2(struct pci_controller *hose, struct device_node *dev)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ struct device_node *rtas;
+
+ rtas = of_find_node_by_name (root, "rtas");
+ if (rtas) {
+ hose->ops = &rtas_pci_ops;
+ of_node_put(rtas);
+ } else {
+ printk ("RTAS supporting Pegasos OF not found, please upgrade"
+ " your firmware\n");
+ }
+ pci_add_flags(PCI_REASSIGN_ALL_BUS);
+ /* keep the reference to the root node */
+}
+
+void __init
+chrp_find_bridges(void)
+{
+ struct device_node *dev;
+ const int *bus_range;
+ int len, index = -1;
+ struct pci_controller *hose;
+ const unsigned int *dma;
+ const char *model, *machine;
+ int is_longtrail = 0, is_mot = 0, is_pegasos = 0;
+ struct device_node *root = of_find_node_by_path("/");
+ struct resource r;
+ /*
+ * The PCI host bridge nodes on some machines don't have
+ * properties to adequately identify them, so we have to
+ * look at what sort of machine this is as well.
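+ * The model strings matched below are "IBM,LongTrail", "MOT" and
+ * the two Pegasos variants, "Pegasos" and "Pegasos2".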
+ */ + machine = of_get_property(root, "model", NULL); + if (machine != NULL) { + is_longtrail = strncmp(machine, "IBM,LongTrail", 13) == 0; + is_mot = strncmp(machine, "MOT", 3) == 0; + if (strncmp(machine, "Pegasos2", 8) == 0) + is_pegasos = 2; + else if (strncmp(machine, "Pegasos", 7) == 0) + is_pegasos = 1; + } + for (dev = root->child; dev != NULL; dev = dev->sibling) { + if (dev->type == NULL || strcmp(dev->type, "pci") != 0) + continue; + ++index; + /* The GG2 bridge on the LongTrail doesn't have an address */ + if (of_address_to_resource(dev, 0, &r) && !is_longtrail) { + printk(KERN_WARNING "Can't use %s: no address\n", + dev->full_name); + continue; + } + bus_range = of_get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s\n", + dev->full_name); + continue; + } + if (bus_range[1] == bus_range[0]) + printk(KERN_INFO "PCI bus %d", bus_range[0]); + else + printk(KERN_INFO "PCI buses %d..%d", + bus_range[0], bus_range[1]); + printk(" controlled by %s", dev->full_name); + if (!is_longtrail) + printk(" at %llx", (unsigned long long)r.start); + printk("\n"); + + hose = pcibios_alloc_controller(dev); + if (!hose) { + printk("Can't allocate PCI controller structure for %s\n", + dev->full_name); + continue; + } + hose->first_busno = hose->self_busno = bus_range[0]; + hose->last_busno = bus_range[1]; + + model = of_get_property(dev, "model", NULL); + if (model == NULL) + model = "<none>"; + if (strncmp(model, "IBM, Python", 11) == 0) { + setup_python(hose, dev); + } else if (is_mot + || strncmp(model, "Motorola, Grackle", 17) == 0) { + setup_grackle(hose); + } else if (is_longtrail) { + void __iomem *p = ioremap(GG2_PCI_CONFIG_BASE, 0x80000); + hose->ops = &gg2_pci_ops; + hose->cfg_data = p; + gg2_pci_config_base = p; + } else if (is_pegasos == 1) { + setup_indirect_pci(hose, 0xfec00cf8, 0xfee00cfc, 0); + } else if (is_pegasos == 2) { + setup_peg2(hose, dev); + } else if (!strncmp(model, "IBM,CPC710", 10)) { + setup_indirect_pci(hose, + r.start + 0x000f8000, + r.start + 0x000f8010, + 0); + if (index == 0) { + dma = of_get_property(dev, "system-dma-base", + &len); + if (dma && len >= sizeof(*dma)) { + dma = (unsigned int *) + (((unsigned long)dma) + + len - sizeof(*dma)); + pci_dram_offset = *dma; + } + } + } else { + printk("No methods for %s (model %s), using RTAS\n", + dev->full_name, model); + hose->ops = &rtas_pci_ops; + } + + pci_process_bridge_OF_ranges(hose, dev, index == 0); + + /* check the first bridge for a property that we can + use to set pci_dram_offset */ + dma = of_get_property(dev, "ibm,dma-ranges", &len); + if (index == 0 && dma != NULL && len >= 6 * sizeof(*dma)) { + pci_dram_offset = dma[2] - dma[3]; + printk("pci_dram_offset = %lx\n", pci_dram_offset); + } + } + of_node_put(root); +} + +/* SL82C105 IDE Control/Status Register */ +#define SL82C105_IDECSR 0x40 + +/* Fixup for Winbond ATA quirk, required for briq mostly because the + * 8259 is configured for level sensitive IRQ 14 and so wants the + * ATA controller to be set to fully native mode or bad things + * will happen. 
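+ *
+ * The fixup below flips the programming-interface bits in
+ * PCI_CLASS_PROG to native mode, disables the second channel through
+ * SL82C105_IDECSR and zeroes the I/O BARs so they get reassigned.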
+ */ +static void chrp_pci_fixup_winbond_ata(struct pci_dev *sl82c105) +{ + u8 progif; + + /* If non-briq machines need that fixup too, please speak up */ + if (!machine_is(chrp) || _chrp_type != _CHRP_briq) + return; + + if ((sl82c105->class & 5) != 5) { + printk("W83C553: Switching SL82C105 IDE to PCI native mode\n"); + /* Enable SL82C105 PCI native IDE mode */ + pci_read_config_byte(sl82c105, PCI_CLASS_PROG, &progif); + pci_write_config_byte(sl82c105, PCI_CLASS_PROG, progif | 0x05); + sl82c105->class |= 0x05; + /* Disable SL82C105 second port */ + pci_write_config_word(sl82c105, SL82C105_IDECSR, 0x0003); + /* Clear IO BARs, they will be reassigned */ + pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_0, 0); + pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_1, 0); + pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_2, 0); + pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_3, 0); + } +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, + chrp_pci_fixup_winbond_ata); + +/* Pegasos2 firmware version 20040810 configures the built-in IDE controller + * in legacy mode, but sets the PCI registers to PCI native mode. + * The chip can only operate in legacy mode, so force the PCI class into legacy + * mode as well. The same fixup must be done to the class-code property in + * the IDE node /pci@80000000/ide@C,1 + */ +static void chrp_pci_fixup_vt8231_ata(struct pci_dev *viaide) +{ + u8 progif; + struct pci_dev *viaisa; + + if (!machine_is(chrp) || _chrp_type != _CHRP_Pegasos) + return; + if (viaide->irq != 14) + return; + + viaisa = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL); + if (!viaisa) + return; + dev_info(&viaide->dev, "Fixing VIA IDE, force legacy mode on\n"); + + pci_read_config_byte(viaide, PCI_CLASS_PROG, &progif); + pci_write_config_byte(viaide, PCI_CLASS_PROG, progif & ~0x5); + viaide->class &= ~0x5; + + pci_dev_put(viaisa); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1, chrp_pci_fixup_vt8231_ata); diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c new file mode 100644 index 000000000..2b4dc6abd --- /dev/null +++ b/arch/powerpc/platforms/chrp/pegasos_eth.c @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2005 Sven Luther <sl@bplan-gmbh.de> + * Thanks to : + * Dale Farnsworth <dale@farnsworth.org> + * Mark A. Greer <mgreer@mvista.com> + * Nicolas DET <nd@bplan-gmbh.de> + * Benjamin Herrenschmidt <benh@kernel.crashing.org> + * And anyone else who helped me on this. 
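+ *
+ * Registers the MV64361 ethernet platform devices for the Pegasos 2
+ * and points port 1 at half of the bridge's integrated SRAM for its
+ * TX/RX descriptor rings.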
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/mv643xx.h>
+#include <linux/pci.h>
+
+#define PEGASOS2_MARVELL_REGBASE (0xf1000000)
+#define PEGASOS2_MARVELL_REGSIZE (0x00004000)
+#define PEGASOS2_SRAM_BASE (0xf2000000)
+#define PEGASOS2_SRAM_SIZE (256*1024)
+
+#define PEGASOS2_SRAM_BASE_ETH_PORT0 (PEGASOS2_SRAM_BASE)
+#define PEGASOS2_SRAM_BASE_ETH_PORT1 (PEGASOS2_SRAM_BASE_ETH_PORT0 + (PEGASOS2_SRAM_SIZE / 2) )
+
+
+#define PEGASOS2_SRAM_RXRING_SIZE (PEGASOS2_SRAM_SIZE/4)
+#define PEGASOS2_SRAM_TXRING_SIZE (PEGASOS2_SRAM_SIZE/4)
+
+#undef BE_VERBOSE
+
+static struct resource mv643xx_eth_shared_resources[] = {
+ [0] = {
+ .name = "ethernet shared base",
+ .start = 0xf1000000 + MV643XX_ETH_SHARED_REGS,
+ .end = 0xf1000000 + MV643XX_ETH_SHARED_REGS +
+ MV643XX_ETH_SHARED_REGS_SIZE - 1,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct platform_device mv643xx_eth_shared_device = {
+ .name = MV643XX_ETH_SHARED_NAME,
+ .id = 0,
+ .num_resources = ARRAY_SIZE(mv643xx_eth_shared_resources),
+ .resource = mv643xx_eth_shared_resources,
+};
+
+/*
+ * The orion mdio driver only covers shared + 0x4 up to shared + 0x84 - 1
+ */
+static struct resource mv643xx_eth_mvmdio_resources[] = {
+ [0] = {
+ .name = "ethernet mdio base",
+ .start = 0xf1000000 + MV643XX_ETH_SHARED_REGS + 0x4,
+ .end = 0xf1000000 + MV643XX_ETH_SHARED_REGS + 0x83,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct platform_device mv643xx_eth_mvmdio_device = {
+ .name = "orion-mdio",
+ .id = -1,
+ .num_resources = ARRAY_SIZE(mv643xx_eth_mvmdio_resources),
+ .resource = mv643xx_eth_mvmdio_resources,
+};
+
+static struct resource mv643xx_eth_port1_resources[] = {
+ [0] = {
+ .name = "eth port1 irq",
+ .start = 9,
+ .end = 9,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct mv643xx_eth_platform_data eth_port1_pd = {
+ .shared = &mv643xx_eth_shared_device,
+ .port_number = 1,
+ .phy_addr = MV643XX_ETH_PHY_ADDR(7),
+
+ .tx_sram_addr = PEGASOS2_SRAM_BASE_ETH_PORT1,
+ .tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE,
+ .tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16,
+
+ .rx_sram_addr = PEGASOS2_SRAM_BASE_ETH_PORT1 + PEGASOS2_SRAM_TXRING_SIZE,
+ .rx_sram_size = PEGASOS2_SRAM_RXRING_SIZE,
+ .rx_queue_size = PEGASOS2_SRAM_RXRING_SIZE/16,
+};
+
+static struct platform_device eth_port1_device = {
+ .name = MV643XX_ETH_NAME,
+ .id = 1,
+ .num_resources = ARRAY_SIZE(mv643xx_eth_port1_resources),
+ .resource = mv643xx_eth_port1_resources,
+ .dev = {
+ .platform_data = &eth_port1_pd,
+ },
+};
+
+static struct platform_device *mv643xx_eth_pd_devs[] __initdata = {
+ &mv643xx_eth_shared_device,
+ &mv643xx_eth_mvmdio_device,
+ &eth_port1_device,
+};
+
+/***********/
+/***********/
+#define MV_READ(offset,val) { val = readl(mv643xx_reg_base + offset); }
+#define MV_WRITE(offset,data) writel(data, mv643xx_reg_base + offset)
+
+static void __iomem *mv643xx_reg_base;
+
+static int Enable_SRAM(void)
+{
+ u32 ALong;
+
+ if (mv643xx_reg_base == NULL)
+ mv643xx_reg_base = ioremap(PEGASOS2_MARVELL_REGBASE,
+ PEGASOS2_MARVELL_REGSIZE);
+
+ if (mv643xx_reg_base == NULL)
+ return -ENOMEM;
+
+#ifdef BE_VERBOSE
+ printk("Pegasos II/Marvell MV64361: register remapped from %p to %p\n",
+ (void *)PEGASOS2_MARVELL_REGBASE, (void *)mv643xx_reg_base);
+#endif
+
+ MV_WRITE(MV64340_SRAM_CONFIG, 0);
+
+ MV_WRITE(MV64340_INTEGRATED_SRAM_BASE_ADDR, PEGASOS2_SRAM_BASE >> 16);
+
+ MV_READ(MV64340_BASE_ADDR_ENABLE, ALong);
+ ALong &= ~(1 << 19);
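+ /* the window-enable bits in this register act as active-low
+ * disables, so clearing bit 19 is what switches the integrated
+ * SRAM window on */
+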
MV_WRITE(MV64340_BASE_ADDR_ENABLE, ALong); + + ALong = 0x02; + ALong |= PEGASOS2_SRAM_BASE & 0xffff0000; + MV_WRITE(MV643XX_ETH_BAR_4, ALong); + + MV_WRITE(MV643XX_ETH_SIZE_REG_4, (PEGASOS2_SRAM_SIZE-1) & 0xffff0000); + + MV_READ(MV643XX_ETH_BASE_ADDR_ENABLE_REG, ALong); + ALong &= ~(1 << 4); + MV_WRITE(MV643XX_ETH_BASE_ADDR_ENABLE_REG, ALong); + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: register unmapped\n"); + printk("Pegasos II/Marvell MV64361: SRAM at %p, size=%x\n", (void*) PEGASOS2_SRAM_BASE, PEGASOS2_SRAM_SIZE); +#endif + + iounmap(mv643xx_reg_base); + mv643xx_reg_base = NULL; + + return 1; +} + + +/***********/ +/***********/ +static int __init mv643xx_eth_add_pds(void) +{ + int ret = 0; + static struct pci_device_id pci_marvell_mv64360[] = { + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, PCI_DEVICE_ID_MARVELL_MV64360) }, + { } + }; + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: init\n"); +#endif + + if (pci_dev_present(pci_marvell_mv64360)) { + ret = platform_add_devices(mv643xx_eth_pd_devs, + ARRAY_SIZE(mv643xx_eth_pd_devs)); + + if ( Enable_SRAM() < 0) + { + eth_port1_pd.tx_sram_addr = 0; + eth_port1_pd.tx_sram_size = 0; + eth_port1_pd.rx_sram_addr = 0; + eth_port1_pd.rx_sram_size = 0; + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: Can't enable the " + "SRAM\n"); +#endif + } + } + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: init is over\n"); +#endif + + return ret; +} + +device_initcall(mv643xx_eth_add_pds); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c new file mode 100644 index 000000000..15ebc4e8a --- /dev/null +++ b/arch/powerpc/platforms/chrp/setup.c @@ -0,0 +1,608 @@ +/* + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + */ + +/* + * bootup setup stuff.. 
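+ *
+ * Covers platform probing, early console selection, RTC wiring and
+ * PCI bridge discovery for CHRP, Pegasos and briq machines.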
+ */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/tty.h> +#include <linux/major.h> +#include <linux/interrupt.h> +#include <linux/reboot.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <generated/utsrelease.h> +#include <linux/adb.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/console.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/initrd.h> +#include <linux/timer.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/hydra.h> +#include <asm/sections.h> +#include <asm/time.h> +#include <asm/i8259.h> +#include <asm/mpic.h> +#include <asm/rtas.h> +#include <asm/xmon.h> + +#include "chrp.h" +#include "gg2.h" + +void rtas_indicator_progress(char *, unsigned short); + +int _chrp_type; +EXPORT_SYMBOL(_chrp_type); + +static struct mpic *chrp_mpic; + +/* Used for doing CHRP event-scans */ +DEFINE_PER_CPU(struct timer_list, heartbeat_timer); +unsigned long event_scan_interval; + +extern unsigned long loops_per_jiffy; + +/* To be replaced by RTAS when available */ +static unsigned int __iomem *briq_SPOR; + +#ifdef CONFIG_SMP +extern struct smp_ops_t chrp_smp_ops; +#endif + +static const char *gg2_memtypes[4] = { + "FPM", "SDRAM", "EDO", "BEDO" +}; +static const char *gg2_cachesizes[4] = { + "256 KB", "512 KB", "1 MB", "Reserved" +}; +static const char *gg2_cachetypes[4] = { + "Asynchronous", "Reserved", "Flow-Through Synchronous", + "Pipelined Synchronous" +}; +static const char *gg2_cachemodes[4] = { + "Disabled", "Write-Through", "Copy-Back", "Transparent Mode" +}; + +static const char *chrp_names[] = { + "Unknown", + "","","", + "Motorola", + "IBM or Longtrail", + "Genesi Pegasos", + "Total Impact Briq" +}; + +void chrp_show_cpuinfo(struct seq_file *m) +{ + int i, sdramen; + unsigned int t; + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + + /* longtrail (goldengate) stuff */ + if (model && !strncmp(model, "IBM,LongTrail", 13)) { + /* VLSI VAS96011/12 `Golden Gate 2' */ + /* Memory banks */ + sdramen = (in_le32(gg2_pci_config_base + GG2_PCI_DRAM_CTRL) + >>31) & 1; + for (i = 0; i < (sdramen ? 4 : 6); i++) { + t = in_le32(gg2_pci_config_base+ + GG2_PCI_DRAM_BANK0+ + i*4); + if (!(t & 1)) + continue; + switch ((t>>8) & 0x1f) { + case 0x1f: + model = "4 MB"; + break; + case 0x1e: + model = "8 MB"; + break; + case 0x1c: + model = "16 MB"; + break; + case 0x18: + model = "32 MB"; + break; + case 0x10: + model = "64 MB"; + break; + case 0x00: + model = "128 MB"; + break; + default: + model = "Reserved"; + break; + } + seq_printf(m, "memory bank %d\t: %s %s\n", i, model, + gg2_memtypes[sdramen ? 
1 : ((t>>1) & 3)]);
+ }
+ /* L2 cache */
+ t = in_le32(gg2_pci_config_base+GG2_PCI_CC_CTRL);
+ seq_printf(m, "board l2\t: %s %s (%s)\n",
+ gg2_cachesizes[(t>>7) & 3],
+ gg2_cachetypes[(t>>2) & 3],
+ gg2_cachemodes[t & 3]);
+ }
+ of_node_put(root);
+}
+
+/*
+ * Fixes for the National Semiconductor PC78308VUL SuperI/O
+ *
+ * Some versions of Open Firmware incorrectly initialize the IRQ settings
+ * for keyboard and mouse
+ */
+static inline void __init sio_write(u8 val, u8 index)
+{
+ outb(index, 0x15c);
+ outb(val, 0x15d);
+}
+
+static inline u8 __init sio_read(u8 index)
+{
+ outb(index, 0x15c);
+ return inb(0x15d);
+}
+
+static void __init sio_fixup_irq(const char *name, u8 device, u8 level,
+ u8 type)
+{
+ u8 level0, type0, active;
+
+ /* select logical device */
+ sio_write(device, 0x07);
+ active = sio_read(0x30);
+ level0 = sio_read(0x70);
+ type0 = sio_read(0x71);
+ if (level0 != level || type0 != type || !active) {
+ printk(KERN_WARNING "sio: %s irq level %d, type %d, %sactive: "
+ "remapping to level %d, type %d, active\n",
+ name, level0, type0, !active ? "in" : "", level, type);
+ sio_write(0x01, 0x30);
+ sio_write(level, 0x70);
+ sio_write(type, 0x71);
+ }
+}
+
+static void __init sio_init(void)
+{
+ struct device_node *root;
+ const char *model;
+
+ root = of_find_node_by_path("/");
+ if (!root)
+ return;
+
+ model = of_get_property(root, "model", NULL);
+ if (model && !strncmp(model, "IBM,LongTrail", 13)) {
+ /* logical device 0 (KBC/Keyboard) */
+ sio_fixup_irq("keyboard", 0, 1, 2);
+ /* select logical device 1 (KBC/Mouse) */
+ sio_fixup_irq("mouse", 1, 12, 2);
+ }
+
+ of_node_put(root);
+}
+
+
+static void __init pegasos_set_l2cr(void)
+{
+ struct device_node *np;
+
+ /* On Pegasos, enable the l2 cache if needed, as the OF forgets it */
+ if (_chrp_type != _CHRP_Pegasos)
+ return;
+
+ /* Enable L2 cache if needed */
+ np = of_find_node_by_type(NULL, "cpu");
+ if (np != NULL) {
+ const unsigned int *l2cr = of_get_property(np, "l2cr", NULL);
+ if (l2cr == NULL) {
+ printk ("Pegasos l2cr : no cpu l2cr property found\n");
+ goto out;
+ }
+ if (!((*l2cr) & 0x80000000)) {
+ printk ("Pegasos l2cr : L2 cache was not active, "
+ "activating\n");
+ _set_L2CR(0);
+ _set_L2CR((*l2cr) | 0x80000000);
+ }
+ }
+out:
+ of_node_put(np);
+}
+
+static void briq_restart(char *cmd)
+{
+ local_irq_disable();
+ if (briq_SPOR)
+ out_be32(briq_SPOR, 0);
+ for(;;);
+}
+
+/*
+ * By default, the input/output-device points to the keyboard/screen.
+ * If no card is installed, the built-in serial port is used as a fallback.
+ * But unfortunately, the firmware does not connect /chosen/{stdin,stdout}
+ * to the built-in serial node. Instead, a /failsafe node is created.
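+ * chrp_init_early() therefore accepts either a node named "failsafe"
+ * or one named "serial" when routing the console to ttyS0.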
+ */ +static __init void chrp_init_early(void) +{ + struct device_node *node; + const char *property; + + if (strstr(boot_command_line, "console=")) + return; + /* find the boot console from /chosen/stdout */ + if (!of_chosen) + return; + node = of_find_node_by_path("/"); + if (!node) + return; + property = of_get_property(node, "model", NULL); + if (!property) + goto out_put; + if (strcmp(property, "Pegasos2")) + goto out_put; + /* this is a Pegasos2 */ + property = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (!property) + goto out_put; + of_node_put(node); + node = of_find_node_by_path(property); + if (!node) + return; + property = of_get_property(node, "device_type", NULL); + if (!property) + goto out_put; + if (strcmp(property, "serial")) + goto out_put; + /* + * The 9pin connector is either /failsafe + * or /pci@80000000/isa@C/serial@i2F8 + * The optional graphics card has also type 'serial' in VGA mode. + */ + property = of_get_property(node, "name", NULL); + if (!property) + goto out_put; + if (!strcmp(property, "failsafe") || !strcmp(property, "serial")) + add_preferred_console("ttyS", 0, NULL); +out_put: + of_node_put(node); +} + +void __init chrp_setup_arch(void) +{ + struct device_node *root = of_find_node_by_path("/"); + const char *machine = NULL; + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000/HZ; + + if (root) + machine = of_get_property(root, "model", NULL); + if (machine && strncmp(machine, "Pegasos", 7) == 0) { + _chrp_type = _CHRP_Pegasos; + } else if (machine && strncmp(machine, "IBM", 3) == 0) { + _chrp_type = _CHRP_IBM; + } else if (machine && strncmp(machine, "MOT", 3) == 0) { + _chrp_type = _CHRP_Motorola; + } else if (machine && strncmp(machine, "TotalImpact,BRIQ-1", 18) == 0) { + _chrp_type = _CHRP_briq; + /* Map the SPOR register on briq and change the restart hook */ + briq_SPOR = ioremap(0xff0000e8, 4); + ppc_md.restart = briq_restart; + } else { + /* Let's assume it is an IBM chrp if all else fails */ + _chrp_type = _CHRP_IBM; + } + of_node_put(root); + printk("chrp type = %x [%s]\n", _chrp_type, chrp_names[_chrp_type]); + + rtas_initialize(); + if (rtas_token("display-character") >= 0) + ppc_md.progress = rtas_progress; + + /* use RTAS time-of-day routines if available */ + if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE) { + ppc_md.get_boot_time = rtas_get_boot_time; + ppc_md.get_rtc_time = rtas_get_rtc_time; + ppc_md.set_rtc_time = rtas_set_rtc_time; + } + + /* On pegasos, enable the L2 cache if not already done by OF */ + pegasos_set_l2cr(); + + /* Lookup PCI host bridges */ + chrp_find_bridges(); + + /* + * Temporary fixes for PCI devices. + * -- Geert + */ + hydra_init(); /* Mac I/O */ + + /* + * Fix the Super I/O configuration + */ + sio_init(); + + pci_create_OF_bus_map(); + + /* + * Print the banner, then scroll down so boot progress + * can be printed. -- Cort + */ + if (ppc_md.progress) ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0x0); +} + +static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq != NO_IRQ) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + +/* + * Finds the open-pic node and sets up the mpic driver. 
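+ * Register addresses come from the root node's "platform-open-pic"
+ * property (na cells per entry, only the low cell is used), while
+ * the IDU/ISU interrupt ranges come from "interrupt-ranges" on the
+ * open-pic node itself.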
+ */ +static void __init chrp_find_openpic(void) +{ + struct device_node *np, *root; + int len, i, j; + int isu_size, idu_size; + const unsigned int *iranges, *opprop = NULL; + int oplen = 0; + unsigned long opaddr; + int na = 1; + + np = of_find_node_by_type(NULL, "open-pic"); + if (np == NULL) + return; + root = of_find_node_by_path("/"); + if (root) { + opprop = of_get_property(root, "platform-open-pic", &oplen); + na = of_n_addr_cells(root); + } + if (opprop && oplen >= na * sizeof(unsigned int)) { + opaddr = opprop[na-1]; /* assume 32-bit */ + oplen /= na * sizeof(unsigned int); + } else { + struct resource r; + if (of_address_to_resource(np, 0, &r)) { + goto bail; + } + opaddr = r.start; + oplen = 0; + } + + printk(KERN_INFO "OpenPIC at %lx\n", opaddr); + + iranges = of_get_property(np, "interrupt-ranges", &len); + if (iranges == NULL) + len = 0; /* non-distributed mpic */ + else + len /= 2 * sizeof(unsigned int); + + /* + * The first pair of cells in interrupt-ranges refers to the + * IDU; subsequent pairs refer to the ISUs. + */ + if (oplen < len) { + printk(KERN_ERR "Insufficient addresses for distributed" + " OpenPIC (%d < %d)\n", oplen, len); + len = oplen; + } + + isu_size = 0; + idu_size = 0; + if (len > 0 && iranges[1] != 0) { + printk(KERN_INFO "OpenPIC irqs %d..%d in IDU\n", + iranges[0], iranges[0] + iranges[1] - 1); + idu_size = iranges[1]; + } + if (len > 1) + isu_size = iranges[3]; + + chrp_mpic = mpic_alloc(np, opaddr, MPIC_NO_RESET, + isu_size, 0, " MPIC "); + if (chrp_mpic == NULL) { + printk(KERN_ERR "Failed to allocate MPIC structure\n"); + goto bail; + } + j = na - 1; + for (i = 1; i < len; ++i) { + iranges += 2; + j += na; + printk(KERN_INFO "OpenPIC irqs %d..%d in ISU at %x\n", + iranges[0], iranges[0] + iranges[1] - 1, + opprop[j]); + mpic_assign_isu(chrp_mpic, i - 1, opprop[j]); + } + + mpic_init(chrp_mpic); + ppc_md.get_irq = mpic_get_irq; + bail: + of_node_put(root); + of_node_put(np); +} + +#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON) +static struct irqaction xmon_irqaction = { + .handler = xmon_irq, + .name = "XMON break", +}; +#endif + +static void __init chrp_find_8259(void) +{ + struct device_node *np, *pic = NULL; + unsigned long chrp_int_ack = 0; + unsigned int cascade_irq; + + /* Look for cascade */ + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "chrp,iic")) { + pic = np; + break; + } + /* Ok, 8259 wasn't found. We need to handle the case where + * we have a pegasos that claims to be chrp but doesn't have + * a proper interrupt tree + */ + if (pic == NULL && chrp_mpic != NULL) { + printk(KERN_ERR "i8259: Not found in device-tree" + " assuming no legacy interrupts\n"); + return; + } + + /* Look for intack. In a perfect world, we would look for it on + * the ISA bus that holds the 8259 but heh... Works that way. If + * we ever see a problem, we can try to re-use the pSeries code here. 
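+ * The property scanned for below is "8259-interrupt-acknowledge"
+ * on the PCI host nodes.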
+ * Also, Pegasos-type platforms don't have a proper node to start + * from anyway + */ + for_each_node_by_name(np, "pci") { + const unsigned int *addrp = of_get_property(np, + "8259-interrupt-acknowledge", NULL); + + if (addrp == NULL) + continue; + chrp_int_ack = addrp[of_n_addr_cells(np)-1]; + break; + } + of_node_put(np); + if (np == NULL) + printk(KERN_WARNING "Cannot find PCI interrupt acknowledge" + " address, polling\n"); + + i8259_init(pic, chrp_int_ack); + if (ppc_md.get_irq == NULL) { + ppc_md.get_irq = i8259_irq; + irq_set_default_host(i8259_get_host()); + } + if (chrp_mpic != NULL) { + cascade_irq = irq_of_parse_and_map(pic, 0); + if (cascade_irq == NO_IRQ) + printk(KERN_ERR "i8259: failed to map cascade irq\n"); + else + irq_set_chained_handler(cascade_irq, + chrp_8259_cascade); + } +} + +void __init chrp_init_IRQ(void) +{ +#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON) + struct device_node *kbd; +#endif + chrp_find_openpic(); + chrp_find_8259(); + +#ifdef CONFIG_SMP + /* Pegasos has no MPIC, those ops would make it crash. It might be an + * option to move setting them to after we probe the PIC though + */ + if (chrp_mpic != NULL) + smp_ops = &chrp_smp_ops; +#endif /* CONFIG_SMP */ + + if (_chrp_type == _CHRP_Pegasos) + ppc_md.get_irq = i8259_irq; + +#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON) + /* see if there is a keyboard in the device tree + with a parent of type "adb" */ + for_each_node_by_name(kbd, "keyboard") + if (kbd->parent && kbd->parent->type + && strcmp(kbd->parent->type, "adb") == 0) + break; + of_node_put(kbd); + if (kbd) + setup_irq(HYDRA_INT_ADB_NMI, &xmon_irqaction); +#endif +} + +void __init +chrp_init2(void) +{ +#ifdef CONFIG_NVRAM + chrp_nvram_init(); +#endif + + request_region(0x20,0x20,"pic1"); + request_region(0xa0,0x20,"pic2"); + request_region(0x00,0x20,"dma1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xc0,0x20,"dma2"); + + if (ppc_md.progress) + ppc_md.progress(" Have fun! ", 0x7777); +} + +static int __init chrp_probe(void) +{ + const char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(), + "device_type", NULL); + if (dtype == NULL) + return 0; + if (strcmp(dtype, "chrp")) + return 0; + + ISA_DMA_THRESHOLD = ~0L; + DMA_MODE_READ = 0x44; + DMA_MODE_WRITE = 0x48; + + pm_power_off = rtas_power_off; + + return 1; +} + +define_machine(chrp) { + .name = "CHRP", + .probe = chrp_probe, + .setup_arch = chrp_setup_arch, + .init = chrp_init2, + .init_early = chrp_init_early, + .show_cpuinfo = chrp_show_cpuinfo, + .init_IRQ = chrp_init_IRQ, + .restart = rtas_restart, + .halt = rtas_halt, + .time_init = chrp_time_init, + .set_rtc_time = chrp_set_rtc_time, + .get_rtc_time = chrp_get_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .phys_mem_access_prot = pci_phys_mem_access_prot, +}; diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c new file mode 100644 index 000000000..b6c9a0dcc --- /dev/null +++ b/arch/powerpc/platforms/chrp/smp.c @@ -0,0 +1,53 @@ +/* + * Smp support for CHRP machines. + * + * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great + * deal of code from the sparc and intel versions. 
+ * + * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu> + * + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/delay.h> +#include <linux/spinlock.h> + +#include <asm/ptrace.h> +#include <linux/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/mpic.h> +#include <asm/rtas.h> + +static int smp_chrp_kick_cpu(int nr) +{ + *(unsigned long *)KERNELBASE = nr; + asm volatile("dcbf 0,%0"::"r"(KERNELBASE):"memory"); + + return 0; +} + +static void smp_chrp_setup_cpu(int cpu_nr) +{ + mpic_setup_this_cpu(); +} + +/* CHRP with openpic */ +struct smp_ops_t chrp_smp_ops = { + .message_pass = smp_mpic_message_pass, + .probe = smp_mpic_probe, + .kick_cpu = smp_chrp_kick_cpu, + .setup_cpu = smp_chrp_setup_cpu, + .give_timebase = rtas_give_timebase, + .take_timebase = rtas_take_timebase, +}; diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c new file mode 100644 index 000000000..f803f4b8a --- /dev/null +++ b/arch/powerpc/platforms/chrp/time.c @@ -0,0 +1,158 @@ +/* + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * + * Adapted for PowerPC (PReP) by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu). + * Copied and modified from arch/i386/kernel/time.c + * + */ +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/time.h> +#include <linux/timex.h> +#include <linux/kernel_stat.h> +#include <linux/mc146818rtc.h> +#include <linux/init.h> +#include <linux/bcd.h> +#include <linux/ioport.h> + +#include <asm/io.h> +#include <asm/nvram.h> +#include <asm/prom.h> +#include <asm/sections.h> +#include <asm/time.h> + +extern spinlock_t rtc_lock; + +#define NVRAM_AS0 0x74 +#define NVRAM_AS1 0x75 +#define NVRAM_DATA 0x77 + +static int nvram_as1 = NVRAM_AS1; +static int nvram_as0 = NVRAM_AS0; +static int nvram_data = NVRAM_DATA; + +long __init chrp_time_init(void) +{ + struct device_node *rtcs; + struct resource r; + int base; + + rtcs = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00"); + if (rtcs == NULL) + rtcs = of_find_compatible_node(NULL, "rtc", "ds1385-rtc"); + if (rtcs == NULL) + return 0; + if (of_address_to_resource(rtcs, 0, &r)) { + of_node_put(rtcs); + return 0; + } + of_node_put(rtcs); + + base = r.start; + nvram_as1 = 0; + nvram_as0 = base; + nvram_data = base + 1; + + return 0; +} + +int chrp_cmos_clock_read(int addr) +{ + if (nvram_as1 != 0) + outb(addr>>8, nvram_as1); + outb(addr, nvram_as0); + return (inb(nvram_data)); +} + +void chrp_cmos_clock_write(unsigned long val, int addr) +{ + if (nvram_as1 != 0) + outb(addr>>8, nvram_as1); + outb(addr, nvram_as0); + outb(val, nvram_data); + return; +} + +/* + * Set the hardware clock. 
-- Cort + */ +int chrp_set_rtc_time(struct rtc_time *tmarg) +{ + unsigned char save_control, save_freq_select; + struct rtc_time tm = *tmarg; + + spin_lock(&rtc_lock); + + save_control = chrp_cmos_clock_read(RTC_CONTROL); /* tell the clock it's being set */ + + chrp_cmos_clock_write((save_control|RTC_SET), RTC_CONTROL); + + save_freq_select = chrp_cmos_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */ + + chrp_cmos_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + tm.tm_sec = bin2bcd(tm.tm_sec); + tm.tm_min = bin2bcd(tm.tm_min); + tm.tm_hour = bin2bcd(tm.tm_hour); + tm.tm_mon = bin2bcd(tm.tm_mon); + tm.tm_mday = bin2bcd(tm.tm_mday); + tm.tm_year = bin2bcd(tm.tm_year); + } + chrp_cmos_clock_write(tm.tm_sec,RTC_SECONDS); + chrp_cmos_clock_write(tm.tm_min,RTC_MINUTES); + chrp_cmos_clock_write(tm.tm_hour,RTC_HOURS); + chrp_cmos_clock_write(tm.tm_mon,RTC_MONTH); + chrp_cmos_clock_write(tm.tm_mday,RTC_DAY_OF_MONTH); + chrp_cmos_clock_write(tm.tm_year,RTC_YEAR); + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + chrp_cmos_clock_write(save_control, RTC_CONTROL); + chrp_cmos_clock_write(save_freq_select, RTC_FREQ_SELECT); + + spin_unlock(&rtc_lock); + return 0; +} + +void chrp_get_rtc_time(struct rtc_time *tm) +{ + unsigned int year, mon, day, hour, min, sec; + + do { + sec = chrp_cmos_clock_read(RTC_SECONDS); + min = chrp_cmos_clock_read(RTC_MINUTES); + hour = chrp_cmos_clock_read(RTC_HOURS); + day = chrp_cmos_clock_read(RTC_DAY_OF_MONTH); + mon = chrp_cmos_clock_read(RTC_MONTH); + year = chrp_cmos_clock_read(RTC_YEAR); + } while (sec != chrp_cmos_clock_read(RTC_SECONDS)); + + if (!(chrp_cmos_clock_read(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + sec = bcd2bin(sec); + min = bcd2bin(min); + hour = bcd2bin(hour); + day = bcd2bin(day); + mon = bcd2bin(mon); + year = bcd2bin(year); + } + if (year < 70) + year += 100; + tm->tm_sec = sec; + tm->tm_min = min; + tm->tm_hour = hour; + tm->tm_mday = day; + tm->tm_mon = mon; + tm->tm_year = year; +} diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig new file mode 100644 index 000000000..a25f496c2 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/Kconfig @@ -0,0 +1,118 @@ +config EMBEDDED6xx + bool "Embedded 6xx/7xx/7xxx-based boards" + depends on 6xx && BROKEN_ON_SMP + +config LINKSTATION + bool "Linkstation / Kurobox(HG) from Buffalo" + depends on EMBEDDED6xx + select MPIC + select FSL_SOC + select PPC_UDBG_16550 if SERIAL_8250 + select DEFAULT_UIMAGE + select MPC10X_BRIDGE + help + Select LINKSTATION if configuring for one of PPC- (MPC8241) + based NAS systems from Buffalo Technology. So far only + KuroboxHG has been tested. In the future classical Kurobox, + Linkstation-I HD-HLAN and HD-HGLAN versions, and PPC-based + Terastation systems should be supported too. + +config STORCENTER + bool "IOMEGA StorCenter" + depends on EMBEDDED6xx + select MPIC + select FSL_SOC + select PPC_UDBG_16550 if SERIAL_8250 + select MPC10X_BRIDGE + help + Select STORCENTER if configuring for the iomega StorCenter + with an 8241 CPU in it. 
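+# The next two boards use the Tsi10x host bridge; TSI108_BRIDGE is
+# defined further down in this file.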
+ +config MPC7448HPC2 + bool "Freescale MPC7448HPC2(Taiga)" + depends on EMBEDDED6xx + select TSI108_BRIDGE + select DEFAULT_UIMAGE + select PPC_UDBG_16550 + help + Select MPC7448HPC2 if configuring for Freescale MPC7448HPC2 (Taiga) + platform + +config PPC_HOLLY + bool "PPC750GX/CL with TSI10x bridge (Hickory/Holly)" + depends on EMBEDDED6xx + select TSI108_BRIDGE + select PPC_UDBG_16550 + help + Select PPC_HOLLY if configuring for an IBM 750GX/CL Eval + Board with TSI108/9 bridge (Hickory/Holly) + +config PPC_C2K + bool "SBS/GEFanuc C2K board" + depends on EMBEDDED6xx + select MV64X60 + select NOT_COHERENT_CACHE + select MTD_CFI_I4 + help + This option enables support for the GE Fanuc C2K board (formerly + an SBS board). + +config MVME5100 + bool "Motorola/Emerson MVME5100" + depends on EMBEDDED6xx + select MPIC + select PCI + select PPC_INDIRECT_PCI + select PPC_I8259 + select PPC_NATIVE + select PPC_UDBG_16550 + help + This option enables support for the Motorola (now Emerson) MVME5100 + board. + +config TSI108_BRIDGE + bool + select PCI + select MPIC + select MPIC_WEIRD + +config MPC10X_BRIDGE + bool + select PPC_INDIRECT_PCI + +config MV64X60 + bool + select PPC_INDIRECT_PCI + select CHECK_CACHE_COHERENCY + +config GAMECUBE_COMMON + bool + +config USBGECKO_UDBG + bool "USB Gecko udbg console for the Nintendo GameCube/Wii" + depends on GAMECUBE_COMMON + help + If you say yes to this option, support will be included for the + USB Gecko adapter as an udbg console. + The USB Gecko is a EXI to USB Serial converter that can be plugged + into a memcard slot in the Nintendo GameCube/Wii. + + This driver bypasses the EXI layer completely. + + If in doubt, say N here. + +config GAMECUBE + bool "Nintendo-GameCube" + depends on EMBEDDED6xx + select GAMECUBE_COMMON + help + Select GAMECUBE if configuring for the Nintendo GameCube. + More information at: <http://gc-linux.sourceforge.net/> + +config WII + bool "Nintendo-Wii" + depends on EMBEDDED6xx + select GAMECUBE_COMMON + help + Select WII if configuring for the Nintendo Wii. + More information at: <http://gc-linux.sourceforge.net/> diff --git a/arch/powerpc/platforms/embedded6xx/Makefile b/arch/powerpc/platforms/embedded6xx/Makefile new file mode 100644 index 000000000..f126a2a09 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/Makefile @@ -0,0 +1,13 @@ +# +# Makefile for the 6xx/7xx/7xxxx linux kernel. +# +obj-$(CONFIG_MPC7448HPC2) += mpc7448_hpc2.o +obj-$(CONFIG_LINKSTATION) += linkstation.o ls_uart.o +obj-$(CONFIG_STORCENTER) += storcenter.o +obj-$(CONFIG_PPC_HOLLY) += holly.o +obj-$(CONFIG_PPC_C2K) += c2k.o +obj-$(CONFIG_USBGECKO_UDBG) += usbgecko_udbg.o +obj-$(CONFIG_GAMECUBE_COMMON) += flipper-pic.o +obj-$(CONFIG_GAMECUBE) += gamecube.o +obj-$(CONFIG_WII) += wii.o hlwd-pic.o +obj-$(CONFIG_MVME5100) += mvme5100.o diff --git a/arch/powerpc/platforms/embedded6xx/c2k.c b/arch/powerpc/platforms/embedded6xx/c2k.c new file mode 100644 index 000000000..ebd3963fd --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/c2k.c @@ -0,0 +1,148 @@ +/* + * Board setup routines for the GEFanuc C2K board + * + * Author: Remi Machet <rmachet@slac.stanford.edu> + * + * Originated from prpmc2800.c + * + * 2008 (c) Stanford University + * 2007 (c) MontaVista, Software, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
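+ *
+ * The C2K is built around the Marvell MV64360 bridge; setup_arch()
+ * maps its MPP and GPP register blocks so that c2k_reset_board()
+ * can toggle the board reset lines at restart time.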
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/seq_file.h> +#include <linux/time.h> +#include <linux/of.h> + +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/time.h> + +#include <mm/mmu_decl.h> + +#include <sysdev/mv64x60.h> + +#define MV64x60_MPP_CNTL_0 0x0000 +#define MV64x60_MPP_CNTL_2 0x0008 + +#define MV64x60_GPP_IO_CNTL 0x0000 +#define MV64x60_GPP_LEVEL_CNTL 0x0010 +#define MV64x60_GPP_VALUE_SET 0x0018 + +static void __iomem *mv64x60_mpp_reg_base; +static void __iomem *mv64x60_gpp_reg_base; + +static void __init c2k_setup_arch(void) +{ + struct device_node *np; + phys_addr_t paddr; + const unsigned int *reg; + + /* + * ioremap mpp and gpp registers in case they are later + * needed by c2k_reset_board(). + */ + np = of_find_compatible_node(NULL, NULL, "marvell,mv64360-mpp"); + reg = of_get_property(np, "reg", NULL); + paddr = of_translate_address(np, reg); + of_node_put(np); + mv64x60_mpp_reg_base = ioremap(paddr, reg[1]); + + np = of_find_compatible_node(NULL, NULL, "marvell,mv64360-gpp"); + reg = of_get_property(np, "reg", NULL); + paddr = of_translate_address(np, reg); + of_node_put(np); + mv64x60_gpp_reg_base = ioremap(paddr, reg[1]); + +#ifdef CONFIG_PCI + mv64x60_pci_init(); +#endif +} + +static void c2k_reset_board(void) +{ + u32 temp; + + local_irq_disable(); + + temp = in_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_0); + temp &= 0xFFFF0FFF; + out_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_0, temp); + + temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL); + temp |= 0x00000004; + out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL, temp); + + temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL); + temp |= 0x00000004; + out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL, temp); + + temp = in_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_2); + temp &= 0xFFFF0FFF; + out_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_2, temp); + + temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL); + temp |= 0x00080000; + out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL, temp); + + temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL); + temp |= 0x00080000; + out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL, temp); + + out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_VALUE_SET, 0x00080004); +} + +static void c2k_restart(char *cmd) +{ + c2k_reset_board(); + msleep(100); + panic("restart failed\n"); +} + +#ifdef CONFIG_NOT_COHERENT_CACHE +#define COHERENCY_SETTING "off" +#else +#define COHERENCY_SETTING "on" +#endif + +void c2k_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "Vendor\t\t: GEFanuc\n"); + seq_printf(m, "coherency\t: %s\n", COHERENCY_SETTING); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init c2k_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "GEFanuc,C2K")) + return 0; + + printk(KERN_INFO "Detected a GEFanuc C2K board\n"); + + _set_L2CR(0); + _set_L2CR(L2CR_L2E | L2CR_L2PE | L2CR_L2I); + return 1; +} + +define_machine(c2k) { + .name = "C2K", + .probe = c2k_probe, + .setup_arch = c2k_setup_arch, + .init_early = mv64x60_init_early, + .show_cpuinfo = c2k_show_cpuinfo, + .init_IRQ = mv64x60_init_irq, + .get_irq = mv64x60_get_irq, + .restart = c2k_restart, + .calibrate_decr = generic_calibrate_decr, +}; diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c new file mode 100644 index 
000000000..4cde8e7da --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -0,0 +1,255 @@ +/* + * arch/powerpc/platforms/embedded6xx/flipper-pic.c + * + * Nintendo GameCube/Wii "Flipper" interrupt controller support. + * Copyright (C) 2004-2009 The GameCube Linux Team + * Copyright (C) 2007,2008,2009 Albert Herranz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + */ +#define DRV_MODULE_NAME "flipper-pic" +#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <asm/io.h> + +#include "flipper-pic.h" + +#define FLIPPER_NR_IRQS 32 + +/* + * Each interrupt has a corresponding bit in both + * the Interrupt Cause (ICR) and Interrupt Mask (IMR) registers. + * + * Enabling/disabling an interrupt line involves setting/clearing + * the corresponding bit in IMR. + * Except for the RSW interrupt, all interrupts get deasserted automatically + * when the source deasserts the interrupt. + */ +#define FLIPPER_ICR 0x00 +#define FLIPPER_ICR_RSS (1<<16) /* reset switch state */ + +#define FLIPPER_IMR 0x04 + +#define FLIPPER_RESET 0x24 + + +/* + * IRQ chip hooks. + * + */ + +static void flipper_pic_mask_and_ack(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + u32 mask = 1 << irq; + + clrbits32(io_base + FLIPPER_IMR, mask); + /* this is at least needed for RSW */ + out_be32(io_base + FLIPPER_ICR, mask); +} + +static void flipper_pic_ack(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + /* this is at least needed for RSW */ + out_be32(io_base + FLIPPER_ICR, 1 << irq); +} + +static void flipper_pic_mask(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + clrbits32(io_base + FLIPPER_IMR, 1 << irq); +} + +static void flipper_pic_unmask(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + setbits32(io_base + FLIPPER_IMR, 1 << irq); +} + + +static struct irq_chip flipper_pic = { + .name = "flipper-pic", + .irq_ack = flipper_pic_ack, + .irq_mask_ack = flipper_pic_mask_and_ack, + .irq_mask = flipper_pic_mask, + .irq_unmask = flipper_pic_unmask, +}; + +/* + * IRQ host hooks. + * + */ + +static struct irq_domain *flipper_irq_host; + +static int flipper_pic_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &flipper_pic, handle_level_irq); + return 0; +} + +static int flipper_pic_match(struct irq_domain *h, struct device_node *np) +{ + return 1; +} + + +static const struct irq_domain_ops flipper_irq_domain_ops = { + .map = flipper_pic_map, + .match = flipper_pic_match, +}; + +/* + * Platform hooks. 
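+ *
+ * flipper_pic_get_irq() resolves the first pending source by ANDing
+ * ICR with IMR and passing __ffs() of the result through the linear
+ * revmap.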
+ * + */ + +static void __flipper_quiesce(void __iomem *io_base) +{ + /* mask and ack all IRQs */ + out_be32(io_base + FLIPPER_IMR, 0x00000000); + out_be32(io_base + FLIPPER_ICR, 0xffffffff); +} + +struct irq_domain * __init flipper_pic_init(struct device_node *np) +{ + struct device_node *pi; + struct irq_domain *irq_domain = NULL; + struct resource res; + void __iomem *io_base; + int retval; + + pi = of_get_parent(np); + if (!pi) { + pr_err("no parent found\n"); + goto out; + } + if (!of_device_is_compatible(pi, "nintendo,flipper-pi")) { + pr_err("unexpected parent compatible\n"); + goto out; + } + + retval = of_address_to_resource(pi, 0, &res); + if (retval) { + pr_err("no io memory range found\n"); + goto out; + } + io_base = ioremap(res.start, resource_size(&res)); + + pr_info("controller at 0x%08x mapped to 0x%p\n", res.start, io_base); + + __flipper_quiesce(io_base); + + irq_domain = irq_domain_add_linear(np, FLIPPER_NR_IRQS, + &flipper_irq_domain_ops, io_base); + if (!irq_domain) { + pr_err("failed to allocate irq_domain\n"); + return NULL; + } + +out: + return irq_domain; +} + +unsigned int flipper_pic_get_irq(void) +{ + void __iomem *io_base = flipper_irq_host->host_data; + int irq; + u32 irq_status; + + irq_status = in_be32(io_base + FLIPPER_ICR) & + in_be32(io_base + FLIPPER_IMR); + if (irq_status == 0) + return NO_IRQ; /* no more IRQs pending */ + + irq = __ffs(irq_status); + return irq_linear_revmap(flipper_irq_host, irq); +} + +/* + * Probe function. + * + */ + +void __init flipper_pic_probe(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "nintendo,flipper-pic"); + BUG_ON(!np); + + flipper_irq_host = flipper_pic_init(np); + BUG_ON(!flipper_irq_host); + + irq_set_default_host(flipper_irq_host); + + of_node_put(np); +} + +/* + * Misc functions related to the flipper chipset. + * + */ + +/** + * flipper_quiesce() - quiesce flipper irq controller + * + * Mask and ack all interrupt sources. + * + */ +void flipper_quiesce(void) +{ + void __iomem *io_base = flipper_irq_host->host_data; + + __flipper_quiesce(io_base); +} + +/* + * Resets the platform. + */ +void flipper_platform_reset(void) +{ + void __iomem *io_base; + + if (flipper_irq_host && flipper_irq_host->host_data) { + io_base = flipper_irq_host->host_data; + out_8(io_base + FLIPPER_RESET, 0x00); + } +} + +/* + * Returns non-zero if the reset button is pressed. + */ +int flipper_is_reset_button_pressed(void) +{ + void __iomem *io_base; + u32 icr; + + if (flipper_irq_host && flipper_irq_host->host_data) { + io_base = flipper_irq_host->host_data; + icr = in_be32(io_base + FLIPPER_ICR); + return !(icr & FLIPPER_ICR_RSS); + } + return 0; +} + diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.h b/arch/powerpc/platforms/embedded6xx/flipper-pic.h new file mode 100644 index 000000000..e339186b5 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.h @@ -0,0 +1,25 @@ +/* + * arch/powerpc/platforms/embedded6xx/flipper-pic.h + * + * Nintendo GameCube/Wii "Flipper" interrupt controller support. + * Copyright (C) 2004-2009 The GameCube Linux Team + * Copyright (C) 2007,2008,2009 Albert Herranz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + */ + +#ifndef __FLIPPER_PIC_H +#define __FLIPPER_PIC_H + +unsigned int flipper_pic_get_irq(void); +void __init flipper_pic_probe(void); + +void flipper_quiesce(void); +void flipper_platform_reset(void); +int flipper_is_reset_button_pressed(void); + +#endif diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c new file mode 100644 index 000000000..fe0ed6ee2 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -0,0 +1,108 @@ +/* + * arch/powerpc/platforms/embedded6xx/gamecube.c + * + * Nintendo GameCube board-specific support + * Copyright (C) 2004-2009 The GameCube Linux Team + * Copyright (C) 2007,2008,2009 Albert Herranz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/kexec.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> + +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/time.h> +#include <asm/udbg.h> + +#include "flipper-pic.h" +#include "usbgecko_udbg.h" + + +static void gamecube_spin(void) +{ + /* spin until power button pressed */ + for (;;) + cpu_relax(); +} + +static void gamecube_restart(char *cmd) +{ + local_irq_disable(); + flipper_platform_reset(); + gamecube_spin(); +} + +static void gamecube_power_off(void) +{ + local_irq_disable(); + gamecube_spin(); +} + +static void gamecube_halt(void) +{ + gamecube_restart(NULL); +} + +static void __init gamecube_init_early(void) +{ + ug_udbg_init(); +} + +static int __init gamecube_probe(void) +{ + unsigned long dt_root; + + dt_root = of_get_flat_dt_root(); + if (!of_flat_dt_is_compatible(dt_root, "nintendo,gamecube")) + return 0; + + pm_power_off = gamecube_power_off; + + return 1; +} + +static void gamecube_shutdown(void) +{ + flipper_quiesce(); +} + +define_machine(gamecube) { + .name = "gamecube", + .probe = gamecube_probe, + .init_early = gamecube_init_early, + .restart = gamecube_restart, + .halt = gamecube_halt, + .init_IRQ = flipper_pic_probe, + .get_irq = flipper_pic_get_irq, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, + .machine_shutdown = gamecube_shutdown, +}; + + +static const struct of_device_id gamecube_of_bus[] = { + { .compatible = "nintendo,flipper", }, + { }, +}; + +static int __init gamecube_device_probe(void) +{ + if (!machine_is(gamecube)) + return 0; + + of_platform_bus_probe(NULL, gamecube_of_bus, NULL); + return 0; +} +device_initcall(gamecube_device_probe); + diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c new file mode 100644 index 000000000..c269caee5 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -0,0 +1,236 @@ +/* + * arch/powerpc/platforms/embedded6xx/hlwd-pic.c + * + * Nintendo Wii "Hollywood" interrupt controller support. + * Copyright (C) 2009 The GameCube Linux Team + * Copyright (C) 2009 Albert Herranz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + */ +#define DRV_MODULE_NAME "hlwd-pic" +#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt + +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <asm/io.h> + +#include "hlwd-pic.h" + +#define HLWD_NR_IRQS 32 + +/* + * Each interrupt has a corresponding bit in both + * the Interrupt Cause (ICR) and Interrupt Mask (IMR) registers. + * + * Enabling/disabling an interrupt line involves asserting/clearing + * the corresponding bit in IMR. ACK'ing a request simply involves + * asserting the corresponding bit in ICR. + */ +#define HW_BROADWAY_ICR 0x00 +#define HW_BROADWAY_IMR 0x04 + + +/* + * IRQ chip hooks. + * + */ + +static void hlwd_pic_mask_and_ack(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + u32 mask = 1 << irq; + + clrbits32(io_base + HW_BROADWAY_IMR, mask); + out_be32(io_base + HW_BROADWAY_ICR, mask); +} + +static void hlwd_pic_ack(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + out_be32(io_base + HW_BROADWAY_ICR, 1 << irq); +} + +static void hlwd_pic_mask(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + clrbits32(io_base + HW_BROADWAY_IMR, 1 << irq); +} + +static void hlwd_pic_unmask(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + setbits32(io_base + HW_BROADWAY_IMR, 1 << irq); +} + + +static struct irq_chip hlwd_pic = { + .name = "hlwd-pic", + .irq_ack = hlwd_pic_ack, + .irq_mask_ack = hlwd_pic_mask_and_ack, + .irq_mask = hlwd_pic_mask, + .irq_unmask = hlwd_pic_unmask, +}; + +/* + * IRQ host hooks. + * + */ + +static struct irq_domain *hlwd_irq_host; + +static int hlwd_pic_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &hlwd_pic, handle_level_irq); + return 0; +} + +static const struct irq_domain_ops hlwd_irq_domain_ops = { + .map = hlwd_pic_map, +}; + +static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) +{ + void __iomem *io_base = h->host_data; + int irq; + u32 irq_status; + + irq_status = in_be32(io_base + HW_BROADWAY_ICR) & + in_be32(io_base + HW_BROADWAY_IMR); + if (irq_status == 0) + return NO_IRQ; /* no more IRQs pending */ + + irq = __ffs(irq_status); + return irq_linear_revmap(h, irq); +} + +static void hlwd_pic_irq_cascade(unsigned int cascade_virq, + struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irq_domain *irq_domain = irq_get_handler_data(cascade_virq); + unsigned int virq; + + raw_spin_lock(&desc->lock); + chip->irq_mask(&desc->irq_data); /* IRQ_LEVEL */ + raw_spin_unlock(&desc->lock); + + virq = __hlwd_pic_get_irq(irq_domain); + if (virq != NO_IRQ) + generic_handle_irq(virq); + else + pr_err("spurious interrupt!\n"); + + raw_spin_lock(&desc->lock); + chip->irq_ack(&desc->irq_data); /* IRQ_LEVEL */ + if (!irqd_irq_disabled(&desc->irq_data) && chip->irq_unmask) + chip->irq_unmask(&desc->irq_data); + raw_spin_unlock(&desc->lock); +} + +/* + * Platform hooks. 
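+ *
+ * The Hollywood PIC is reached through a cascade IRQ on its parent
+ * controller; hlwd_pic_irq_cascade() masks, dispatches and unmasks
+ * around each cascaded interrupt.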
+ * + */ + +static void __hlwd_quiesce(void __iomem *io_base) +{ + /* mask and ack all IRQs */ + out_be32(io_base + HW_BROADWAY_IMR, 0); + out_be32(io_base + HW_BROADWAY_ICR, 0xffffffff); +} + +struct irq_domain *hlwd_pic_init(struct device_node *np) +{ + struct irq_domain *irq_domain; + struct resource res; + void __iomem *io_base; + int retval; + + retval = of_address_to_resource(np, 0, &res); + if (retval) { + pr_err("no io memory range found\n"); + return NULL; + } + io_base = ioremap(res.start, resource_size(&res)); + if (!io_base) { + pr_err("ioremap failed\n"); + return NULL; + } + + pr_info("controller at 0x%08x mapped to 0x%p\n", res.start, io_base); + + __hlwd_quiesce(io_base); + + irq_domain = irq_domain_add_linear(np, HLWD_NR_IRQS, + &hlwd_irq_domain_ops, io_base); + if (!irq_domain) { + pr_err("failed to allocate irq_domain\n"); + iounmap(io_base); + return NULL; + } + + return irq_domain; +} + +unsigned int hlwd_pic_get_irq(void) +{ + return __hlwd_pic_get_irq(hlwd_irq_host); +} + +/* + * Probe function. + * + */ + +void hlwd_pic_probe(void) +{ + struct irq_domain *host; + struct device_node *np; + const u32 *interrupts; + int cascade_virq; + + for_each_compatible_node(np, NULL, "nintendo,hollywood-pic") { + interrupts = of_get_property(np, "interrupts", NULL); + if (interrupts) { + host = hlwd_pic_init(np); + BUG_ON(!host); + cascade_virq = irq_of_parse_and_map(np, 0); + irq_set_handler_data(cascade_virq, host); + irq_set_chained_handler(cascade_virq, + hlwd_pic_irq_cascade); + hlwd_irq_host = host; + break; + } + } +} + +/** + * hlwd_quiesce() - quiesce hollywood irq controller + * + * Mask and ack all interrupt sources. + * + */ +void hlwd_quiesce(void) +{ + void __iomem *io_base = hlwd_irq_host->host_data; + + __hlwd_quiesce(io_base); +} + diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.h b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h new file mode 100644 index 000000000..d2e5a0927 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h @@ -0,0 +1,22 @@ +/* + * arch/powerpc/platforms/embedded6xx/hlwd-pic.h + * + * Nintendo Wii "Hollywood" interrupt controller support. + * Copyright (C) 2009 The GameCube Linux Team + * Copyright (C) 2009 Albert Herranz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + */ + +#ifndef __HLWD_PIC_H +#define __HLWD_PIC_H + +extern unsigned int hlwd_pic_get_irq(void); +extern void hlwd_pic_probe(void); +extern void hlwd_quiesce(void); + +#endif diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c new file mode 100644 index 000000000..8c305c7c8 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -0,0 +1,285 @@ +/* + * Board setup routines for the IBM 750GX/CL platform w/ TSI10x bridge + * + * Copyright 2007 IBM Corporation + * + * Stephen Winiecki <stevewin@us.ibm.com> + * Josh Boyer <jwboyer@linux.vnet.ibm.com> + * + * Based on code from mpc7448_hpc2.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. 
+ */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/serial.h> +#include <linux/tty.h> +#include <linux/serial_core.h> +#include <linux/of_platform.h> +#include <linux/module.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/tsi108.h> +#include <asm/pci-bridge.h> +#include <asm/reg.h> +#include <mm/mmu_decl.h> +#include <asm/tsi108_irq.h> +#include <asm/tsi108_pci.h> +#include <asm/mpic.h> + +#undef DEBUG + +#define HOLLY_PCI_CFG_PHYS 0x7c000000 + +int holly_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn) +{ + if (bus == 0 && PCI_SLOT(devfn) == 0) + return PCIBIOS_DEVICE_NOT_FOUND; + else + return PCIBIOS_SUCCESSFUL; +} + +static void holly_remap_bridge(void) +{ + u32 lut_val, lut_addr; + int i; + + printk(KERN_INFO "Remapping PCI bridge\n"); + + /* Re-init the PCI bridge and LUT registers to have mappings that don't + * rely on PIBS + */ + lut_addr = 0x900; + for (i = 0; i < 31; i++) { + tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x00000201); + lut_addr += 4; + tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x0); + lut_addr += 4; + } + + /* Reserve the last LUT entry for PCI I/O space */ + tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x00000241); + lut_addr += 4; + tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x0); + + /* Map PCI I/O space */ + tsi108_write_reg(TSI108_PCI_PFAB_IO_UPPER, 0x0); + tsi108_write_reg(TSI108_PCI_PFAB_IO, 0x1); + + /* Map PCI CFG space */ + tsi108_write_reg(TSI108_PCI_PFAB_BAR0_UPPER, 0x0); + tsi108_write_reg(TSI108_PCI_PFAB_BAR0, 0x7c000000 | 0x01); + + /* We don't need MEM32 and PRM remapping so disable them */ + tsi108_write_reg(TSI108_PCI_PFAB_MEM32, 0x0); + tsi108_write_reg(TSI108_PCI_PFAB_PFM3, 0x0); + tsi108_write_reg(TSI108_PCI_PFAB_PFM4, 0x0); + + /* Set P2O_BAR0 */ + tsi108_write_reg(TSI108_PCI_P2O_BAR0_UPPER, 0x0); + tsi108_write_reg(TSI108_PCI_P2O_BAR0, 0xc0000000); + + /* Init the PCI LUTs to do no remapping */ + lut_addr = 0x500; + lut_val = 0x00000002; + + for (i = 0; i < 32; i++) { + tsi108_write_reg(TSI108_PCI_OFFSET + lut_addr, lut_val); + lut_addr += 4; + tsi108_write_reg(TSI108_PCI_OFFSET + lut_addr, 0x40000000); + lut_addr += 4; + lut_val += 0x02000000; + } + tsi108_write_reg(TSI108_PCI_P2O_PAGE_SIZES, 0x00007900); + + /* Set 64-bit PCI bus address for system memory */ + tsi108_write_reg(TSI108_PCI_P2O_BAR2_UPPER, 0x0); + tsi108_write_reg(TSI108_PCI_P2O_BAR2, 0x0); +} + +static void __init holly_setup_arch(void) +{ + struct device_node *np; + + if (ppc_md.progress) + ppc_md.progress("holly_setup_arch():set_bridge", 0); + + tsi108_csr_vir_base = get_vir_csrbase(); + + /* setup PCI host bridge */ + holly_remap_bridge(); + + np = of_find_node_by_type(NULL, "pci"); + if (np) + tsi108_setup_pci(np, HOLLY_PCI_CFG_PHYS, 1); + + ppc_md.pci_exclude_device = holly_exclude_device; + if (ppc_md.progress) + ppc_md.progress("tsi108: resources set", 0x100); + + printk(KERN_INFO "PPC750GX/CL Platform\n"); +} + +/* + * Interrupt setup and service. 
Interrupts on the holly come + * from the four external INT pins, PCI interrupts are routed via + * PCI interrupt control registers, it generates internal IRQ23 + * + * Interrupt routing on the Holly Board: + * TSI108:PB_INT[0] -> CPU0:INT# + * TSI108:PB_INT[1] -> CPU0:MCP# + * TSI108:PB_INT[2] -> N/C + * TSI108:PB_INT[3] -> N/C + */ +static void __init holly_init_IRQ(void) +{ + struct mpic *mpic; +#ifdef CONFIG_PCI + unsigned int cascade_pci_irq; + struct device_node *tsi_pci; + struct device_node *cascade_node = NULL; +#endif + + mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SPV_EOI | MPIC_NO_PTHROU_DIS | MPIC_REGSET_TSI108, + 24, 0, + "Tsi108_PIC"); + + BUG_ON(mpic == NULL); + + mpic_assign_isu(mpic, 0, mpic->paddr + 0x100); + + mpic_init(mpic); + +#ifdef CONFIG_PCI + tsi_pci = of_find_node_by_type(NULL, "pci"); + if (tsi_pci == NULL) { + printk(KERN_ERR "%s: No tsi108 pci node found !\n", __func__); + return; + } + + cascade_node = of_find_node_by_type(NULL, "pic-router"); + if (cascade_node == NULL) { + printk(KERN_ERR "%s: No tsi108 pci cascade node found !\n", __func__); + return; + } + + cascade_pci_irq = irq_of_parse_and_map(tsi_pci, 0); + pr_debug("%s: tsi108 cascade_pci_irq = 0x%x\n", __func__, (u32) cascade_pci_irq); + tsi108_pci_int_init(cascade_node); + irq_set_handler_data(cascade_pci_irq, mpic); + irq_set_chained_handler(cascade_pci_irq, tsi108_irq_cascade); +#endif + /* Configure MPIC outputs to CPU0 */ + tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0); +} + +void holly_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "vendor\t\t: IBM\n"); + seq_printf(m, "machine\t\t: PPC750 GX/CL\n"); +} + +void holly_restart(char *cmd) +{ + __be32 __iomem *ocn_bar1 = NULL; + unsigned long bar; + struct device_node *bridge = NULL; + const void *prop; + int size; + phys_addr_t addr = 0xc0000000; + + local_irq_disable(); + + bridge = of_find_node_by_type(NULL, "tsi-bridge"); + if (bridge) { + prop = of_get_property(bridge, "reg", &size); + addr = of_translate_address(bridge, prop); + } + addr += (TSI108_PB_OFFSET + 0x414); + + ocn_bar1 = ioremap(addr, 0x4); + + /* Turn on the BOOT bit so the addresses are correctly + * routed to the HLP interface */ + bar = ioread32be(ocn_bar1); + bar |= 2; + iowrite32be(bar, ocn_bar1); + iosync(); + + /* Set SRR0 to the reset vector and turn on MSR_IP */ + mtspr(SPRN_SRR0, 0xfff00100); + mtspr(SPRN_SRR1, MSR_IP); + + /* Do an rfi to jump back to firmware. Somewhat evil, + * but it works + */ + __asm__ __volatile__("rfi" : : : "memory"); + + /* Spin until reset happens. 
Shouldn't really get here */ + for (;;) ; +} + +void holly_power_off(void) +{ + local_irq_disable(); + /* No way to shut power off with software */ + for (;;) ; +} + +void holly_halt(void) +{ + holly_power_off(); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init holly_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "ibm,holly")) + return 0; + return 1; +} + +static int ppc750_machine_check_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + tsi108_clear_pci_cfg_error(); + regs->msr |= MSR_RI; + regs->nip = entry->fixup; + return 1; + } + return 0; +} + +define_machine(holly){ + .name = "PPC750 GX/CL TSI", + .probe = holly_probe, + .setup_arch = holly_setup_arch, + .init_IRQ = holly_init_IRQ, + .show_cpuinfo = holly_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = holly_restart, + .calibrate_decr = generic_calibrate_decr, + .machine_check_exception = ppc750_machine_check_exception, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c new file mode 100644 index 000000000..540eeb58d --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -0,0 +1,166 @@ +/* + * Board setup routines for the Buffalo Linkstation / Kurobox Platform. + * + * Copyright (C) 2006 G. Liakhovetski (g.liakhovetski@gmx.de) + * + * Based on sandpoint.c by Mark A. Greer + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of + * any kind, whether express or implied. + */ + +#include <linux/kernel.h> +#include <linux/initrd.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/prom.h> +#include <asm/mpic.h> +#include <asm/pci-bridge.h> + +#include "mpc10x.h" + +static const struct of_device_id of_bus_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "simple-bus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + return 0; +} +machine_device_initcall(linkstation, declare_of_platform_devices); + +static int __init linkstation_add_bridge(struct device_node *dev) +{ +#ifdef CONFIG_PCI + int len; + struct pci_controller *hose; + const int *bus_range; + + printk("Adding PCI host bridge %s\n", dev->full_name); + + bus_range = of_get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) + printk(KERN_WARNING "Can't get bus-range for %s, assume" + " bus 0\n", dev->full_name); + + hose = pcibios_alloc_controller(dev); + if (hose == NULL) + return -ENOMEM; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + setup_indirect_pci(hose, 0xfec00000, 0xfee00000, 0); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, 1); +#endif + return 0; +} + +static void __init linkstation_setup_arch(void) +{ + struct device_node *np; + + /* Lookup PCI host bridges */ + for_each_compatible_node(np, "pci", "mpc10x-pci") + linkstation_add_bridge(np); + + printk(KERN_INFO "BUFFALO Network Attached Storage Series\n"); + printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n"); +} + +/* + * Interrupt setup and service. 
Interrupts on the linkstation come + * from the four PCI slots plus onboard 8241 devices: I2C, DUART. + */ +static void __init linkstation_init_IRQ(void) +{ + struct mpic *mpic; + + mpic = mpic_alloc(NULL, 0, 0, 4, 0, " EPIC "); + BUG_ON(mpic == NULL); + + /* PCI IRQs */ + mpic_assign_isu(mpic, 0, mpic->paddr + 0x10200); + + /* I2C */ + mpic_assign_isu(mpic, 1, mpic->paddr + 0x11000); + + /* ttyS0, ttyS1 */ + mpic_assign_isu(mpic, 2, mpic->paddr + 0x11100); + + mpic_init(mpic); +} + +extern void avr_uart_configure(void); +extern void avr_uart_send(const char); + +static void linkstation_restart(char *cmd) +{ + local_irq_disable(); + + /* Reset system via AVR */ + avr_uart_configure(); + /* Send reboot command */ + avr_uart_send('C'); + + for(;;) /* Spin until reset happens */ + avr_uart_send('G'); /* "kick" */ +} + +static void linkstation_power_off(void) +{ + local_irq_disable(); + + /* Power down system via AVR */ + avr_uart_configure(); + /* send shutdown command */ + avr_uart_send('E'); + + for(;;) /* Spin until power-off happens */ + avr_uart_send('G'); /* "kick" */ + /* NOTREACHED */ +} + +static void linkstation_halt(void) +{ + linkstation_power_off(); + /* NOTREACHED */ +} + +static void linkstation_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "vendor\t\t: Buffalo Technology\n"); + seq_printf(m, "machine\t\t: Linkstation I/Kurobox(HG)\n"); +} + +static int __init linkstation_probe(void) +{ + unsigned long root; + + root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "linkstation")) + return 0; + + pm_power_off = linkstation_power_off; + + return 1; +} + +define_machine(linkstation){ + .name = "Buffalo Linkstation", + .probe = linkstation_probe, + .setup_arch = linkstation_setup_arch, + .init_IRQ = linkstation_init_IRQ, + .show_cpuinfo = linkstation_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = linkstation_restart, + .halt = linkstation_halt, + .calibrate_decr = generic_calibrate_decr, +}; diff --git a/arch/powerpc/platforms/embedded6xx/ls_uart.c b/arch/powerpc/platforms/embedded6xx/ls_uart.c new file mode 100644 index 000000000..9d891bd5d --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/ls_uart.c @@ -0,0 +1,142 @@ +/* + * AVR power-management chip interface for the Buffalo Linkstation / + * Kurobox Platform. + * + * Author: 2006 (c) G. Liakhovetski + * g.liakhovetski@gmx.de + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of + * any kind, whether express or implied. 
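+ */
+
+/*
+ * A note on the baud-rate divisor used below (a sketch with an
+ * assumed clock; the real avr_clock value comes from the device
+ * tree): AVR_QUOT() rounds clock / (16 * 9600) to the nearest
+ * integer, so a hypothetical avr_clock of 7372800 Hz would yield
+ * a 16550 divisor of 48 for 9600 baud.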
+ */ +#include <linux/workqueue.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/serial_reg.h> +#include <linux/serial_8250.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/termbits.h> + +#include "mpc10x.h" + +static void __iomem *avr_addr; +static unsigned long avr_clock; + +static struct work_struct wd_work; + +static void wd_stop(struct work_struct *unused) +{ + const char string[] = "AAAAFFFFJJJJ>>>>VVVV>>>>ZZZZVVVVKKKK"; + int i = 0, rescue = 8; + int len = strlen(string); + + while (rescue--) { + int j; + char lsr = in_8(avr_addr + UART_LSR); + + if (lsr & (UART_LSR_THRE | UART_LSR_TEMT)) { + for (j = 0; j < 16 && i < len; j++, i++) + out_8(avr_addr + UART_TX, string[i]); + if (i == len) { + /* Read "OK" back: 4ms for the last "KKKK" + plus a couple bytes back */ + msleep(7); + printk("linkstation: disarming the AVR watchdog: "); + while (in_8(avr_addr + UART_LSR) & UART_LSR_DR) + printk("%c", in_8(avr_addr + UART_RX)); + break; + } + } + msleep(17); + } + printk("\n"); +} + +#define AVR_QUOT(clock) ((clock) + 8 * 9600) / (16 * 9600) + +void avr_uart_configure(void) +{ + unsigned char cval = UART_LCR_WLEN8; + unsigned int quot = AVR_QUOT(avr_clock); + + if (!avr_addr || !avr_clock) + return; + + out_8(avr_addr + UART_LCR, cval); /* initialise UART */ + out_8(avr_addr + UART_MCR, 0); + out_8(avr_addr + UART_IER, 0); + + cval |= UART_LCR_STOP | UART_LCR_PARITY | UART_LCR_EPAR; + + out_8(avr_addr + UART_LCR, cval); /* Set character format */ + + out_8(avr_addr + UART_LCR, cval | UART_LCR_DLAB); /* set DLAB */ + out_8(avr_addr + UART_DLL, quot & 0xff); /* LS of divisor */ + out_8(avr_addr + UART_DLM, quot >> 8); /* MS of divisor */ + out_8(avr_addr + UART_LCR, cval); /* reset DLAB */ + out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO); /* enable FIFO */ +} + +void avr_uart_send(const char c) +{ + if (!avr_addr || !avr_clock) + return; + + out_8(avr_addr + UART_TX, c); + out_8(avr_addr + UART_TX, c); + out_8(avr_addr + UART_TX, c); + out_8(avr_addr + UART_TX, c); +} + +static void __init ls_uart_init(void) +{ + local_irq_disable(); + +#ifndef CONFIG_SERIAL_8250 + out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO); /* enable FIFO */ + out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO | + UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); /* clear FIFOs */ + out_8(avr_addr + UART_FCR, 0); + out_8(avr_addr + UART_IER, 0); + + /* Clear up interrupts */ + (void) in_8(avr_addr + UART_LSR); + (void) in_8(avr_addr + UART_RX); + (void) in_8(avr_addr + UART_IIR); + (void) in_8(avr_addr + UART_MSR); +#endif + avr_uart_configure(); + + local_irq_enable(); +} + +static int __init ls_uarts_init(void) +{ + struct device_node *avr; + phys_addr_t phys_addr; + int len; + + avr = of_find_node_by_path("/soc10x/serial@80004500"); + if (!avr) + return -EINVAL; + + avr_clock = *(u32*)of_get_property(avr, "clock-frequency", &len); + phys_addr = ((u32*)of_get_property(avr, "reg", &len))[0]; + + if (!avr_clock || !phys_addr) + return -EINVAL; + + avr_addr = ioremap(phys_addr, 32); + if (!avr_addr) + return -EFAULT; + + ls_uart_init(); + + INIT_WORK(&wd_work, wd_stop); + schedule_work(&wd_work); + + return 0; +} + +machine_late_initcall(linkstation, ls_uarts_init); diff --git a/arch/powerpc/platforms/embedded6xx/mpc10x.h b/arch/powerpc/platforms/embedded6xx/mpc10x.h new file mode 100644 index 000000000..b290b6366 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/mpc10x.h @@ -0,0 +1,169 @@ +/* + * Common routines for the Motorola SPS MPC106/8240/107 Host bridge/Mem + * ctlr/EPIC/etc. 
+ * + * Author: Mark A. Greer + * mgreer@mvista.com + * + * 2001 (c) MontaVista, Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#ifndef __PPC_KERNEL_MPC10X_H +#define __PPC_KERNEL_MPC10X_H + +#include <linux/pci_ids.h> +#include <asm/pci-bridge.h> + +/* + * The values here don't completely map everything but should work in most + * cases. + * + * MAP A (PReP Map) + * Processor: 0x80000000 - 0x807fffff -> PCI I/O: 0x00000000 - 0x007fffff + * Processor: 0xc0000000 - 0xdfffffff -> PCI MEM: 0x00000000 - 0x1fffffff + * PCI MEM: 0x80000000 -> Processor System Memory: 0x00000000 + * EUMB mapped to: ioremap_base - 0x00100000 (ioremap_base - 1 MB) + * + * MAP B (CHRP Map) + * Processor: 0xfe000000 - 0xfebfffff -> PCI I/O: 0x00000000 - 0x00bfffff + * Processor: 0x80000000 - 0xbfffffff -> PCI MEM: 0x80000000 - 0xbfffffff + * PCI MEM: 0x00000000 -> Processor System Memory: 0x00000000 + * EUMB mapped to: ioremap_base - 0x00100000 (ioremap_base - 1 MB) + */ + +/* + * Define the vendor/device IDs for the various bridges--should be added to + * <linux/pci_ids.h> + */ +#define MPC10X_BRIDGE_106 ((PCI_DEVICE_ID_MOTOROLA_MPC106 << 16) | \ + PCI_VENDOR_ID_MOTOROLA) +#define MPC10X_BRIDGE_8240 ((0x0003 << 16) | PCI_VENDOR_ID_MOTOROLA) +#define MPC10X_BRIDGE_107 ((0x0004 << 16) | PCI_VENDOR_ID_MOTOROLA) +#define MPC10X_BRIDGE_8245 ((0x0006 << 16) | PCI_VENDOR_ID_MOTOROLA) + +/* Define the type of map to use */ +#define MPC10X_MEM_MAP_A 1 +#define MPC10X_MEM_MAP_B 2 + +/* Map A (PReP Map) Defines */ +#define MPC10X_MAPA_CNFG_ADDR 0x80000cf8 +#define MPC10X_MAPA_CNFG_DATA 0x80000cfc + +#define MPC10X_MAPA_ISA_IO_BASE 0x80000000 +#define MPC10X_MAPA_ISA_MEM_BASE 0xc0000000 +#define MPC10X_MAPA_DRAM_OFFSET 0x80000000 + +#define MPC10X_MAPA_PCI_INTACK_ADDR 0xbffffff0 +#define MPC10X_MAPA_PCI_IO_START 0x00000000 +#define MPC10X_MAPA_PCI_IO_END (0x00800000 - 1) +#define MPC10X_MAPA_PCI_MEM_START 0x00000000 +#define MPC10X_MAPA_PCI_MEM_END (0x20000000 - 1) + +#define MPC10X_MAPA_PCI_MEM_OFFSET (MPC10X_MAPA_ISA_MEM_BASE - \ + MPC10X_MAPA_PCI_MEM_START) + +/* Map B (CHRP Map) Defines */ +#define MPC10X_MAPB_CNFG_ADDR 0xfec00000 +#define MPC10X_MAPB_CNFG_DATA 0xfee00000 + +#define MPC10X_MAPB_ISA_IO_BASE 0xfe000000 +#define MPC10X_MAPB_ISA_MEM_BASE 0x80000000 +#define MPC10X_MAPB_DRAM_OFFSET 0x00000000 + +#define MPC10X_MAPB_PCI_INTACK_ADDR 0xfef00000 +#define MPC10X_MAPB_PCI_IO_START 0x00000000 +#define MPC10X_MAPB_PCI_IO_END (0x00c00000 - 1) +#define MPC10X_MAPB_PCI_MEM_START 0x80000000 +#define MPC10X_MAPB_PCI_MEM_END (0xc0000000 - 1) + +#define MPC10X_MAPB_PCI_MEM_OFFSET (MPC10X_MAPB_ISA_MEM_BASE - \ + MPC10X_MAPB_PCI_MEM_START) + +/* Miscellaneous Configuration register offsets */ +#define MPC10X_CFG_PIR_REG 0x09 +#define MPC10X_CFG_PIR_HOST_BRIDGE 0x00 +#define MPC10X_CFG_PIR_AGENT 0x01 + +#define MPC10X_CFG_EUMBBAR 0x78 + +#define MPC10X_CFG_PICR1_REG 0xa8 +#define MPC10X_CFG_PICR1_ADDR_MAP_MASK 0x00010000 +#define MPC10X_CFG_PICR1_ADDR_MAP_A 0x00010000 +#define MPC10X_CFG_PICR1_ADDR_MAP_B 0x00000000 +#define MPC10X_CFG_PICR1_SPEC_PCI_RD 0x00000004 +#define MPC10X_CFG_PICR1_ST_GATH_EN 0x00000040 + +#define MPC10X_CFG_PICR2_REG 0xac +#define MPC10X_CFG_PICR2_COPYBACK_OPT 0x00000001 + +#define MPC10X_CFG_MAPB_OPTIONS_REG 0xe0 +#define MPC10X_CFG_MAPB_OPTIONS_CFAE 0x80 /* CPU_FD_ALIAS_EN */ +#define MPC10X_CFG_MAPB_OPTIONS_PFAE 0x40 /* PCI_FD_ALIAS_EN */ 
+#define MPC10X_CFG_MAPB_OPTIONS_DR 0x20 /* DLL_RESET */ +#define MPC10X_CFG_MAPB_OPTIONS_PCICH 0x08 /* PCI_COMPATIBILITY_HOLE */ +#define MPC10X_CFG_MAPB_OPTIONS_PROCCH 0x04 /* PROC_COMPATIBILITY_HOLE */ + +/* Define offsets for the memory controller registers in the config space */ +#define MPC10X_MCTLR_MEM_START_1 0x80 /* Banks 0-3 */ +#define MPC10X_MCTLR_MEM_START_2 0x84 /* Banks 4-7 */ +#define MPC10X_MCTLR_EXT_MEM_START_1 0x88 /* Banks 0-3 */ +#define MPC10X_MCTLR_EXT_MEM_START_2 0x8c /* Banks 4-7 */ + +#define MPC10X_MCTLR_MEM_END_1 0x90 /* Banks 0-3 */ +#define MPC10X_MCTLR_MEM_END_2 0x94 /* Banks 4-7 */ +#define MPC10X_MCTLR_EXT_MEM_END_1 0x98 /* Banks 0-3 */ +#define MPC10X_MCTLR_EXT_MEM_END_2 0x9c /* Banks 4-7 */ + +#define MPC10X_MCTLR_MEM_BANK_ENABLES 0xa0 + +/* Define some offset in the EUMB */ +#define MPC10X_EUMB_SIZE 0x00100000 /* Total EUMB size (1MB) */ + +#define MPC10X_EUMB_MU_OFFSET 0x00000000 /* Msg Unit reg offset */ +#define MPC10X_EUMB_MU_SIZE 0x00001000 /* Msg Unit reg size */ +#define MPC10X_EUMB_DMA_OFFSET 0x00001000 /* DMA Unit reg offset */ +#define MPC10X_EUMB_DMA_SIZE 0x00001000 /* DMA Unit reg size */ +#define MPC10X_EUMB_ATU_OFFSET 0x00002000 /* Addr xlate reg offset */ +#define MPC10X_EUMB_ATU_SIZE 0x00001000 /* Addr xlate reg size */ +#define MPC10X_EUMB_I2C_OFFSET 0x00003000 /* I2C Unit reg offset */ +#define MPC10X_EUMB_I2C_SIZE 0x00001000 /* I2C Unit reg size */ +#define MPC10X_EUMB_DUART_OFFSET 0x00004000 /* DUART Unit reg offset (8245) */ +#define MPC10X_EUMB_DUART_SIZE 0x00001000 /* DUART Unit reg size (8245) */ +#define MPC10X_EUMB_EPIC_OFFSET 0x00040000 /* EPIC offset in EUMB */ +#define MPC10X_EUMB_EPIC_SIZE 0x00030000 /* EPIC size */ +#define MPC10X_EUMB_PM_OFFSET 0x000fe000 /* Performance Monitor reg offset (8245) */ +#define MPC10X_EUMB_PM_SIZE 0x00001000 /* Performance Monitor reg size (8245) */ +#define MPC10X_EUMB_WP_OFFSET 0x000ff000 /* Data path diagnostic, watchpoint reg offset */ +#define MPC10X_EUMB_WP_SIZE 0x00001000 /* Data path diagnostic, watchpoint reg size */ + +/* + * Define some recommended places to put the EUMB regs. + * For both maps, recommend putting the EUMB from 0xeff00000 to 0xefffffff. + */ +extern unsigned long ioremap_base; +#define MPC10X_MAPA_EUMB_BASE (ioremap_base - MPC10X_EUMB_SIZE) +#define MPC10X_MAPB_EUMB_BASE MPC10X_MAPA_EUMB_BASE + +enum ppc_sys_devices { + MPC10X_IIC1, + MPC10X_DMA0, + MPC10X_DMA1, + MPC10X_UART0, + MPC10X_UART1, + NUM_PPC_SYS_DEVS, +}; + +int mpc10x_bridge_init(struct pci_controller *hose, + uint current_map, + uint new_map, + uint phys_eumb_base); +unsigned long mpc10x_get_mem_size(uint mem_map); +int mpc10x_enable_store_gathering(struct pci_controller *hose); +int mpc10x_disable_store_gathering(struct pci_controller *hose); + +/* For MPC107 boards that use the built-in openpic */ +void mpc10x_set_openpic(void); + +#endif /* __PPC_KERNEL_MPC10X_H */ diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c new file mode 100644 index 000000000..df4ad95f1 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -0,0 +1,196 @@ +/* + * mpc7448_hpc2.c + * + * Board setup routines for the Freescale mpc7448hpc2(taiga) platform + * + * Author: Jacob Pan + * jacob.pan@freescale.com + * Author: Xianghua Xiao + * x.xiao@freescale.com + * Maintainer: Roy Zang <tie-fei.zang@freescale.com> + * Add Flat Device Tree support fot mpc7448hpc2 board + * + * Copyright 2004-2006 Freescale Semiconductor, Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/kdev_t.h> +#include <linux/console.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/serial.h> +#include <linux/tty.h> +#include <linux/serial_core.h> + +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/tsi108.h> +#include <asm/pci-bridge.h> +#include <asm/reg.h> +#include <mm/mmu_decl.h> +#include <asm/tsi108_pci.h> +#include <asm/tsi108_irq.h> +#include <asm/mpic.h> + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) do { printk(fmt); } while(0) +#else +#define DBG(fmt...) do { } while(0) +#endif + +#define MPC7448HPC2_PCI_CFG_PHYS 0xfb000000 + +int mpc7448_hpc2_exclude_device(struct pci_controller *hose, + u_char bus, u_char devfn) +{ + if (bus == 0 && PCI_SLOT(devfn) == 0) + return PCIBIOS_DEVICE_NOT_FOUND; + else + return PCIBIOS_SUCCESSFUL; +} + +static void __init mpc7448_hpc2_setup_arch(void) +{ + struct device_node *np; + if (ppc_md.progress) + ppc_md.progress("mpc7448_hpc2_setup_arch():set_bridge", 0); + + tsi108_csr_vir_base = get_vir_csrbase(); + + /* setup PCI host bridge */ +#ifdef CONFIG_PCI + for_each_compatible_node(np, "pci", "tsi108-pci") + tsi108_setup_pci(np, MPC7448HPC2_PCI_CFG_PHYS, 0); + + ppc_md.pci_exclude_device = mpc7448_hpc2_exclude_device; + if (ppc_md.progress) + ppc_md.progress("tsi108: resources set", 0x100); +#endif + + printk(KERN_INFO "MPC7448HPC2 (TAIGA) Platform\n"); + printk(KERN_INFO + "Jointly ported by Freescale and Tundra Semiconductor\n"); + printk(KERN_INFO + "Enabling L2 cache then enabling the HID0 prefetch engine.\n"); +} + +/* + * Interrupt setup and service. 
Interrupts on the mpc7448_hpc2 come + * from the four external INT pins, PCI interrupts are routed via + * PCI interrupt control registers, it generates internal IRQ23 + * + * Interrupt routing on the Taiga Board: + * TSI108:PB_INT[0] -> CPU0:INT# + * TSI108:PB_INT[1] -> CPU0:MCP# + * TSI108:PB_INT[2] -> N/C + * TSI108:PB_INT[3] -> N/C + */ +static void __init mpc7448_hpc2_init_IRQ(void) +{ + struct mpic *mpic; +#ifdef CONFIG_PCI + unsigned int cascade_pci_irq; + struct device_node *tsi_pci; + struct device_node *cascade_node = NULL; +#endif + + mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SPV_EOI | MPIC_NO_PTHROU_DIS | MPIC_REGSET_TSI108, + 24, 0, + "Tsi108_PIC"); + + BUG_ON(mpic == NULL); + + mpic_assign_isu(mpic, 0, mpic->paddr + 0x100); + + mpic_init(mpic); + +#ifdef CONFIG_PCI + tsi_pci = of_find_node_by_type(NULL, "pci"); + if (tsi_pci == NULL) { + printk("%s: No tsi108 pci node found !\n", __func__); + return; + } + cascade_node = of_find_node_by_type(NULL, "pic-router"); + if (cascade_node == NULL) { + printk("%s: No tsi108 pci cascade node found !\n", __func__); + return; + } + + cascade_pci_irq = irq_of_parse_and_map(tsi_pci, 0); + DBG("%s: tsi108 cascade_pci_irq = 0x%x\n", __func__, + (u32) cascade_pci_irq); + tsi108_pci_int_init(cascade_node); + irq_set_handler_data(cascade_pci_irq, mpic); + irq_set_chained_handler(cascade_pci_irq, tsi108_irq_cascade); +#endif + /* Configure MPIC outputs to CPU0 */ + tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0); +} + +void mpc7448_hpc2_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "vendor\t\t: Freescale Semiconductor\n"); +} + +void mpc7448_hpc2_restart(char *cmd) +{ + local_irq_disable(); + + /* Set exception prefix high - to the firmware */ + _nmask_and_or_msr(0, MSR_IP); + + for (;;) ; /* Spin until reset happens */ +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mpc7448_hpc2_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "mpc74xx")) + return 0; + return 1; +} + +static int mpc7448_machine_check_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + tsi108_clear_pci_cfg_error(); + regs->msr |= MSR_RI; + regs->nip = entry->fixup; + return 1; + } + return 0; +} + +define_machine(mpc7448_hpc2){ + .name = "MPC7448 HPC2", + .probe = mpc7448_hpc2_probe, + .setup_arch = mpc7448_hpc2_setup_arch, + .init_IRQ = mpc7448_hpc2_init_IRQ, + .show_cpuinfo = mpc7448_hpc2_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = mpc7448_hpc2_restart, + .calibrate_decr = generic_calibrate_decr, + .machine_check_exception= mpc7448_machine_check_exception, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c new file mode 100644 index 000000000..161330317 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -0,0 +1,221 @@ +/* + * Board setup routines for the Motorola/Emerson MVME5100. + * + * Copyright 2013 CSC Australia Pty. Ltd. + * + * Based on earlier code by: + * + * Matt Porter, MontaVista Software Inc. + * Copyright 2001 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * Author: Stephen Chivers <schivers@csc.com> + * + */ + +#include <linux/of_platform.h> + +#include <asm/i8259.h> +#include <asm/pci-bridge.h> +#include <asm/mpic.h> +#include <asm/prom.h> +#include <mm/mmu_decl.h> +#include <asm/udbg.h> + +#define HAWK_MPIC_SIZE 0x00040000U +#define MVME5100_PCI_MEM_OFFSET 0x00000000 + +/* Board register addresses. */ +#define BOARD_STATUS_REG 0xfef88080 +#define BOARD_MODFAIL_REG 0xfef88090 +#define BOARD_MODRST_REG 0xfef880a0 +#define BOARD_TBEN_REG 0xfef880c0 +#define BOARD_SW_READ_REG 0xfef880e0 +#define BOARD_GEO_ADDR_REG 0xfef880e8 +#define BOARD_EXT_FEATURE1_REG 0xfef880f0 +#define BOARD_EXT_FEATURE2_REG 0xfef88100 + +static phys_addr_t pci_membase; +static u_char *restart; + +static void mvme5100_8259_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq != NO_IRQ) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + +static void __init mvme5100_pic_init(void) +{ + struct mpic *mpic; + struct device_node *np; + struct device_node *cp = NULL; + unsigned int cirq; + unsigned long intack = 0; + const u32 *prop = NULL; + + np = of_find_node_by_type(NULL, "open-pic"); + if (!np) { + pr_err("Could not find open-pic node\n"); + return; + } + + mpic = mpic_alloc(np, pci_membase, 0, 16, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + of_node_put(np); + + mpic_assign_isu(mpic, 0, pci_membase + 0x10000); + + mpic_init(mpic); + + cp = of_find_compatible_node(NULL, NULL, "chrp,iic"); + if (cp == NULL) { + pr_warn("mvme5100_pic_init: couldn't find i8259\n"); + return; + } + + cirq = irq_of_parse_and_map(cp, 0); + if (cirq == NO_IRQ) { + pr_warn("mvme5100_pic_init: no cascade interrupt?\n"); + return; + } + + np = of_find_compatible_node(NULL, "pci", "mpc10x-pci"); + if (np) { + prop = of_get_property(np, "8259-interrupt-acknowledge", NULL); + + if (prop) + intack = prop[0]; + + of_node_put(np); + } + + if (intack) + pr_debug("mvme5100_pic_init: PCI 8259 intack at 0x%016lx\n", + intack); + + i8259_init(cp, intack); + of_node_put(cp); + irq_set_chained_handler(cirq, mvme5100_8259_cascade); +} + +static int __init mvme5100_add_bridge(struct device_node *dev) +{ + const int *bus_range; + int len; + struct pci_controller *hose; + unsigned short devid; + + pr_info("Adding PCI host bridge %s\n", dev->full_name); + + bus_range = of_get_property(dev, "bus-range", &len); + + hose = pcibios_alloc_controller(dev); + if (hose == NULL) + return -ENOMEM; + + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? 
bus_range[1] : 0xff; + + setup_indirect_pci(hose, 0xfe000cf8, 0xfe000cfc, 0); + + pci_process_bridge_OF_ranges(hose, dev, 1); + + early_read_config_word(hose, 0, 0, PCI_DEVICE_ID, &devid); + + if (devid != PCI_DEVICE_ID_MOTOROLA_HAWK) { + pr_err("HAWK PHB not present?\n"); + return 0; + } + + early_read_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_1, &pci_membase); + + if (pci_membase == 0) { + pr_err("HAWK PHB mibar not correctly set?\n"); + return 0; + } + + pr_info("mvme5100_pic_init: pci_membase: %x\n", pci_membase); + + return 0; +} + +static const struct of_device_id mvme5100_of_bus_ids[] __initconst = { + { .compatible = "hawk-bridge", }, + {}, +}; + +/* + * Setup the architecture + */ +static void __init mvme5100_setup_arch(void) +{ + struct device_node *np; + + if (ppc_md.progress) + ppc_md.progress("mvme5100_setup_arch()", 0); + + for_each_compatible_node(np, "pci", "hawk-pci") + mvme5100_add_bridge(np); + + restart = ioremap(BOARD_MODRST_REG, 4); +} + + +static void mvme5100_show_cpuinfo(struct seq_file *m) +{ + seq_puts(m, "Vendor\t\t: Motorola/Emerson\n"); + seq_puts(m, "Machine\t\t: MVME5100\n"); +} + +static void mvme5100_restart(char *cmd) +{ + + local_irq_disable(); + mtmsr(mfmsr() | MSR_IP); + + out_8((u_char *) restart, 0x01); + + while (1) + ; +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mvme5100_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "MVME5100"); +} + +static int __init probe_of_platform_devices(void) +{ + + of_platform_bus_probe(NULL, mvme5100_of_bus_ids, NULL); + return 0; +} + +machine_device_initcall(mvme5100, probe_of_platform_devices); + +define_machine(mvme5100) { + .name = "MVME5100", + .probe = mvme5100_probe, + .setup_arch = mvme5100_setup_arch, + .init_IRQ = mvme5100_pic_init, + .show_cpuinfo = mvme5100_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = mvme5100_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c new file mode 100644 index 000000000..d572833eb --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -0,0 +1,125 @@ +/* + * Board setup routines for the storcenter + * + * Copyright 2007 (C) Oyvind Repvik (nail@nslu2-linux.org) + * Copyright 2007 Andy Wilcox, Jon Loeliger + * + * Based on linkstation.c by G. Liakhovetski + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of + * any kind, whether express or implied. 
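+ */
+
+/*
+ * Bridge setup note: storcenter_add_bridge() below uses the CHRP
+ * "map B" configuration of the MPC10x, so indirect PCI config cycles
+ * go through the address/data register pair defined in mpc10x.h
+ * (MPC10X_MAPB_CNFG_ADDR at 0xfec00000 and MPC10X_MAPB_CNFG_DATA at
+ * 0xfee00000).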
+ */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/initrd.h> +#include <linux/of_platform.h> + +#include <asm/time.h> +#include <asm/prom.h> +#include <asm/mpic.h> +#include <asm/pci-bridge.h> + +#include "mpc10x.h" + + +static const struct of_device_id storcenter_of_bus[] __initconst = { + { .name = "soc", }, + {}, +}; + +static int __init storcenter_device_probe(void) +{ + of_platform_bus_probe(NULL, storcenter_of_bus, NULL); + return 0; +} +machine_device_initcall(storcenter, storcenter_device_probe); + + +static int __init storcenter_add_bridge(struct device_node *dev) +{ +#ifdef CONFIG_PCI + int len; + struct pci_controller *hose; + const int *bus_range; + + printk("Adding PCI host bridge %s\n", dev->full_name); + + hose = pcibios_alloc_controller(dev); + if (hose == NULL) + return -ENOMEM; + + bus_range = of_get_property(dev, "bus-range", &len); + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + setup_indirect_pci(hose, MPC10X_MAPB_CNFG_ADDR, MPC10X_MAPB_CNFG_DATA, 0); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, 1); +#endif + + return 0; +} + +static void __init storcenter_setup_arch(void) +{ + struct device_node *np; + + /* Lookup PCI host bridges */ + for_each_compatible_node(np, "pci", "mpc10x-pci") + storcenter_add_bridge(np); + + printk(KERN_INFO "IOMEGA StorCenter\n"); +} + +/* + * Interrupt setup and service. Interrupts on the turbostation come + * from the four PCI slots plus onboard 8241 devices: I2C, DUART. + */ +static void __init storcenter_init_IRQ(void) +{ + struct mpic *mpic; + + mpic = mpic_alloc(NULL, 0, 0, 16, 0, " OpenPIC "); + BUG_ON(mpic == NULL); + + /* + * 16 Serial Interrupts followed by 16 Internal Interrupts. + * I2C is the second internal, so it is at 17, 0x11020. + */ + mpic_assign_isu(mpic, 0, mpic->paddr + 0x10200); + mpic_assign_isu(mpic, 1, mpic->paddr + 0x11000); + + mpic_init(mpic); +} + +static void storcenter_restart(char *cmd) +{ + local_irq_disable(); + + /* Set exception prefix high - to the firmware */ + _nmask_and_or_msr(0, MSR_IP); + + /* Wait for reset to happen */ + for (;;) ; +} + +static int __init storcenter_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "iomega,storcenter"); +} + +define_machine(storcenter){ + .name = "IOMEGA StorCenter", + .probe = storcenter_probe, + .setup_arch = storcenter_setup_arch, + .init_IRQ = storcenter_init_IRQ, + .get_irq = mpic_get_irq, + .restart = storcenter_restart, + .calibrate_decr = generic_calibrate_decr, +}; diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c new file mode 100644 index 000000000..7feb325b6 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c @@ -0,0 +1,328 @@ +/* + * arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c + * + * udbg serial input/output routines for the USB Gecko adapter. + * Copyright (C) 2008-2009 The GameCube Linux Team + * Copyright (C) 2008,2009 Albert Herranz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + */ + +#include <mm/mmu_decl.h> + +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/fixmap.h> + +#include "usbgecko_udbg.h" + + +#define EXI_CLK_32MHZ 5 + +#define EXI_CSR 0x00 +#define EXI_CSR_CLKMASK (0x7<<4) +#define EXI_CSR_CLK_32MHZ (EXI_CLK_32MHZ<<4) +#define EXI_CSR_CSMASK (0x7<<7) +#define EXI_CSR_CS_0 (0x1<<7) /* Chip Select 001 */ + +#define EXI_CR 0x0c +#define EXI_CR_TSTART (1<<0) +#define EXI_CR_WRITE (1<<2) +#define EXI_CR_READ_WRITE (2<<2) +#define EXI_CR_TLEN(len) (((len)-1)<<4) + +#define EXI_DATA 0x10 + +#define UG_READ_ATTEMPTS 100 +#define UG_WRITE_ATTEMPTS 100 + + +static void __iomem *ug_io_base; + +/* + * Performs one input/output transaction between the exi host and the usbgecko. + */ +static u32 ug_io_transaction(u32 in) +{ + u32 __iomem *csr_reg = ug_io_base + EXI_CSR; + u32 __iomem *data_reg = ug_io_base + EXI_DATA; + u32 __iomem *cr_reg = ug_io_base + EXI_CR; + u32 csr, data, cr; + + /* select */ + csr = EXI_CSR_CLK_32MHZ | EXI_CSR_CS_0; + out_be32(csr_reg, csr); + + /* read/write */ + data = in; + out_be32(data_reg, data); + cr = EXI_CR_TLEN(2) | EXI_CR_READ_WRITE | EXI_CR_TSTART; + out_be32(cr_reg, cr); + + while (in_be32(cr_reg) & EXI_CR_TSTART) + barrier(); + + /* deselect */ + out_be32(csr_reg, 0); + + /* result */ + data = in_be32(data_reg); + + return data; +} + +/* + * Returns true if an usbgecko adapter is found. + */ +static int ug_is_adapter_present(void) +{ + if (!ug_io_base) + return 0; + + return ug_io_transaction(0x90000000) == 0x04700000; +} + +/* + * Returns true if the TX fifo is ready for transmission. + */ +static int ug_is_txfifo_ready(void) +{ + return ug_io_transaction(0xc0000000) & 0x04000000; +} + +/* + * Tries to transmit a character. + * If the TX fifo is not ready the result is undefined. + */ +static void ug_raw_putc(char ch) +{ + ug_io_transaction(0xb0000000 | (ch << 20)); +} + +/* + * Transmits a character. + * It silently fails if the TX fifo is not ready after a number of retries. + */ +static void ug_putc(char ch) +{ + int count = UG_WRITE_ATTEMPTS; + + if (!ug_io_base) + return; + + if (ch == '\n') + ug_putc('\r'); + + while (!ug_is_txfifo_ready() && count--) + barrier(); + if (count >= 0) + ug_raw_putc(ch); +} + +/* + * Returns true if the RX fifo is ready for transmission. + */ +static int ug_is_rxfifo_ready(void) +{ + return ug_io_transaction(0xd0000000) & 0x04000000; +} + +/* + * Tries to receive a character. + * If a character is unavailable the function returns -1. + */ +static int ug_raw_getc(void) +{ + u32 data = ug_io_transaction(0xa0000000); + if (data & 0x08000000) + return (data >> 16) & 0xff; + else + return -1; +} + +/* + * Receives a character. + * It fails if the RX fifo is not ready after a number of retries. + */ +static int ug_getc(void) +{ + int count = UG_READ_ATTEMPTS; + + if (!ug_io_base) + return -1; + + while (!ug_is_rxfifo_ready() && count--) + barrier(); + return ug_raw_getc(); +} + +/* + * udbg functions. + * + */ + +/* + * Transmits a character. + */ +void ug_udbg_putc(char ch) +{ + ug_putc(ch); +} + +/* + * Receives a character. Waits until a character is available. + */ +static int ug_udbg_getc(void) +{ + int ch; + + while ((ch = ug_getc()) == -1) + barrier(); + return ch; +} + +/* + * Receives a character. If a character is not available, returns -1. + */ +static int ug_udbg_getc_poll(void) +{ + if (!ug_is_rxfifo_ready()) + return -1; + return ug_getc(); +} + +/* + * Retrieves and prepares the virtual address needed to access the hardware. 
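+ *
+ * This assumes the EXI node carries a conventional (address, size)
+ * "reg" property, e.g. (values purely illustrative; the real ones
+ * come from the firmware-supplied device tree):
+ *
+ *   reg = <0x0d006800 0x40>;
+ *
+ * The address is translated through the bus hierarchy with
+ * of_translate_address() and then mapped with ioremap().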
+ */ +static void __iomem *ug_udbg_setup_exi_io_base(struct device_node *np) +{ + void __iomem *exi_io_base = NULL; + phys_addr_t paddr; + const unsigned int *reg; + + reg = of_get_property(np, "reg", NULL); + if (reg) { + paddr = of_translate_address(np, reg); + if (paddr) + exi_io_base = ioremap(paddr, reg[1]); + } + return exi_io_base; +} + +/* + * Checks if a USB Gecko adapter is inserted in any memory card slot. + */ +static void __iomem *ug_udbg_probe(void __iomem *exi_io_base) +{ + int i; + + /* look for a usbgecko on memcard slots A and B */ + for (i = 0; i < 2; i++) { + ug_io_base = exi_io_base + 0x14 * i; + if (ug_is_adapter_present()) + break; + } + if (i == 2) + ug_io_base = NULL; + return ug_io_base; + +} + +/* + * USB Gecko udbg support initialization. + */ +void __init ug_udbg_init(void) +{ + struct device_node *np; + void __iomem *exi_io_base; + + if (ug_io_base) + udbg_printf("%s: early -> final\n", __func__); + + np = of_find_compatible_node(NULL, NULL, "nintendo,flipper-exi"); + if (!np) { + udbg_printf("%s: EXI node not found\n", __func__); + goto out; + } + + exi_io_base = ug_udbg_setup_exi_io_base(np); + if (!exi_io_base) { + udbg_printf("%s: failed to setup EXI io base\n", __func__); + goto done; + } + + if (!ug_udbg_probe(exi_io_base)) { + udbg_printf("usbgecko_udbg: not found\n"); + iounmap(exi_io_base); + } else { + udbg_putc = ug_udbg_putc; + udbg_getc = ug_udbg_getc; + udbg_getc_poll = ug_udbg_getc_poll; + udbg_printf("usbgecko_udbg: ready\n"); + } + +done: + of_node_put(np); +out: + return; +} + +#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO + +static phys_addr_t __init ug_early_grab_io_addr(void) +{ +#if defined(CONFIG_GAMECUBE) + return 0x0c000000; +#elif defined(CONFIG_WII) + return 0x0d000000; +#else +#error Invalid platform for USB Gecko based early debugging. +#endif +} + +/* + * USB Gecko early debug support initialization for udbg. + */ +void __init udbg_init_usbgecko(void) +{ + void __iomem *early_debug_area; + void __iomem *exi_io_base; + + /* + * At this point we have a BAT already setup that enables I/O + * to the EXI hardware. + * + * The BAT uses a virtual address range reserved at the fixmap. + * This must match the virtual address configured in + * head_32.S:setup_usbgecko_bat(). + */ + early_debug_area = (void __iomem *)__fix_to_virt(FIX_EARLY_DEBUG_BASE); + exi_io_base = early_debug_area + 0x00006800; + + /* try to detect a USB Gecko */ + if (!ug_udbg_probe(exi_io_base)) + return; + + /* we found a USB Gecko, load udbg hooks */ + udbg_putc = ug_udbg_putc; + udbg_getc = ug_udbg_getc; + udbg_getc_poll = ug_udbg_getc_poll; + + /* + * Prepare again the same BAT for MMU_init. + * This allows udbg I/O to continue working after the MMU is + * turned on for real. + * It is safe to continue using the same virtual address as it is + * a reserved fixmap area. + */ + setbat(1, (unsigned long)early_debug_area, + ug_early_grab_io_addr(), 128*1024, PAGE_KERNEL_NCG); +} + +#endif /* CONFIG_PPC_EARLY_DEBUG_USBGECKO */ + diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h new file mode 100644 index 000000000..bb6cde4ad --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h @@ -0,0 +1,32 @@ +/* + * arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h + * + * udbg serial input/output routines for the USB Gecko adapter. 
+ * Copyright (C) 2008-2009 The GameCube Linux Team + * Copyright (C) 2008,2009 Albert Herranz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + */ + +#ifndef __USBGECKO_UDBG_H +#define __USBGECKO_UDBG_H + +#ifdef CONFIG_USBGECKO_UDBG + +extern void __init ug_udbg_init(void); + +#else + +static inline void __init ug_udbg_init(void) +{ +} + +#endif /* CONFIG_USBGECKO_UDBG */ + +void __init udbg_init_usbgecko(void); + +#endif /* __USBGECKO_UDBG_H */ diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c new file mode 100644 index 000000000..352592d3e --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -0,0 +1,253 @@ +/* + * arch/powerpc/platforms/embedded6xx/wii.c + * + * Nintendo Wii board-specific support + * Copyright (C) 2008-2009 The GameCube Linux Team + * Copyright (C) 2008,2009 Albert Herranz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + */ +#define DRV_MODULE_NAME "wii" +#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/of_platform.h> +#include <linux/memblock.h> +#include <mm/mmu_decl.h> + +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/time.h> +#include <asm/udbg.h> + +#include "flipper-pic.h" +#include "hlwd-pic.h" +#include "usbgecko_udbg.h" + +/* control block */ +#define HW_CTRL_COMPATIBLE "nintendo,hollywood-control" + +#define HW_CTRL_RESETS 0x94 +#define HW_CTRL_RESETS_SYS (1<<0) + +/* gpio */ +#define HW_GPIO_COMPATIBLE "nintendo,hollywood-gpio" + +#define HW_GPIO_BASE(idx) (idx * 0x20) +#define HW_GPIO_OUT(idx) (HW_GPIO_BASE(idx) + 0) +#define HW_GPIO_DIR(idx) (HW_GPIO_BASE(idx) + 4) + +#define HW_GPIO_SHUTDOWN (1<<1) +#define HW_GPIO_SLOT_LED (1<<5) +#define HW_GPIO_SENSOR_BAR (1<<8) + + +static void __iomem *hw_ctrl; +static void __iomem *hw_gpio; + +unsigned long wii_hole_start; +unsigned long wii_hole_size; + + +static int __init page_aligned(unsigned long x) +{ + return !(x & (PAGE_SIZE-1)); +} + +void __init wii_memory_fixups(void) +{ + struct memblock_region *p = memblock.memory.regions; + + /* + * This is part of a workaround to allow the use of two + * discontinuous RAM ranges on the Wii, even if this is + * currently unsupported on 32-bit PowerPC Linux. + * + * We coalesce the two memory ranges of the Wii into a + * single range, then create a reservation for the "hole" + * between both ranges. 
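+ *
+ * As a concrete example (the sizes given are the usual Wii ones,
+ * but the actual values come from the device tree): with 24MB of
+ * MEM1 at 0x00000000 and 64MB of MEM2 at 0x10000000, the reserved
+ * hole spans 0x01800000..0x0fffffff and the kernel afterwards sees
+ * a single 0x00000000..0x13ffffff range.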
+ */ + + BUG_ON(memblock.memory.cnt != 2); + BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base)); + + /* trim unaligned tail */ + memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE), + (phys_addr_t)ULLONG_MAX); + + /* determine hole, add & reserve them */ + wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE); + wii_hole_size = p[1].base - wii_hole_start; + memblock_add(wii_hole_start, wii_hole_size); + memblock_reserve(wii_hole_start, wii_hole_size); + + BUG_ON(memblock.memory.cnt != 1); + __memblock_dump_all(); + + /* allow ioremapping the address space in the hole */ + __allow_ioremap_reserved = 1; +} + +unsigned long __init wii_mmu_mapin_mem2(unsigned long top) +{ + unsigned long delta, size, bl; + unsigned long max_size = (256<<20); + + /* MEM2 64MB@0x10000000 */ + delta = wii_hole_start + wii_hole_size; + size = top - delta; + for (bl = 128<<10; bl < max_size; bl <<= 1) { + if (bl * 2 > size) + break; + } + setbat(4, PAGE_OFFSET+delta, delta, bl, PAGE_KERNEL_X); + return delta + bl; +} + +static void wii_spin(void) +{ + local_irq_disable(); + for (;;) + cpu_relax(); +} + +static void __iomem *wii_ioremap_hw_regs(char *name, char *compatible) +{ + void __iomem *hw_regs = NULL; + struct device_node *np; + struct resource res; + int error = -ENODEV; + + np = of_find_compatible_node(NULL, NULL, compatible); + if (!np) { + pr_err("no compatible node found for %s\n", compatible); + goto out; + } + error = of_address_to_resource(np, 0, &res); + if (error) { + pr_err("no valid reg found for %s\n", np->name); + goto out_put; + } + + hw_regs = ioremap(res.start, resource_size(&res)); + if (hw_regs) { + pr_info("%s at 0x%08x mapped to 0x%p\n", name, + res.start, hw_regs); + } + +out_put: + of_node_put(np); +out: + return hw_regs; +} + +static void __init wii_setup_arch(void) +{ + hw_ctrl = wii_ioremap_hw_regs("hw_ctrl", HW_CTRL_COMPATIBLE); + hw_gpio = wii_ioremap_hw_regs("hw_gpio", HW_GPIO_COMPATIBLE); + if (hw_gpio) { + /* turn off the front blue led and IR light */ + clrbits32(hw_gpio + HW_GPIO_OUT(0), + HW_GPIO_SLOT_LED | HW_GPIO_SENSOR_BAR); + } +} + +static void wii_restart(char *cmd) +{ + local_irq_disable(); + + if (hw_ctrl) { + /* clear the system reset pin to cause a reset */ + clrbits32(hw_ctrl + HW_CTRL_RESETS, HW_CTRL_RESETS_SYS); + } + wii_spin(); +} + +static void wii_power_off(void) +{ + local_irq_disable(); + + if (hw_gpio) { + /* make sure that the poweroff GPIO is configured as output */ + setbits32(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN); + + /* drive the poweroff GPIO high */ + setbits32(hw_gpio + HW_GPIO_OUT(1), HW_GPIO_SHUTDOWN); + } + wii_spin(); +} + +static void wii_halt(void) +{ + if (ppc_md.restart) + ppc_md.restart(NULL); + wii_spin(); +} + +static void __init wii_init_early(void) +{ + ug_udbg_init(); +} + +static void __init wii_pic_probe(void) +{ + flipper_pic_probe(); + hlwd_pic_probe(); +} + +static int __init wii_probe(void) +{ + unsigned long dt_root; + + dt_root = of_get_flat_dt_root(); + if (!of_flat_dt_is_compatible(dt_root, "nintendo,wii")) + return 0; + + pm_power_off = wii_power_off; + + return 1; +} + +static void wii_shutdown(void) +{ + hlwd_quiesce(); + flipper_quiesce(); +} + +define_machine(wii) { + .name = "wii", + .probe = wii_probe, + .init_early = wii_init_early, + .setup_arch = wii_setup_arch, + .restart = wii_restart, + .halt = wii_halt, + .init_IRQ = wii_pic_probe, + .get_irq = flipper_pic_get_irq, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, + .machine_shutdown = wii_shutdown, +}; + +static 
const struct of_device_id wii_of_bus[] = { + { .compatible = "nintendo,hollywood", }, + { }, +}; + +static int __init wii_device_probe(void) +{ + if (!machine_is(wii)) + return 0; + + of_platform_bus_probe(NULL, wii_of_bus, NULL); + return 0; +} +device_initcall(wii_device_probe); + diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c new file mode 100644 index 000000000..b97f6f3d3 --- /dev/null +++ b/arch/powerpc/platforms/fsl_uli1575.c @@ -0,0 +1,361 @@ +/* + * ULI M1575 setup code - specific to Freescale boards + * + * Copyright 2007 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/mc146818rtc.h> + +#include <asm/pci-bridge.h> + +#define ULI_PIRQA 0x08 +#define ULI_PIRQB 0x09 +#define ULI_PIRQC 0x0a +#define ULI_PIRQD 0x0b +#define ULI_PIRQE 0x0c +#define ULI_PIRQF 0x0d +#define ULI_PIRQG 0x0e + +#define ULI_8259_NONE 0x00 +#define ULI_8259_IRQ1 0x08 +#define ULI_8259_IRQ3 0x02 +#define ULI_8259_IRQ4 0x04 +#define ULI_8259_IRQ5 0x05 +#define ULI_8259_IRQ6 0x07 +#define ULI_8259_IRQ7 0x06 +#define ULI_8259_IRQ9 0x01 +#define ULI_8259_IRQ10 0x03 +#define ULI_8259_IRQ11 0x09 +#define ULI_8259_IRQ12 0x0b +#define ULI_8259_IRQ14 0x0d +#define ULI_8259_IRQ15 0x0f + +u8 uli_pirq_to_irq[8] = { + ULI_8259_IRQ9, /* PIRQA */ + ULI_8259_IRQ10, /* PIRQB */ + ULI_8259_IRQ11, /* PIRQC */ + ULI_8259_IRQ12, /* PIRQD */ + ULI_8259_IRQ5, /* PIRQE */ + ULI_8259_IRQ6, /* PIRQF */ + ULI_8259_IRQ7, /* PIRQG */ + ULI_8259_NONE, /* PIRQH */ +}; + +static inline bool is_quirk_valid(void) +{ + return (machine_is(mpc86xx_hpcn) || + machine_is(mpc8544_ds) || + machine_is(p2020_ds) || + machine_is(mpc8572_ds)); +} + +/* Bridge */ +static void early_uli5249(struct pci_dev *dev) +{ + unsigned char temp; + + if (!is_quirk_valid()) + return; + + pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + /* read/write lock */ + pci_read_config_byte(dev, 0x7c, &temp); + pci_write_config_byte(dev, 0x7c, 0x80); + + /* set as P2P bridge */ + pci_write_config_byte(dev, PCI_CLASS_PROG, 0x01); + dev->class |= 0x1; + + /* restore lock */ + pci_write_config_byte(dev, 0x7c, temp); +} + + +static void quirk_uli1575(struct pci_dev *dev) +{ + int i; + + if (!is_quirk_valid()) + return; + + /* + * ULI1575 interrupts route setup + */ + + /* ULI1575 IRQ mapping conf register maps PIRQx to IRQn */ + for (i = 0; i < 4; i++) { + u8 val = uli_pirq_to_irq[i*2] | (uli_pirq_to_irq[i*2+1] << 4); + pci_write_config_byte(dev, 0x48 + i, val); + } + + /* USB 1.1 OHCI controller 1: dev 28, func 0 - IRQ12 */ + pci_write_config_byte(dev, 0x86, ULI_PIRQD); + + /* USB 1.1 OHCI controller 2: dev 28, func 1 - IRQ9 */ + pci_write_config_byte(dev, 0x87, ULI_PIRQA); + + /* USB 1.1 OHCI controller 3: dev 28, func 2 - IRQ10 */ + pci_write_config_byte(dev, 0x88, ULI_PIRQB); + + /* Lan controller: dev 27, func 0 - IRQ6 */ + pci_write_config_byte(dev, 0x89, ULI_PIRQF); + + /* AC97 Audio controller: dev 29, func 0 - IRQ6 */ + pci_write_config_byte(dev, 0x8a, ULI_PIRQF); + + /* Modem controller: dev 29, func 1 - IRQ6 */ + pci_write_config_byte(dev, 0x8b, ULI_PIRQF); + + /* HD Audio controller: dev 29, func 2 - IRQ6 */ + 
pci_write_config_byte(dev, 0x8c, ULI_PIRQF); + + /* SATA controller: dev 31, func 1 - IRQ5 */ + pci_write_config_byte(dev, 0x8d, ULI_PIRQE); + + /* SMB interrupt: dev 30, func 1 - IRQ7 */ + pci_write_config_byte(dev, 0x8e, ULI_PIRQG); + + /* PMU ACPI SCI interrupt: dev 30, func 2 - IRQ7 */ + pci_write_config_byte(dev, 0x8f, ULI_PIRQG); + + /* USB 2.0 controller: dev 28, func 3 */ + pci_write_config_byte(dev, 0x74, ULI_8259_IRQ11); + + /* Primary PATA IDE IRQ: 14 + * Secondary PATA IDE IRQ: 15 + */ + pci_write_config_byte(dev, 0x44, 0x30 | ULI_8259_IRQ14); + pci_write_config_byte(dev, 0x75, ULI_8259_IRQ15); +} + +static void quirk_final_uli1575(struct pci_dev *dev) +{ + /* Set i8259 interrupt trigger + * IRQ 3: Level + * IRQ 4: Level + * IRQ 5: Level + * IRQ 6: Level + * IRQ 7: Level + * IRQ 9: Level + * IRQ 10: Level + * IRQ 11: Level + * IRQ 12: Level + * IRQ 14: Edge + * IRQ 15: Edge + */ + if (!is_quirk_valid()) + return; + + outb(0xfa, 0x4d0); + outb(0x1e, 0x4d1); + + /* setup RTC */ + CMOS_WRITE(RTC_SET, RTC_CONTROL); + CMOS_WRITE(RTC_24H, RTC_CONTROL); + + /* ensure month, date, and week alarm fields are ignored */ + CMOS_WRITE(0, RTC_VALID); + + outb_p(0x7c, 0x72); + outb_p(RTC_ALARM_DONT_CARE, 0x73); + + outb_p(0x7d, 0x72); + outb_p(RTC_ALARM_DONT_CARE, 0x73); +} + +/* SATA */ +static void quirk_uli5288(struct pci_dev *dev) +{ + unsigned char c; + unsigned int d; + + if (!is_quirk_valid()) + return; + + /* read/write lock */ + pci_read_config_byte(dev, 0x83, &c); + pci_write_config_byte(dev, 0x83, c|0x80); + + pci_read_config_dword(dev, PCI_CLASS_REVISION, &d); + d = (d & 0xff) | (PCI_CLASS_STORAGE_SATA_AHCI << 8); + pci_write_config_dword(dev, PCI_CLASS_REVISION, d); + + /* restore lock */ + pci_write_config_byte(dev, 0x83, c); + + /* disable emulated PATA mode enabled */ + pci_read_config_byte(dev, 0x84, &c); + pci_write_config_byte(dev, 0x84, c & ~0x01); +} + +/* PATA */ +static void quirk_uli5229(struct pci_dev *dev) +{ + unsigned short temp; + + if (!is_quirk_valid()) + return; + + pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE | + PCI_COMMAND_MASTER | PCI_COMMAND_IO); + + /* Enable Native IRQ 14/15 */ + pci_read_config_word(dev, 0x4a, &temp); + pci_write_config_word(dev, 0x4a, temp | 0x1000); +} + +/* We have to do a dummy read on the P2P for the RTC to work, WTF */ +static void quirk_final_uli5249(struct pci_dev *dev) +{ + int i; + u8 *dummy; + struct pci_bus *bus = dev->bus; + struct resource *res; + resource_size_t end = 0; + + for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCES+3; i++) { + unsigned long flags = pci_resource_flags(dev, i); + if ((flags & (IORESOURCE_MEM|IORESOURCE_PREFETCH)) == IORESOURCE_MEM) + end = pci_resource_end(dev, i); + } + + pci_bus_for_each_resource(bus, res, i) { + if (res && res->flags & IORESOURCE_MEM) { + if (res->end == end) + dummy = ioremap(res->start, 0x4); + else + dummy = ioremap(res->end - 3, 0x4); + if (dummy) { + in_8(dummy); + iounmap(dummy); + } + break; + } + } +} + +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, 0x5249, early_uli5249); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_uli1575); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, quirk_uli5288); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5249, quirk_final_uli5249); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x1575, quirk_final_uli1575); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229); + +static void hpcd_quirk_uli1575(struct pci_dev *dev) +{ + u32 
+
+	if (!machine_is(mpc86xx_hpcd))
+		return;
+
+	/* Disable INTx */
+	pci_read_config_dword(dev, 0x48, &temp32);
+	pci_write_config_dword(dev, 0x48, (temp32 | 1<<26));
+
+	/* Enable sideband interrupt */
+	pci_read_config_dword(dev, 0x90, &temp32);
+	pci_write_config_dword(dev, 0x90, (temp32 | 1<<22));
+}
+
+static void hpcd_quirk_uli5288(struct pci_dev *dev)
+{
+	unsigned char c;
+
+	if (!machine_is(mpc86xx_hpcd))
+		return;
+
+	pci_read_config_byte(dev, 0x83, &c);
+	c |= 0x80;
+	pci_write_config_byte(dev, 0x83, c);
+
+	pci_write_config_byte(dev, PCI_CLASS_PROG, 0x01);
+	pci_write_config_byte(dev, PCI_CLASS_DEVICE, 0x06);
+
+	pci_read_config_byte(dev, 0x83, &c);
+	c &= 0x7f;
+	pci_write_config_byte(dev, 0x83, c);
+}
+
+/*
+ * Since the 8259 PIC is disabled on this board, the IDE device cannot
+ * use a legacy IRQ; we need to let the IDE device work in native mode
+ * and use an interrupt line like other PCI devices. IRQ14 is a sideband
+ * interrupt from the IDE device to the CPU, and we use it as the IDE
+ * device's interrupt.
+ */
+static void hpcd_quirk_uli5229(struct pci_dev *dev)
+{
+	unsigned char c;
+
+	if (!machine_is(mpc86xx_hpcd))
+		return;
+
+	pci_read_config_byte(dev, 0x4b, &c);
+	c |= 0x10;
+	pci_write_config_byte(dev, 0x4b, c);
+}
+
+/*
+ * SATA interrupt pin bug fix.
+ * The 5288 has a chip bug: its interrupt pin should be 2, not the
+ * read-only value 1, so it uses INTB#, not the INTA# that is actually
+ * used by the 5229 IDE device. Because of this bug, the 5288 picks up
+ * the IDE device's irq from the device tree during PCI initialization;
+ * this function works around it by re-assigning the correct irq to
+ * the 5288.
+ */
+static void hpcd_final_uli5288(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct device_node *hosenode = hose ? hose->dn : NULL;
+	struct of_phandle_args oirq;
+	u32 laddr[3];
+
+	if (!machine_is(mpc86xx_hpcd))
+		return;
+
+	if (!hosenode)
+		return;
+
+	oirq.np = hosenode;
+	oirq.args[0] = 2;
+	oirq.args_count = 1;
+	laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(31, 0) << 8);
+	laddr[1] = laddr[2] = 0;
+	of_irq_parse_raw(laddr, &oirq);
+	dev->irq = irq_create_of_mapping(&oirq);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, hpcd_quirk_uli1575);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, hpcd_quirk_uli5288);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, hpcd_quirk_uli5229);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5288, hpcd_final_uli5288);
+
+int uli_exclude_device(struct pci_controller *hose,
+			u_char bus, u_char devfn)
+{
+	if (bus == (hose->first_busno + 2)) {
+		/* exclude Modem controller */
+		if ((PCI_SLOT(devfn) == 29) && (PCI_FUNC(devfn) == 1))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		/* exclude HD Audio controller */
+		if ((PCI_SLOT(devfn) == 29) && (PCI_FUNC(devfn) == 2))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig
new file mode 100644
index 000000000..1ea621a94
--- /dev/null
+++ b/arch/powerpc/platforms/maple/Kconfig
@@ -0,0 +1,18 @@
+config PPC_MAPLE
+	depends on PPC64 && PPC_BOOK3S
+	bool "Maple 970FX Evaluation Board"
+	select PCI
+	select MPIC
+	select U3_DART
+	select MPIC_U3_HT_IRQS
+	select GENERIC_TBSYNC
+	select PPC_UDBG_16550
+	select PPC_970_NAP
+	select PPC_NATIVE
+	select PPC_RTAS
+	select MMIO_NVRAM
+	select ATA_NONSTANDARD if ATA
+	default n
+	help
+	  This option enables support for the Maple 970FX Evaluation Board.
+	  For more information, refer to <http://www.970eval.com>
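For reference: the routing loop near the top of quirk_uli1575 above packs two 4-bit 8259 route codes into each byte-wide register at 0x48-0x4b, the even-numbered PIRQ in the low nibble and the odd one in the high nibble. A standalone rendering of that packing (editor's sketch, not part of the import; the table values are copied from uli_pirq_to_irq above):

#include <stdio.h>

/* 8259 route codes from uli_pirq_to_irq[]: PIRQA..PIRQH */
static const unsigned char pirq_to_irq[8] = {
	0x01, 0x03, 0x09, 0x0b,	/* IRQ9, IRQ10, IRQ11, IRQ12 */
	0x05, 0x07, 0x06, 0x00,	/* IRQ5, IRQ6, IRQ7, none */
};

int main(void)
{
	int i;

	/* Each config byte at 0x48+i holds PIRQ(2i) in the low nibble
	 * and PIRQ(2i+1) in the high nibble, as in quirk_uli1575(). */
	for (i = 0; i < 4; i++) {
		unsigned char val = pirq_to_irq[i * 2] |
				    (pirq_to_irq[i * 2 + 1] << 4);
		printf("reg 0x%02x = 0x%02x\n", 0x48 + i, val);
	}
	return 0;
}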
diff --git a/arch/powerpc/platforms/maple/Makefile b/arch/powerpc/platforms/maple/Makefile
new file mode 100644
index 000000000..1be1a993c
--- /dev/null
+++ b/arch/powerpc/platforms/maple/Makefile
@@ -0,0 +1 @@
+obj-y	+= setup.o pci.o time.o
diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h
new file mode 100644
index 000000000..eecfa182b
--- /dev/null
+++ b/arch/powerpc/platforms/maple/maple.h
@@ -0,0 +1,14 @@
+/*
+ * Declarations for maple-specific code.
+ *
+ * Maple is the name of a PPC970 evaluation board.
+ */
+extern int maple_set_rtc_time(struct rtc_time *tm);
+extern void maple_get_rtc_time(struct rtc_time *tm);
+extern unsigned long maple_get_boot_time(void);
+extern void maple_calibrate_decr(void);
+extern void maple_pci_init(void);
+extern void maple_pci_irq_fixup(struct pci_dev *dev);
+extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel);
+
+extern struct pci_controller_ops maple_pci_controller_ops;
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
new file mode 100644
index 000000000..a923230e5
--- /dev/null
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -0,0 +1,666 @@
+/*
+ * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ *		      IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/iommu.h>
+#include <asm/ppc-pci.h>
+
+#include "maple.h"
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+static struct pci_controller *u3_agp, *u3_ht, *u4_pcie;
+
+static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
+{
+	for (; node != NULL; node = node->sibling) {
+		const int *bus_range;
+		const unsigned int *class_code;
+		int len;
+
+		/* For PCI<->PCI bridges or CardBus bridges, we go down */
+		class_code = of_get_property(node, "class-code", NULL);
+		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
+			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
+			continue;
+		bus_range = of_get_property(node, "bus-range", &len);
+		if (bus_range != NULL && len > 2 * sizeof(int)) {
+			if (bus_range[1] > higher)
+				higher = bus_range[1];
+		}
+		higher = fixup_one_level_bus_range(node->child, higher);
+	}
+	return higher;
+}
+
+/* This routine fixes the "bus-range" property of all bridges in the
+ * system since they tend to have their "last" member wrong on Macs.
+ *
+ * Note that the bus numbers manipulated here are OF bus numbers, they
+ * are not Linux bus numbers.
+ */ +static void __init fixup_bus_range(struct device_node *bridge) +{ + int *bus_range; + struct property *prop; + int len; + + /* Lookup the "bus-range" property for the hose */ + prop = of_find_property(bridge, "bus-range", &len); + if (prop == NULL || prop->value == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s\n", + bridge->full_name); + return; + } + bus_range = prop->value; + bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); +} + + +static unsigned long u3_agp_cfa0(u8 devfn, u8 off) +{ + return (1 << (unsigned long)PCI_SLOT(devfn)) | + ((unsigned long)PCI_FUNC(devfn) << 8) | + ((unsigned long)off & 0xFCUL); +} + +static unsigned long u3_agp_cfa1(u8 bus, u8 devfn, u8 off) +{ + return ((unsigned long)bus << 16) | + ((unsigned long)devfn << 8) | + ((unsigned long)off & 0xFCUL) | + 1UL; +} + +static volatile void __iomem *u3_agp_cfg_access(struct pci_controller* hose, + u8 bus, u8 dev_fn, u8 offset) +{ + unsigned int caddr; + + if (bus == hose->first_busno) { + if (dev_fn < (11 << 3)) + return NULL; + caddr = u3_agp_cfa0(dev_fn, offset); + } else + caddr = u3_agp_cfa1(bus, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= 0x07; + return hose->cfg_data + offset; +} + +static int u3_agp_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_le16(addr); + break; + default: + *val = in_le32(addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u3_agp_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. 
+ */ + switch (len) { + case 1: + out_8(addr, val); + break; + case 2: + out_le16(addr, val); + break; + default: + out_le32(addr, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_agp_pci_ops = +{ + .read = u3_agp_read_config, + .write = u3_agp_write_config, +}; + +static unsigned long u3_ht_cfa0(u8 devfn, u8 off) +{ + return (devfn << 8) | off; +} + +static unsigned long u3_ht_cfa1(u8 bus, u8 devfn, u8 off) +{ + return u3_ht_cfa0(devfn, off) + (bus << 16) + 0x01000000UL; +} + +static volatile void __iomem *u3_ht_cfg_access(struct pci_controller* hose, + u8 bus, u8 devfn, u8 offset) +{ + if (bus == hose->first_busno) { + if (PCI_SLOT(devfn) == 0) + return NULL; + return hose->cfg_data + u3_ht_cfa0(devfn, offset); + } else + return hose->cfg_data + u3_ht_cfa1(bus, devfn, offset); +} + +static int u3_ht_root_read_config(struct pci_controller *hose, u8 offset, + int len, u32 *val) +{ + volatile void __iomem *addr; + + addr = hose->cfg_addr; + addr += ((offset & ~3) << 2) + (4 - len - (offset & 3)); + + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_be16(addr); + break; + default: + *val = in_be32(addr); + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_root_write_config(struct pci_controller *hose, u8 offset, + int len, u32 val) +{ + volatile void __iomem *addr; + + addr = hose->cfg_addr + ((offset & ~3) << 2) + (4 - len - (offset & 3)); + + if (offset >= PCI_BASE_ADDRESS_0 && offset < PCI_CAPABILITY_LIST) + return PCIBIOS_SUCCESSFUL; + + switch (len) { + case 1: + out_8(addr, val); + break; + case 2: + out_be16(addr, val); + break; + default: + out_be32(addr, val); + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0)) + return u3_ht_root_read_config(hose, offset, len, val); + + if (offset > 0xff) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_le16(addr); + break; + default: + *val = in_le32(addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0)) + return u3_ht_root_write_config(hose, offset, len, val); + + if (offset > 0xff) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. 
+	 */
+	switch (len) {
+	case 1:
+		out_8(addr, val);
+		break;
+	case 2:
+		out_le16(addr, val);
+		break;
+	default:
+		out_le32(addr, val);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops u3_ht_pci_ops =
+{
+	.read = u3_ht_read_config,
+	.write = u3_ht_write_config,
+};
+
+static unsigned int u4_pcie_cfa0(unsigned int devfn, unsigned int off)
+{
+	return (1 << PCI_SLOT(devfn)) |
+		(PCI_FUNC(devfn) << 8) |
+		((off >> 8) << 28) |
+		(off & 0xfcu);
+}
+
+static unsigned int u4_pcie_cfa1(unsigned int bus, unsigned int devfn,
+				 unsigned int off)
+{
+	return (bus << 16) |
+		(devfn << 8) |
+		((off >> 8) << 28) |
+		(off & 0xfcu) | 1u;
+}
+
+static volatile void __iomem *u4_pcie_cfg_access(struct pci_controller* hose,
+					u8 bus, u8 dev_fn, int offset)
+{
+	unsigned int caddr;
+
+	if (bus == hose->first_busno)
+		caddr = u4_pcie_cfa0(dev_fn, offset);
+	else
+		caddr = u4_pcie_cfa1(bus, dev_fn, offset);
+
+	/* Uninorth will return garbage if we don't read back the value ! */
+	do {
+		out_le32(hose->cfg_addr, caddr);
+	} while (in_le32(hose->cfg_addr) != caddr);
+
+	offset &= 0x03;
+	return hose->cfg_data + offset;
+}
+
+static int u4_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
+			       int offset, int len, u32 *val)
+{
+	struct pci_controller *hose;
+	volatile void __iomem *addr;
+
+	hose = pci_bus_to_host(bus);
+	if (hose == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (offset >= 0x1000)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		*val = in_8(addr);
+		break;
+	case 2:
+		*val = in_le16(addr);
+		break;
+	default:
+		*val = in_le32(addr);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int u4_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+				int offset, int len, u32 val)
+{
+	struct pci_controller *hose;
+	volatile void __iomem *addr;
+
+	hose = pci_bus_to_host(bus);
+	if (hose == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (offset >= 0x1000)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		out_8(addr, val);
+		break;
+	case 2:
+		out_le16(addr, val);
+		break;
+	default:
+		out_le32(addr, val);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops u4_pcie_pci_ops =
+{
+	.read = u4_pcie_read_config,
+	.write = u4_pcie_write_config,
+};
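The cfa0/cfa1 helpers above implement the usual split between type 0 configuration cycles (root bus: a one-hot, IDSEL-style device bit) and type 1 cycles (bus/devfn routed behind bridges), with the extended-config offset bits folded into bits 31:28. A standalone rendering of the U4 PCIe variant (editor's sketch; the constants mirror u4_pcie_cfa0/u4_pcie_cfa1):

#include <stdio.h>

#define PCI_SLOT(devfn)	(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)	((devfn) & 0x07)

static unsigned int cfa0(unsigned int devfn, unsigned int off)
{
	return (1u << PCI_SLOT(devfn)) | (PCI_FUNC(devfn) << 8) |
	       ((off >> 8) << 28) | (off & 0xfcu);
}

static unsigned int cfa1(unsigned int bus, unsigned int devfn, unsigned int off)
{
	return (bus << 16) | (devfn << 8) | ((off >> 8) << 28) |
	       (off & 0xfcu) | 1u;	/* bit 0 marks a type 1 cycle */
}

int main(void)
{
	/* device 3, function 1, config offset 0x104 (extended space) */
	unsigned int devfn = (3 << 3) | 1;

	printf("type0: 0x%08x\n", cfa0(devfn, 0x104));
	printf("type1: 0x%08x\n", cfa1(5, devfn, 0x104));
	return 0;
}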
+static void __init setup_u3_agp(struct pci_controller* hose)
+{
+	/* On G5, we move AGP up to a high bus number so we don't need
+	 * to reassign bus numbers for HT. If we ever have P2P bridges
+	 * on AGP, we'll have to move pci_assign_all_buses to the
+	 * pci_controller structure so we enable it for AGP and not for
+	 * HT children.
+	 * We hard-code the address because of the different size of
+	 * the reg address cell; we shall fix that by killing struct
+	 * reg_property and using some accessor functions instead.
+	 */
+	hose->first_busno = 0xf0;
+	hose->last_busno = 0xff;
+	hose->ops = &u3_agp_pci_ops;
+	hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+
+	u3_agp = hose;
+}
+
+static void __init setup_u4_pcie(struct pci_controller* hose)
+{
+	/* We currently only implement the "non-atomic" config space, to
+	 * be optimised later.
+	 */
+	hose->ops = &u4_pcie_pci_ops;
+	hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+
+	u4_pcie = hose;
+}
+
+static void __init setup_u3_ht(struct pci_controller* hose)
+{
+	hose->ops = &u3_ht_pci_ops;
+
+	/* We hard-code the address because of the different size of
+	 * the reg address cell; we shall fix that by killing struct
+	 * reg_property and using some accessor functions instead.
+	 */
+	hose->cfg_data = ioremap(0xf2000000, 0x02000000);
+	hose->cfg_addr = ioremap(0xf8070000, 0x1000);
+
+	hose->first_busno = 0;
+	hose->last_busno = 0xef;
+
+	u3_ht = hose;
+}
+
+static int __init maple_add_bridge(struct device_node *dev)
+{
+	int len;
+	struct pci_controller *hose;
+	char* disp_name;
+	const int *bus_range;
+	int primary = 1;
+
+	DBG("Adding PCI host bridge %s\n", dev->full_name);
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n",
+		       dev->full_name);
+	}
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	hose->controller_ops = maple_pci_controller_ops;
+
+	disp_name = NULL;
+	if (of_device_is_compatible(dev, "u3-agp")) {
+		setup_u3_agp(hose);
+		disp_name = "U3-AGP";
+		primary = 0;
+	} else if (of_device_is_compatible(dev, "u3-ht")) {
+		setup_u3_ht(hose);
+		disp_name = "U3-HT";
+		primary = 1;
+	} else if (of_device_is_compatible(dev, "u4-pcie")) {
+		setup_u4_pcie(hose);
+		disp_name = "U4-PCIE";
+		primary = 0;
+	}
+	printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n",
+	       disp_name, hose->first_busno, hose->last_busno);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, primary);
+
+	/* Fixup "bus-range" OF property */
+	fixup_bus_range(dev);
+
+	/* Check for legacy IOs */
+	isa_bridge_find_early(hose);
+
+	return 0;
+}
+
+void maple_pci_irq_fixup(struct pci_dev *dev)
+{
+	DBG(" -> maple_pci_irq_fixup\n");
+
+	/* Fixup IRQ for PCIe host */
+	if (u4_pcie != NULL && dev->bus->number == 0 &&
+	    pci_bus_to_host(dev->bus) == u4_pcie) {
+		printk(KERN_DEBUG "Fixup U4 PCIe IRQ\n");
+		dev->irq = irq_create_mapping(NULL, 1);
+		if (dev->irq != NO_IRQ)
+			irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW);
+	}
+
+	/* Hide AMD8111 IDE interrupt when in legacy mode so
+	 * the driver calls pci_get_legacy_ide_irq()
+	 */
+	if (dev->vendor == PCI_VENDOR_ID_AMD &&
+	    dev->device == PCI_DEVICE_ID_AMD_8111_IDE &&
+	    (dev->class & 5) != 5) {
+		dev->irq = NO_IRQ;
+	}
+
+	DBG(" <- maple_pci_irq_fixup\n");
+}
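The (dev->class & 5) test above looks at the IDE programming-interface byte: bit 0 means the primary channel is in native-PCI mode, bit 2 the secondary. Only when both are set does the controller raise normal PCI interrupts instead of legacy IRQ 14/15. A compilable sketch of that test (editor's illustration; the sample class values are hypothetical):

#include <stdio.h>

/* dev->class is class << 16 | subclass << 8 | prog-if; for IDE,
 * prog-if bit 0 = primary channel native, bit 2 = secondary native. */
static int ide_is_fully_native(unsigned int class)
{
	return (class & 5) == 5;
}

int main(void)
{
	printf("%d\n", ide_is_fully_native(0x01018a));	/* legacy: 0 */
	printf("%d\n", ide_is_fully_native(0x01018f));	/* native: 1 */
	return 0;
}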
+void __init maple_pci_init(void)
+{
+	struct device_node *np, *root;
+	struct device_node *ht = NULL;
+
+	/* Probe root PCI hosts, that is on U3 the AGP host and the
+	 * HyperTransport host. The latter is "kept" around and added
+	 * last, as its resource management relies on the AGP resources
+	 * having been set up first.
+	 */
+	root = of_find_node_by_path("/");
+	if (root == NULL) {
+		printk(KERN_CRIT "maple_pci_init: can't find root of device tree\n");
+		return;
+	}
+	for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) {
+		if (!np->type)
+			continue;
+		if (strcmp(np->type, "pci") && strcmp(np->type, "ht"))
+			continue;
+		if ((of_device_is_compatible(np, "u4-pcie") ||
+		     of_device_is_compatible(np, "u3-agp")) &&
+		    maple_add_bridge(np) == 0)
+			of_node_get(np);
+
+		if (of_device_is_compatible(np, "u3-ht")) {
+			of_node_get(np);
+			ht = np;
+		}
+	}
+	of_node_put(root);
+
+	/* Now setup the HyperTransport host if we found any */
+	if (ht && maple_add_bridge(ht) != 0)
+		of_node_put(ht);
+
+	/* Setup the linkage between OF nodes and PHBs */
+	pci_devs_phb_init();
+
+	/* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We
+	 * assume there is no P2P bridge on the AGP bus, which should
+	 * hopefully be a safe assumption.
+	 */
+	if (u3_agp) {
+		struct device_node *np = u3_agp->dn;
+		PCI_DN(np)->busno = 0xf0;
+		for (np = np->child; np; np = np->sibling)
+			PCI_DN(np)->busno = 0xf0;
+	}
+
+	/* Tell pci.c to not change any resource allocations. */
+	pci_add_flags(PCI_PROBE_ONLY);
+}
+
+int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel)
+{
+	struct device_node *np;
+	unsigned int defirq = channel ? 15 : 14;
+	unsigned int irq;
+
+	if (pdev->vendor != PCI_VENDOR_ID_AMD ||
+	    pdev->device != PCI_DEVICE_ID_AMD_8111_IDE)
+		return defirq;
+
+	np = pci_device_to_OF_node(pdev);
+	if (np == NULL) {
+		printk("Failed to locate OF node for IDE %s\n",
+		       pci_name(pdev));
+		return defirq;
+	}
+	irq = irq_of_parse_and_map(np, channel & 0x1);
+	if (irq == NO_IRQ) {
+		printk("Failed to map onboard IDE interrupt for channel %d\n",
+		       channel);
+		return defirq;
+	}
+	return irq;
+}
+
+static void quirk_ipr_msi(struct pci_dev *dev)
+{
+	/* Something prevents MSIs from the IPR from working on Bimini,
+	 * and the driver has no smarts to recover. So disable MSI
+	 * on it for now. */
+
+	if (machine_is(maple)) {
+		dev->no_msi = 1;
+		dev_info(&dev->dev, "Quirk disabled MSI\n");
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN,
+			quirk_ipr_msi);
+
+struct pci_controller_ops maple_pci_controller_ops = {
+};
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
new file mode 100644
index 000000000..a83718854
--- /dev/null
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -0,0 +1,393 @@
+/*
+ * Maple (970 eval board) setup code
+ *
+ * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ *                    IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ * + */ + +#undef DEBUG + +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/tty.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/ioport.h> +#include <linux/major.h> +#include <linux/initrd.h> +#include <linux/vt_kern.h> +#include <linux/console.h> +#include <linux/pci.h> +#include <linux/adb.h> +#include <linux/cuda.h> +#include <linux/pmu.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/serial.h> +#include <linux/smp.h> +#include <linux/bitops.h> +#include <linux/of_device.h> +#include <linux/memblock.h> + +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/pci-bridge.h> +#include <asm/iommu.h> +#include <asm/machdep.h> +#include <asm/dma.h> +#include <asm/cputable.h> +#include <asm/time.h> +#include <asm/mpic.h> +#include <asm/rtas.h> +#include <asm/udbg.h> +#include <asm/nvram.h> + +#include "maple.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +static unsigned long maple_find_nvram_base(void) +{ + struct device_node *rtcs; + unsigned long result = 0; + + /* find NVRAM device */ + rtcs = of_find_compatible_node(NULL, "nvram", "AMD8111"); + if (rtcs) { + struct resource r; + if (of_address_to_resource(rtcs, 0, &r)) { + printk(KERN_EMERG "Maple: Unable to translate NVRAM" + " address\n"); + goto bail; + } + if (!(r.flags & IORESOURCE_IO)) { + printk(KERN_EMERG "Maple: NVRAM address isn't PIO!\n"); + goto bail; + } + result = r.start; + } else + printk(KERN_EMERG "Maple: Unable to find NVRAM\n"); + bail: + of_node_put(rtcs); + return result; +} + +static void maple_restart(char *cmd) +{ + unsigned int maple_nvram_base; + const unsigned int *maple_nvram_offset, *maple_nvram_command; + struct device_node *sp; + + maple_nvram_base = maple_find_nvram_base(); + if (maple_nvram_base == 0) + goto fail; + + /* find service processor device */ + sp = of_find_node_by_name(NULL, "service-processor"); + if (!sp) { + printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); + goto fail; + } + maple_nvram_offset = of_get_property(sp, "restart-addr", NULL); + maple_nvram_command = of_get_property(sp, "restart-value", NULL); + of_node_put(sp); + + /* send command */ + outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); + for (;;) ; + fail: + printk(KERN_EMERG "Maple: Manual Restart Required\n"); +} + +static void maple_power_off(void) +{ + unsigned int maple_nvram_base; + const unsigned int *maple_nvram_offset, *maple_nvram_command; + struct device_node *sp; + + maple_nvram_base = maple_find_nvram_base(); + if (maple_nvram_base == 0) + goto fail; + + /* find service processor device */ + sp = of_find_node_by_name(NULL, "service-processor"); + if (!sp) { + printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); + goto fail; + } + maple_nvram_offset = of_get_property(sp, "power-off-addr", NULL); + maple_nvram_command = of_get_property(sp, "power-off-value", NULL); + of_node_put(sp); + + /* send command */ + outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); + for (;;) ; + fail: + printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); +} + +static void maple_halt(void) +{ + maple_power_off(); +} + +#ifdef CONFIG_SMP 
+struct smp_ops_t maple_smp_ops = { + .probe = smp_mpic_probe, + .message_pass = smp_mpic_message_pass, + .kick_cpu = smp_generic_kick_cpu, + .setup_cpu = smp_mpic_setup_cpu, + .give_timebase = smp_generic_give_timebase, + .take_timebase = smp_generic_take_timebase, +}; +#endif /* CONFIG_SMP */ + +static void __init maple_use_rtas_reboot_and_halt_if_present(void) +{ + if (rtas_service_present("system-reboot") && + rtas_service_present("power-off")) { + ppc_md.restart = rtas_restart; + pm_power_off = rtas_power_off; + ppc_md.halt = rtas_halt; + } +} + +void __init maple_setup_arch(void) +{ + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + /* Setup SMP callback */ +#ifdef CONFIG_SMP + smp_ops = &maple_smp_ops; +#endif + /* Lookup PCI hosts */ + maple_pci_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + maple_use_rtas_reboot_and_halt_if_present(); + + printk(KERN_DEBUG "Using native/NAP idle loop\n"); + + mmio_nvram_init(); +} + +/* + * Early initialization. + */ +static void __init maple_init_early(void) +{ + DBG(" -> maple_init_early\n"); + + iommu_init_early_dart(&maple_pci_controller_ops); + + DBG(" <- maple_init_early\n"); +} + +/* + * This is almost identical to pSeries and CHRP. We need to make that + * code generic at one point, with appropriate bits in the device-tree to + * identify the presence of an HT APIC + */ +static void __init maple_init_IRQ(void) +{ + struct device_node *root, *np, *mpic_node = NULL; + const unsigned int *opprop; + unsigned long openpic_addr = 0; + int naddr, n, i, opplen, has_isus = 0; + struct mpic *mpic; + unsigned int flags = 0; + + /* Locate MPIC in the device-tree. Note that there is a bug + * in Maple device-tree where the type of the controller is + * open-pic and not interrupt-controller + */ + + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "open-pic")) { + mpic_node = np; + break; + } + if (mpic_node == NULL) + for_each_node_by_type(np, "open-pic") { + mpic_node = np; + break; + } + if (mpic_node == NULL) { + printk(KERN_ERR + "Failed to locate the MPIC interrupt controller\n"); + return; + } + + /* Find address list in /platform-open-pic */ + root = of_find_node_by_path("/"); + naddr = of_n_addr_cells(root); + opprop = of_get_property(root, "platform-open-pic", &opplen); + if (opprop != 0) { + openpic_addr = of_read_number(opprop, naddr); + has_isus = (opplen > naddr); + printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n", + openpic_addr, has_isus); + } + + BUG_ON(openpic_addr == 0); + + /* Check for a big endian MPIC */ + if (of_get_property(np, "big-endian", NULL) != NULL) + flags |= MPIC_BIG_ENDIAN; + + /* XXX Maple specific bits */ + flags |= MPIC_U3_HT_IRQS; + /* All U3/U4 are big-endian, older SLOF firmware doesn't encode this */ + flags |= MPIC_BIG_ENDIAN; + + /* Setup the openpic driver. More device-tree junks, we hard code no + * ISUs for now. I'll have to revisit some stuffs with the folks doing + * the firmware for those + */ + mpic = mpic_alloc(mpic_node, openpic_addr, flags, + /*has_isus ? 
16 :*/ 0, 0, " MPIC ");
+	BUG_ON(mpic == NULL);
+
+	/* Add ISUs */
+	opplen /= sizeof(u32);
+	for (n = 0, i = naddr; i < opplen; i += naddr, n++) {
+		unsigned long isuaddr = of_read_number(opprop + i, naddr);
+		mpic_assign_isu(mpic, n, isuaddr);
+	}
+
+	/* All ISUs are setup, complete initialization */
+	mpic_init(mpic);
+	ppc_md.get_irq = mpic_get_irq;
+	of_node_put(mpic_node);
+	of_node_put(root);
+}
+
+static void __init maple_progress(char *s, unsigned short hex)
+{
+	printk("*** %04x : %s\n", hex, s ? s : "");
+}
+
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init maple_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "Momentum,Maple") &&
+	    !of_flat_dt_is_compatible(root, "Momentum,Apache"))
+		return 0;
+	/*
+	 * On U3, the DART (iommu) must be allocated now since it
+	 * has an impact on htab_initialize (due to the large page it
+	 * occupies having to be broken up so the DART itself is not
+	 * part of the cacheable linear mapping).
+	 */
+	alloc_dart_table();
+
+	hpte_init_native();
+	pm_power_off = maple_power_off;
+
+	return 1;
+}
+
+define_machine(maple) {
+	.name			= "Maple",
+	.probe			= maple_probe,
+	.setup_arch		= maple_setup_arch,
+	.init_early		= maple_init_early,
+	.init_IRQ		= maple_init_IRQ,
+	.pci_irq_fixup		= maple_pci_irq_fixup,
+	.pci_get_legacy_ide_irq	= maple_pci_get_legacy_ide_irq,
+	.restart		= maple_restart,
+	.halt			= maple_halt,
+	.get_boot_time		= maple_get_boot_time,
+	.set_rtc_time		= maple_set_rtc_time,
+	.get_rtc_time		= maple_get_rtc_time,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= maple_progress,
+	.power_save		= power4_idle,
+};
+
+#ifdef CONFIG_EDAC
+/*
+ * Register a platform device for CPC925 memory controller on
+ * all boards with U3H (CPC925) bridge.
+ */
+static int __init maple_cpc925_edac_setup(void)
+{
+	struct platform_device *pdev;
+	struct device_node *np = NULL;
+	struct resource r;
+	int ret;
+	volatile void __iomem *mem;
+	u32 rev;
+
+	np = of_find_node_by_type(NULL, "memory-controller");
+	if (!np) {
+		printk(KERN_ERR "%s: Unable to find memory-controller node\n",
+			__func__);
+		return -ENODEV;
+	}
+
+	ret = of_address_to_resource(np, 0, &r);
+	of_node_put(np);
+
+	if (ret < 0) {
+		printk(KERN_ERR "%s: Unable to get memory-controller reg\n",
+			__func__);
+		return -ENODEV;
+	}
+
+	mem = ioremap(r.start, resource_size(&r));
+	if (!mem) {
+		printk(KERN_ERR "%s: Unable to map memory-controller memory\n",
+			__func__);
+		return -ENOMEM;
+	}
+
+	rev = __raw_readl(mem);
+	iounmap(mem);
+
+	if (rev < 0x34 || rev > 0x3f) { /* U3H */
+		printk(KERN_ERR "%s: Non-CPC925(U3H) bridge revision: %02x\n",
+			__func__, rev);
+		return 0;
+	}
+
+	pdev = platform_device_register_simple("cpc925_edac", 0, &r, 1);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	printk(KERN_INFO "%s: CPC925 platform device created\n", __func__);
+
+	return 0;
+}
+machine_device_initcall(maple, maple_cpc925_edac_setup);
+#endif
diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c
new file mode 100644
index 000000000..b4a369dac
--- /dev/null
+++ b/arch/powerpc/platforms/maple/time.c
@@ -0,0 +1,176 @@
+/*
+ * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ *                    IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ * + */ + +#undef DEBUG + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/time.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/interrupt.h> +#include <linux/mc146818rtc.h> +#include <linux/bcd.h> + +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/machdep.h> +#include <asm/time.h> + +#include "maple.h" + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +static int maple_rtc_addr; + +static int maple_clock_read(int addr) +{ + outb_p(addr, maple_rtc_addr); + return inb_p(maple_rtc_addr+1); +} + +static void maple_clock_write(unsigned long val, int addr) +{ + outb_p(addr, maple_rtc_addr); + outb_p(val, maple_rtc_addr+1); +} + +void maple_get_rtc_time(struct rtc_time *tm) +{ + do { + tm->tm_sec = maple_clock_read(RTC_SECONDS); + tm->tm_min = maple_clock_read(RTC_MINUTES); + tm->tm_hour = maple_clock_read(RTC_HOURS); + tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH); + tm->tm_mon = maple_clock_read(RTC_MONTH); + tm->tm_year = maple_clock_read(RTC_YEAR); + } while (tm->tm_sec != maple_clock_read(RTC_SECONDS)); + + if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY) + || RTC_ALWAYS_BCD) { + tm->tm_sec = bcd2bin(tm->tm_sec); + tm->tm_min = bcd2bin(tm->tm_min); + tm->tm_hour = bcd2bin(tm->tm_hour); + tm->tm_mday = bcd2bin(tm->tm_mday); + tm->tm_mon = bcd2bin(tm->tm_mon); + tm->tm_year = bcd2bin(tm->tm_year); + } + if ((tm->tm_year + 1900) < 1970) + tm->tm_year += 100; + + GregorianDay(tm); +} + +int maple_set_rtc_time(struct rtc_time *tm) +{ + unsigned char save_control, save_freq_select; + int sec, min, hour, mon, mday, year; + + spin_lock(&rtc_lock); + + save_control = maple_clock_read(RTC_CONTROL); /* tell the clock it's being set */ + + maple_clock_write((save_control|RTC_SET), RTC_CONTROL); + + save_freq_select = maple_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */ + + maple_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + sec = tm->tm_sec; + min = tm->tm_min; + hour = tm->tm_hour; + mon = tm->tm_mon; + mday = tm->tm_mday; + year = tm->tm_year; + + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + sec = bin2bcd(sec); + min = bin2bcd(min); + hour = bin2bcd(hour); + mon = bin2bcd(mon); + mday = bin2bcd(mday); + year = bin2bcd(year); + } + maple_clock_write(sec, RTC_SECONDS); + maple_clock_write(min, RTC_MINUTES); + maple_clock_write(hour, RTC_HOURS); + maple_clock_write(mon, RTC_MONTH); + maple_clock_write(mday, RTC_DAY_OF_MONTH); + maple_clock_write(year, RTC_YEAR); + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... 
-- Markus Kuhn + */ + maple_clock_write(save_control, RTC_CONTROL); + maple_clock_write(save_freq_select, RTC_FREQ_SELECT); + + spin_unlock(&rtc_lock); + + return 0; +} + +static struct resource rtc_iores = { + .name = "rtc", + .flags = IORESOURCE_BUSY, +}; + +unsigned long __init maple_get_boot_time(void) +{ + struct rtc_time tm; + struct device_node *rtcs; + + rtcs = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00"); + if (rtcs) { + struct resource r; + if (of_address_to_resource(rtcs, 0, &r)) { + printk(KERN_EMERG "Maple: Unable to translate RTC" + " address\n"); + goto bail; + } + if (!(r.flags & IORESOURCE_IO)) { + printk(KERN_EMERG "Maple: RTC address isn't PIO!\n"); + goto bail; + } + maple_rtc_addr = r.start; + printk(KERN_INFO "Maple: Found RTC at IO 0x%x\n", + maple_rtc_addr); + } + bail: + if (maple_rtc_addr == 0) { + maple_rtc_addr = RTC_PORT(0); /* legacy address */ + printk(KERN_INFO "Maple: No device node for RTC, assuming " + "legacy address (0x%x)\n", maple_rtc_addr); + } + + rtc_iores.start = maple_rtc_addr; + rtc_iores.end = maple_rtc_addr + 7; + request_resource(&ioport_resource, &rtc_iores); + + maple_get_rtc_time(&tm); + return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); +} + diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig new file mode 100644 index 000000000..a2aeb327d --- /dev/null +++ b/arch/powerpc/platforms/pasemi/Kconfig @@ -0,0 +1,40 @@ +config PPC_PASEMI + depends on PPC64 && PPC_BOOK3S + bool "PA Semi SoC-based platforms" + default n + select MPIC + select PCI + select PPC_UDBG_16550 + select PPC_NATIVE + select MPIC_BROKEN_REGREAD + help + This option enables support for PA Semi's PWRficient line + of SoC processors, including PA6T-1682M + +menu "PA Semi PWRficient options" + depends on PPC_PASEMI + +config PPC_PASEMI_IOMMU + bool "PA Semi IOMMU support" + depends on PPC_PASEMI + help + IOMMU support for PA Semi PWRficient + +config PPC_PASEMI_IOMMU_DMA_FORCE + bool "Force DMA engine to use IOMMU" + depends on PPC_PASEMI_IOMMU + help + This option forces the use of the IOMMU also for the + DMA engine. Otherwise the kernel will use it only when + running under a hypervisor. + + If in doubt, say "N". + +config PPC_PASEMI_MDIO + depends on PHYLIB + tristate "MDIO support via GPIO" + default y + help + Driver for MDIO via GPIO on PWRficient platforms + +endmenu diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile new file mode 100644 index 000000000..8e8d4cae5 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/Makefile @@ -0,0 +1,2 @@ +obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o +obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c new file mode 100644 index 000000000..aafa01ba0 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -0,0 +1,633 @@ +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Common functions for DMA access on PA Semi PWRficient + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+
+#include <asm/pasemi_dma.h>
+
+#define MAX_TXCH 64
+#define MAX_RXCH 64
+#define MAX_FLAGS 64
+#define MAX_FUN 8
+
+static struct pasdma_status *dma_status;
+
+static void __iomem *iob_regs;
+static void __iomem *mac_regs[6];
+static void __iomem *dma_regs;
+
+static int base_hw_irq;
+
+static int num_txch, num_rxch;
+
+static struct pci_dev *dma_pdev;
+
+/* Bitmaps to handle allocation of channels */
+
+static DECLARE_BITMAP(txch_free, MAX_TXCH);
+static DECLARE_BITMAP(rxch_free, MAX_RXCH);
+static DECLARE_BITMAP(flags_free, MAX_FLAGS);
+static DECLARE_BITMAP(fun_free, MAX_FUN);
+
+/* pasemi_read_iob_reg - read IOB register
+ * @reg: Register to read (offset into PCI CFG space)
+ */
+unsigned int pasemi_read_iob_reg(unsigned int reg)
+{
+	return in_le32(iob_regs+reg);
+}
+EXPORT_SYMBOL(pasemi_read_iob_reg);
+
+/* pasemi_write_iob_reg - write IOB register
+ * @reg: Register to write to (offset into PCI CFG space)
+ * @val: Value to write
+ */
+void pasemi_write_iob_reg(unsigned int reg, unsigned int val)
+{
+	out_le32(iob_regs+reg, val);
+}
+EXPORT_SYMBOL(pasemi_write_iob_reg);
+
+/* pasemi_read_mac_reg - read MAC register
+ * @intf: MAC interface
+ * @reg: Register to read (offset into PCI CFG space)
+ */
+unsigned int pasemi_read_mac_reg(int intf, unsigned int reg)
+{
+	return in_le32(mac_regs[intf]+reg);
+}
+EXPORT_SYMBOL(pasemi_read_mac_reg);
+
+/* pasemi_write_mac_reg - write MAC register
+ * @intf: MAC interface
+ * @reg: Register to write to (offset into PCI CFG space)
+ * @val: Value to write
+ */
+void pasemi_write_mac_reg(int intf, unsigned int reg, unsigned int val)
+{
+	out_le32(mac_regs[intf]+reg, val);
+}
+EXPORT_SYMBOL(pasemi_write_mac_reg);
+
+/* pasemi_read_dma_reg - read DMA register
+ * @reg: Register to read (offset into PCI CFG space)
+ */
+unsigned int pasemi_read_dma_reg(unsigned int reg)
+{
+	return in_le32(dma_regs+reg);
+}
+EXPORT_SYMBOL(pasemi_read_dma_reg);
+
+/* pasemi_write_dma_reg - write DMA register
+ * @reg: Register to write to (offset into PCI CFG space)
+ * @val: Value to write
+ */
+void pasemi_write_dma_reg(unsigned int reg, unsigned int val)
+{
+	out_le32(dma_regs+reg, val);
+}
+EXPORT_SYMBOL(pasemi_write_dma_reg);
+
+static int pasemi_alloc_tx_chan(enum pasemi_dmachan_type type)
+{
+	int bit;
+	int start, limit;
+
+	switch (type & (TXCHAN_EVT0|TXCHAN_EVT1)) {
+	case TXCHAN_EVT0:
+		start = 0;
+		limit = 10;
+		break;
+	case TXCHAN_EVT1:
+		start = 10;
+		limit = MAX_TXCH;
+		break;
+	default:
+		start = 0;
+		limit = MAX_TXCH;
+		break;
+	}
+retry:
+	bit = find_next_bit(txch_free, MAX_TXCH, start);
+	if (bit >= limit)
+		return -ENOSPC;
+	if (!test_and_clear_bit(bit, txch_free))
+		goto retry;
+
+	return bit;
+}
+
+static void pasemi_free_tx_chan(int chan)
+{
+	BUG_ON(test_bit(chan, txch_free));
+	set_bit(chan, txch_free);
+}
+
+static int pasemi_alloc_rx_chan(void)
+{
+	int bit;
+retry:
+	bit = find_first_bit(rxch_free, MAX_RXCH);
+	if (bit >= MAX_RXCH)
+		return -ENOSPC;
+	if (!test_and_clear_bit(bit, rxch_free))
+		goto retry;
+
+	return bit;
+}
+
+static void pasemi_free_rx_chan(int chan)
+{
+	BUG_ON(test_bit(chan, rxch_free));
+	set_bit(chan, rxch_free);
+}
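The allocators above share one pattern: a bitmap of free IDs, a search, and an atomic test_and_clear_bit() that retries if another CPU claimed the bit first. A single-threaded userspace stand-in of that shape (editor's sketch only; the kernel bit helpers are the real, atomic versions):

#include <stdbool.h>
#include <stdio.h>

#define NCHAN 64
static bool chan_free[NCHAN];	/* stand-in for the kernel bitmaps */

/* Claim the first free channel, or return -1 (-ENOSPC in the kernel). */
static int alloc_chan(void)
{
	int i;

	for (i = 0; i < NCHAN; i++) {
		if (chan_free[i]) {
			chan_free[i] = false;	/* test_and_clear_bit() */
			return i;
		}
	}
	return -1;
}

int main(void)
{
	int i;

	for (i = 0; i < NCHAN; i++)
		chan_free[i] = true;	/* __set_bit() at init time */

	printf("first: %d, second: %d\n", alloc_chan(), alloc_chan());
	return 0;
}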
+/* pasemi_dma_alloc_chan - Allocate a DMA channel
+ * @type: Type of channel to allocate
+ * @total_size: Total size of structure to allocate (to allow for more
+ *		room behind the structure to be used by the client)
+ * @offset: Offset in bytes from start of the total structure to the beginning
+ *	    of struct pasemi_dmachan. Needed when struct pasemi_dmachan is
+ *	    not the first member of the client structure.
+ *
+ * pasemi_dma_alloc_chan allocates a DMA channel for use by a client. The
+ * type argument specifies whether it's a RX or TX channel, and in the case
+ * of TX channels which group it needs to belong to (if any).
+ *
+ * Returns a pointer to the total structure allocated on success, NULL
+ * on failure.
+ */
+void *pasemi_dma_alloc_chan(enum pasemi_dmachan_type type,
+			    int total_size, int offset)
+{
+	void *buf;
+	struct pasemi_dmachan *chan;
+	int chno;
+
+	BUG_ON(total_size < sizeof(struct pasemi_dmachan));
+
+	buf = kzalloc(total_size, GFP_KERNEL);
+
+	if (!buf)
+		return NULL;
+	chan = buf + offset;
+
+	chan->priv = buf;
+
+	switch (type & (TXCHAN|RXCHAN)) {
+	case RXCHAN:
+		chno = pasemi_alloc_rx_chan();
+		chan->chno = chno;
+		chan->irq = irq_create_mapping(NULL,
+					       base_hw_irq + num_txch + chno);
+		chan->status = &dma_status->rx_sta[chno];
+		break;
+	case TXCHAN:
+		chno = pasemi_alloc_tx_chan(type);
+		chan->chno = chno;
+		chan->irq = irq_create_mapping(NULL, base_hw_irq + chno);
+		chan->status = &dma_status->tx_sta[chno];
+		break;
+	}
+
+	chan->chan_type = type;
+
+	return chan;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_chan);
+
+/* pasemi_dma_free_chan - Free a previously allocated channel
+ * @chan: Channel to free
+ *
+ * Frees a previously allocated channel. It will also deallocate any
+ * descriptor ring associated with the channel, if allocated.
+ */
+void pasemi_dma_free_chan(struct pasemi_dmachan *chan)
+{
+	if (chan->ring_virt)
+		pasemi_dma_free_ring(chan);
+
+	switch (chan->chan_type & (RXCHAN|TXCHAN)) {
+	case RXCHAN:
+		pasemi_free_rx_chan(chan->chno);
+		break;
+	case TXCHAN:
+		pasemi_free_tx_chan(chan->chno);
+		break;
+	}
+
+	kfree(chan->priv);
+}
+EXPORT_SYMBOL(pasemi_dma_free_chan);
+
+/* pasemi_dma_alloc_ring - Allocate descriptor ring for a channel
+ * @chan: Channel for which to allocate
+ * @ring_size: Ring size in 64-bit (8-byte) words
+ *
+ * Allocate a descriptor ring for a channel. Returns 0 on success, errno
+ * on failure. The passed in struct pasemi_dmachan is updated with the
+ * virtual and DMA addresses of the ring.
+ */
+int pasemi_dma_alloc_ring(struct pasemi_dmachan *chan, int ring_size)
+{
+	BUG_ON(chan->ring_virt);
+
+	chan->ring_size = ring_size;
+
+	chan->ring_virt = dma_alloc_coherent(&dma_pdev->dev,
+					     ring_size * sizeof(u64),
+					     &chan->ring_dma, GFP_KERNEL);
+
+	if (!chan->ring_virt)
+		return -ENOMEM;
+
+	memset(chan->ring_virt, 0, ring_size * sizeof(u64));
+
+	return 0;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_ring);
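The total_size/offset contract of pasemi_dma_alloc_chan is easiest to see from the caller's side. A hypothetical client (kernel-context sketch against the API above; my_client and my_client_open are illustrative names, not from the import):

#include <linux/stddef.h>
#include <linux/kernel.h>
#include <asm/pasemi_dma.h>

struct my_client {
	int foo;			/* client state placed before the channel */
	struct pasemi_dmachan chan;	/* embedded at a nonzero offset */
};

static struct my_client *my_client_open(void)
{
	struct my_client *c;

	/* The library allocates the whole client structure and uses
	 * offset to locate the embedded pasemi_dmachan inside it. */
	c = pasemi_dma_alloc_chan(TXCHAN, sizeof(*c),
				  offsetof(struct my_client, chan));
	if (!c)
		return NULL;

	/* 512 descriptor words of 8 bytes each, coherent for the engine */
	if (pasemi_dma_alloc_ring(&c->chan, 512)) {
		pasemi_dma_free_chan(&c->chan);
		return NULL;
	}
	return c;
}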
+/* pasemi_dma_free_ring - Free an allocated descriptor ring for a channel
+ * @chan: Channel for which to free the descriptor ring
+ *
+ * Frees a previously allocated descriptor ring for a channel.
+ */
+void pasemi_dma_free_ring(struct pasemi_dmachan *chan)
+{
+	BUG_ON(!chan->ring_virt);
+
+	dma_free_coherent(&dma_pdev->dev, chan->ring_size * sizeof(u64),
+			  chan->ring_virt, chan->ring_dma);
+	chan->ring_virt = NULL;
+	chan->ring_size = 0;
+	chan->ring_dma = 0;
+}
+EXPORT_SYMBOL(pasemi_dma_free_ring);
+
+/* pasemi_dma_start_chan - Start a DMA channel
+ * @chan: Channel to start
+ * @cmdsta: Additional CCMDSTA/TCMDSTA bits to write
+ *
+ * Enables (starts) a DMA channel with optional additional arguments.
+ */
+void pasemi_dma_start_chan(const struct pasemi_dmachan *chan, const u32 cmdsta)
+{
+	if (chan->chan_type == RXCHAN)
+		pasemi_write_dma_reg(PAS_DMA_RXCHAN_CCMDSTA(chan->chno),
+				     cmdsta | PAS_DMA_RXCHAN_CCMDSTA_EN);
+	else
+		pasemi_write_dma_reg(PAS_DMA_TXCHAN_TCMDSTA(chan->chno),
+				     cmdsta | PAS_DMA_TXCHAN_TCMDSTA_EN);
+}
+EXPORT_SYMBOL(pasemi_dma_start_chan);
+
+/* pasemi_dma_stop_chan - Stop a DMA channel
+ * @chan: Channel to stop
+ *
+ * Stops (disables) a DMA channel. This is done by setting the ST bit in the
+ * CMDSTA register and waiting on the ACT (active) bit to clear, then
+ * finally disabling the whole channel.
+ *
+ * This function will only try for a short while for the channel to stop, if
+ * it doesn't it will return failure.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+#define MAX_RETRIES 5000
+int pasemi_dma_stop_chan(const struct pasemi_dmachan *chan)
+{
+	int reg, retries;
+	u32 sta;
+
+	if (chan->chan_type == RXCHAN) {
+		reg = PAS_DMA_RXCHAN_CCMDSTA(chan->chno);
+		pasemi_write_dma_reg(reg, PAS_DMA_RXCHAN_CCMDSTA_ST);
+		for (retries = 0; retries < MAX_RETRIES; retries++) {
+			sta = pasemi_read_dma_reg(reg);
+			if (!(sta & PAS_DMA_RXCHAN_CCMDSTA_ACT)) {
+				pasemi_write_dma_reg(reg, 0);
+				return 1;
+			}
+			cond_resched();
+		}
+	} else {
+		reg = PAS_DMA_TXCHAN_TCMDSTA(chan->chno);
+		pasemi_write_dma_reg(reg, PAS_DMA_TXCHAN_TCMDSTA_ST);
+		for (retries = 0; retries < MAX_RETRIES; retries++) {
+			sta = pasemi_read_dma_reg(reg);
+			if (!(sta & PAS_DMA_TXCHAN_TCMDSTA_ACT)) {
+				pasemi_write_dma_reg(reg, 0);
+				return 1;
+			}
+			cond_resched();
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(pasemi_dma_stop_chan);
+
+/* pasemi_dma_alloc_buf - Allocate a buffer to use for DMA
+ * @chan: Channel to allocate for
+ * @size: Size of buffer in bytes
+ * @handle: DMA handle
+ *
+ * Allocate a buffer to be used by the DMA engine for read/write,
+ * similar to dma_alloc_coherent().
+ *
+ * Returns the virtual address of the buffer, or NULL in case of failure.
+ */
+void *pasemi_dma_alloc_buf(struct pasemi_dmachan *chan, int size,
+			   dma_addr_t *handle)
+{
+	return dma_alloc_coherent(&dma_pdev->dev, size, handle, GFP_KERNEL);
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_buf);
+
+/* pasemi_dma_free_buf - Free a buffer used for DMA
+ * @chan: Channel the buffer was allocated for
+ * @size: Size of buffer in bytes
+ * @handle: DMA handle
+ *
+ * Frees a previously allocated buffer.
+ */
+void pasemi_dma_free_buf(struct pasemi_dmachan *chan, int size,
+			 dma_addr_t *handle)
+{
+	dma_free_coherent(&dma_pdev->dev, size, handle, GFP_KERNEL);
+}
+EXPORT_SYMBOL(pasemi_dma_free_buf);
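pasemi_dma_stop_chan() above is a bounded-poll handshake: request a stop with ST, spin until ACT drops, give up after a retry budget. A compilable stand-in of that control flow (CMDSTA_ST/CMDSTA_ACT and the fake register are invented for illustration; the real bit masks live in asm/pasemi_dma.h):

#include <stdio.h>

#define MAX_RETRIES	5000
#define CMDSTA_ST	0x1u	/* stand-in: request stop */
#define CMDSTA_ACT	0x2u	/* stand-in: channel still active */

static int polls;

static unsigned int read_reg(void)
{
	/* fake hardware: the channel goes idle after a few polls */
	return (++polls < 3) ? CMDSTA_ACT : 0;
}

static void write_reg(unsigned int v)
{
	(void)v;		/* no-op in this stand-in */
}

/* Same shape as pasemi_dma_stop_chan(): 1 on success, 0 on timeout */
static int stop_chan(void)
{
	int retries;

	write_reg(CMDSTA_ST);
	for (retries = 0; retries < MAX_RETRIES; retries++) {
		if (!(read_reg() & CMDSTA_ACT)) {
			write_reg(0);	/* finally disable the channel */
			return 1;
		}
	}
	return 0;
}

int main(void)
{
	printf("stopped: %d\n", stop_chan());
	return 0;
}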
+/* pasemi_dma_alloc_flag - Allocate a flag (event) for channel synchronization
+ *
+ * Allocates a flag for use with channel synchronization (event descriptors).
+ * Returns allocated flag (0-63), < 0 on error.
+ */
+int pasemi_dma_alloc_flag(void)
+{
+	int bit;
+
+retry:
+	bit = find_next_bit(flags_free, MAX_FLAGS, 0);
+	if (bit >= MAX_FLAGS)
+		return -ENOSPC;
+	if (!test_and_clear_bit(bit, flags_free))
+		goto retry;
+
+	return bit;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_flag);
+
+
+/* pasemi_dma_free_flag - Deallocates a flag (event)
+ * @flag: Flag number to deallocate
+ *
+ * Frees up a flag so it can be reused for other purposes.
+ */
+void pasemi_dma_free_flag(int flag)
+{
+	BUG_ON(test_bit(flag, flags_free));
+	BUG_ON(flag >= MAX_FLAGS);
+	set_bit(flag, flags_free);
+}
+EXPORT_SYMBOL(pasemi_dma_free_flag);
+
+
+/* pasemi_dma_set_flag - Sets a flag (event) to 1
+ * @flag: Flag number to set active
+ *
+ * Sets the flag provided to 1.
+ */
+void pasemi_dma_set_flag(int flag)
+{
+	BUG_ON(flag >= MAX_FLAGS);
+	if (flag < 32)
+		pasemi_write_dma_reg(PAS_DMA_TXF_SFLG0, 1 << flag);
+	else
+		pasemi_write_dma_reg(PAS_DMA_TXF_SFLG1, 1 << flag);
+}
+EXPORT_SYMBOL(pasemi_dma_set_flag);
+
+/* pasemi_dma_clear_flag - Sets a flag (event) to 0
+ * @flag: Flag number to set inactive
+ *
+ * Sets the flag provided to 0.
+ */
+void pasemi_dma_clear_flag(int flag)
+{
+	BUG_ON(flag >= MAX_FLAGS);
+	if (flag < 32)
+		pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 1 << flag);
+	else
+		pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 1 << flag);
+}
+EXPORT_SYMBOL(pasemi_dma_clear_flag);
+
+/* pasemi_dma_alloc_fun - Allocate a function engine
+ *
+ * Allocates a function engine to use for crypto/checksum offload
+ * Returns allocated engine (0-7), < 0 on error.
+ */
+int pasemi_dma_alloc_fun(void)
+{
+	int bit;
+
+retry:
+	bit = find_next_bit(fun_free, MAX_FUN, 0);
+	if (bit >= MAX_FUN)
+		return -ENOSPC;
+	if (!test_and_clear_bit(bit, fun_free))
+		goto retry;
+
+	return bit;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_fun);
+
+
+/* pasemi_dma_free_fun - Deallocates a function engine
+ * @fun: Engine number to deallocate
+ *
+ * Frees up a function engine so it can be used for other purposes.
+ */
+void pasemi_dma_free_fun(int fun)
+{
+	BUG_ON(test_bit(fun, fun_free));
+	BUG_ON(fun >= MAX_FUN);
+	set_bit(fun, fun_free);
+}
+EXPORT_SYMBOL(pasemi_dma_free_fun);
+
+
+static void *map_onedev(struct pci_dev *p, int index)
+{
+	struct device_node *dn;
+	void __iomem *ret;
+
+	dn = pci_device_to_OF_node(p);
+	if (!dn)
+		goto fallback;
+
+	ret = of_iomap(dn, index);
+	if (!ret)
+		goto fallback;
+
+	return ret;
+fallback:
+	/* This is hardcoded and ugly, but we have some firmware versions
+	 * that don't provide the register space in the device tree. Luckily
+	 * they are at well-known locations so we can just do the math here.
+	 */
+	return ioremap(0xe0000000 + (p->devfn << 12), 0x2000);
+}
+
+/* pasemi_dma_init - Initialize the PA Semi DMA library
+ *
+ * This function initializes the DMA library. It must be called before
+ * any other function in the library.
+ *
+ * Returns 0 on success, errno on failure.
+ */ +int pasemi_dma_init(void) +{ + static DEFINE_SPINLOCK(init_lock); + struct pci_dev *iob_pdev; + struct pci_dev *pdev; + struct resource res; + struct device_node *dn; + int i, intf, err = 0; + unsigned long timeout; + u32 tmp; + + if (!machine_is(pasemi)) + return -ENODEV; + + spin_lock(&init_lock); + + /* Make sure we haven't already initialized */ + if (dma_pdev) + goto out; + + iob_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL); + if (!iob_pdev) { + BUG(); + printk(KERN_WARNING "Can't find I/O Bridge\n"); + err = -ENODEV; + goto out; + } + iob_regs = map_onedev(iob_pdev, 0); + + dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL); + if (!dma_pdev) { + BUG(); + printk(KERN_WARNING "Can't find DMA controller\n"); + err = -ENODEV; + goto out; + } + dma_regs = map_onedev(dma_pdev, 0); + base_hw_irq = virq_to_hw(dma_pdev->irq); + + pci_read_config_dword(dma_pdev, PAS_DMA_CAP_TXCH, &tmp); + num_txch = (tmp & PAS_DMA_CAP_TXCH_TCHN_M) >> PAS_DMA_CAP_TXCH_TCHN_S; + + pci_read_config_dword(dma_pdev, PAS_DMA_CAP_RXCH, &tmp); + num_rxch = (tmp & PAS_DMA_CAP_RXCH_RCHN_M) >> PAS_DMA_CAP_RXCH_RCHN_S; + + intf = 0; + for (pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa006, NULL); + pdev; + pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa006, pdev)) + mac_regs[intf++] = map_onedev(pdev, 0); + + pci_dev_put(pdev); + + for (pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa005, NULL); + pdev; + pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa005, pdev)) + mac_regs[intf++] = map_onedev(pdev, 0); + + pci_dev_put(pdev); + + dn = pci_device_to_OF_node(iob_pdev); + if (dn) + err = of_address_to_resource(dn, 1, &res); + if (!dn || err) { + /* Fallback for old firmware */ + res.start = 0xfd800000; + res.end = res.start + 0x1000; + } + dma_status = __ioremap(res.start, resource_size(&res), 0); + pci_dev_put(iob_pdev); + + for (i = 0; i < MAX_TXCH; i++) + __set_bit(i, txch_free); + + for (i = 0; i < MAX_RXCH; i++) + __set_bit(i, rxch_free); + + timeout = jiffies + HZ; + pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, 0); + while (pasemi_read_dma_reg(PAS_DMA_COM_RXSTA) & 1) { + if (time_after(jiffies, timeout)) { + pr_warning("Warning: Could not disable RX section\n"); + break; + } + } + + timeout = jiffies + HZ; + pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, 0); + while (pasemi_read_dma_reg(PAS_DMA_COM_TXSTA) & 1) { + if (time_after(jiffies, timeout)) { + pr_warning("Warning: Could not disable TX section\n"); + break; + } + } + + /* setup resource allocations for the different DMA sections */ + tmp = pasemi_read_dma_reg(PAS_DMA_COM_CFG); + pasemi_write_dma_reg(PAS_DMA_COM_CFG, tmp | 0x18000000); + + /* enable tx section */ + pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, PAS_DMA_COM_TXCMD_EN); + + /* enable rx section */ + pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, PAS_DMA_COM_RXCMD_EN); + + for (i = 0; i < MAX_FLAGS; i++) + __set_bit(i, flags_free); + + for (i = 0; i < MAX_FUN; i++) + __set_bit(i, fun_free); + + /* clear all status flags */ + pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 0xffffffff); + pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 0xffffffff); + + printk(KERN_INFO "PA Semi PWRficient DMA library initialized " + "(%d tx, %d rx channels)\n", num_txch, num_rxch); + +out: + spin_unlock(&init_lock); + return err; +} +EXPORT_SYMBOL(pasemi_dma_init); diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c new file mode 100644 index 000000000..ae3f47b25 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -0,0 +1,342 @@ +/* + * Copyright (C) 2006-2007 PA Semi, 
Inc + * + * Author: Olof Johansson, PA Semi + * + * Maintained by: Olof Johansson <olof@lixom.net> + * + * Based on drivers/net/fs_enet/mii-bitbang.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/io.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/phy.h> +#include <linux/of_address.h> +#include <linux/of_mdio.h> +#include <linux/of_platform.h> + +#define DELAY 1 + +static void __iomem *gpio_regs; + +struct gpio_priv { + int mdc_pin; + int mdio_pin; + int mdio_irqs[PHY_MAX_ADDR]; +}; + +#define MDC_PIN(bus) (((struct gpio_priv *)bus->priv)->mdc_pin) +#define MDIO_PIN(bus) (((struct gpio_priv *)bus->priv)->mdio_pin) + +static inline void mdio_lo(struct mii_bus *bus) +{ + out_le32(gpio_regs+0x10, 1 << MDIO_PIN(bus)); +} + +static inline void mdio_hi(struct mii_bus *bus) +{ + out_le32(gpio_regs, 1 << MDIO_PIN(bus)); +} + +static inline void mdc_lo(struct mii_bus *bus) +{ + out_le32(gpio_regs+0x10, 1 << MDC_PIN(bus)); +} + +static inline void mdc_hi(struct mii_bus *bus) +{ + out_le32(gpio_regs, 1 << MDC_PIN(bus)); +} + +static inline void mdio_active(struct mii_bus *bus) +{ + out_le32(gpio_regs+0x20, (1 << MDC_PIN(bus)) | (1 << MDIO_PIN(bus))); +} + +static inline void mdio_tristate(struct mii_bus *bus) +{ + out_le32(gpio_regs+0x30, (1 << MDIO_PIN(bus))); +} + +static inline int mdio_read(struct mii_bus *bus) +{ + return !!(in_le32(gpio_regs+0x40) & (1 << MDIO_PIN(bus))); +} + +static void clock_out(struct mii_bus *bus, int bit) +{ + if (bit) + mdio_hi(bus); + else + mdio_lo(bus); + udelay(DELAY); + mdc_hi(bus); + udelay(DELAY); + mdc_lo(bus); +} + +/* Utility to send the preamble, address, and register (common to read and write). */ +static void bitbang_pre(struct mii_bus *bus, int read, u8 addr, u8 reg) +{ + int i; + + /* CFE uses a really long preamble (40 bits). We'll do the same. 
*/
+	mdio_active(bus);
+	for (i = 0; i < 40; i++) {
+		clock_out(bus, 1);
+	}
+
+	/* send the start bit (01) and the read opcode (10) or write (01) */
+	clock_out(bus, 0);
+	clock_out(bus, 1);
+
+	clock_out(bus, read);
+	clock_out(bus, !read);
+
+	/* send the PHY address */
+	for (i = 0; i < 5; i++) {
+		clock_out(bus, (addr & 0x10) != 0);
+		addr <<= 1;
+	}
+
+	/* send the register address */
+	for (i = 0; i < 5; i++) {
+		clock_out(bus, (reg & 0x10) != 0);
+		reg <<= 1;
+	}
+}
+
+static int gpio_mdio_read(struct mii_bus *bus, int phy_id, int location)
+{
+	u16 rdreg;
+	int ret, i;
+	u8 addr = phy_id & 0xff;
+	u8 reg = location & 0xff;
+
+	bitbang_pre(bus, 1, addr, reg);
+
+	/* tri-state our MDIO I/O pin so we can read */
+	mdio_tristate(bus);
+	udelay(DELAY);
+	mdc_hi(bus);
+	udelay(DELAY);
+	mdc_lo(bus);
+
+	/* read 16 bits of register data, MSB first */
+	rdreg = 0;
+	for (i = 0; i < 16; i++) {
+		mdc_lo(bus);
+		udelay(DELAY);
+		mdc_hi(bus);
+		udelay(DELAY);
+		mdc_lo(bus);
+		udelay(DELAY);
+		rdreg <<= 1;
+		rdreg |= mdio_read(bus);
+	}
+
+	mdc_hi(bus);
+	udelay(DELAY);
+	mdc_lo(bus);
+	udelay(DELAY);
+
+	ret = rdreg;
+
+	return ret;
+}
+
+static int gpio_mdio_write(struct mii_bus *bus, int phy_id, int location, u16 val)
+{
+	int i;
+
+	u8 addr = phy_id & 0xff;
+	u8 reg = location & 0xff;
+	u16 value = val & 0xffff;
+
+	bitbang_pre(bus, 0, addr, reg);
+
+	/* send the turnaround (10) */
+	mdc_lo(bus);
+	mdio_hi(bus);
+	udelay(DELAY);
+	mdc_hi(bus);
+	udelay(DELAY);
+	mdc_lo(bus);
+	mdio_lo(bus);
+	udelay(DELAY);
+	mdc_hi(bus);
+	udelay(DELAY);
+
+	/* write 16 bits of register data, MSB first */
+	for (i = 0; i < 16; i++) {
+		mdc_lo(bus);
+		if (value & 0x8000)
+			mdio_hi(bus);
+		else
+			mdio_lo(bus);
+		udelay(DELAY);
+		mdc_hi(bus);
+		udelay(DELAY);
+		value <<= 1;
+	}
+
+	/*
+	 * Tri-state the MDIO line.
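+	 * Releasing the pin here returns the bus to its idle state, so
+	 * that the PHY (or the next read's turnaround) can drive it.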
+ */ + mdio_tristate(bus); + mdc_lo(bus); + udelay(DELAY); + mdc_hi(bus); + udelay(DELAY); + return 0; +} + +static int gpio_mdio_reset(struct mii_bus *bus) +{ + /*nothing here - dunno how to reset it*/ + return 0; +} + + +static int gpio_mdio_probe(struct platform_device *ofdev) +{ + struct device *dev = &ofdev->dev; + struct device_node *np = ofdev->dev.of_node; + struct mii_bus *new_bus; + struct gpio_priv *priv; + const unsigned int *prop; + int err; + + err = -ENOMEM; + priv = kzalloc(sizeof(struct gpio_priv), GFP_KERNEL); + if (!priv) + goto out; + + new_bus = mdiobus_alloc(); + + if (!new_bus) + goto out_free_priv; + + new_bus->name = "pasemi gpio mdio bus"; + new_bus->read = &gpio_mdio_read; + new_bus->write = &gpio_mdio_write; + new_bus->reset = &gpio_mdio_reset; + + prop = of_get_property(np, "reg", NULL); + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", *prop); + new_bus->priv = priv; + + new_bus->irq = priv->mdio_irqs; + + prop = of_get_property(np, "mdc-pin", NULL); + priv->mdc_pin = *prop; + + prop = of_get_property(np, "mdio-pin", NULL); + priv->mdio_pin = *prop; + + new_bus->parent = dev; + dev_set_drvdata(dev, new_bus); + + err = of_mdiobus_register(new_bus, np); + + if (err != 0) { + printk(KERN_ERR "%s: Cannot register as MDIO bus, err %d\n", + new_bus->name, err); + goto out_free_irq; + } + + return 0; + +out_free_irq: + kfree(new_bus); +out_free_priv: + kfree(priv); +out: + return err; +} + + +static int gpio_mdio_remove(struct platform_device *dev) +{ + struct mii_bus *bus = dev_get_drvdata(&dev->dev); + + mdiobus_unregister(bus); + + dev_set_drvdata(&dev->dev, NULL); + + kfree(bus->priv); + bus->priv = NULL; + mdiobus_free(bus); + + return 0; +} + +static const struct of_device_id gpio_mdio_match[] = +{ + { + .compatible = "gpio-mdio", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, gpio_mdio_match); + +static struct platform_driver gpio_mdio_driver = +{ + .probe = gpio_mdio_probe, + .remove = gpio_mdio_remove, + .driver = { + .name = "gpio-mdio-bitbang", + .of_match_table = gpio_mdio_match, + }, +}; + +int gpio_mdio_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "1682m-gpio"); + if (!np) + np = of_find_compatible_node(NULL, NULL, + "pasemi,pwrficient-gpio"); + if (!np) + return -ENODEV; + gpio_regs = of_iomap(np, 0); + of_node_put(np); + + if (!gpio_regs) + return -ENODEV; + + return platform_driver_register(&gpio_mdio_driver); +} +module_init(gpio_mdio_init); + +void gpio_mdio_exit(void) +{ + platform_driver_unregister(&gpio_mdio_driver); + if (gpio_regs) + iounmap(gpio_regs); +} +module_exit(gpio_mdio_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Olof Johansson <olof@lixom.net>"); +MODULE_DESCRIPTION("Driver for MDIO over GPIO on PA Semi PWRficient-based boards"); diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c new file mode 100644 index 000000000..75b296bc5 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Maintained by: Olof Johansson <olof@lixom.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/irq.h> + +#include <asm/machdep.h> +#include <asm/reg.h> +#include <asm/smp.h> + +#include "pasemi.h" + +struct sleep_mode { + char *name; + void (*entry)(void); +}; + +static struct sleep_mode modes[] = { + { .name = "spin", .entry = &idle_spin }, + { .name = "doze", .entry = &idle_doze }, +}; + +static int current_mode = 0; + +static int pasemi_system_reset_exception(struct pt_regs *regs) +{ + /* If we were woken up from power savings, we need to return + * to the calling function, since nip is not saved across + * all modes. + */ + + if (regs->msr & SRR1_WAKEMASK) + regs->nip = regs->link; + + switch (regs->msr & SRR1_WAKEMASK) { + case SRR1_WAKEEE: + do_IRQ(regs); + break; + case SRR1_WAKEDEC: + timer_interrupt(regs); + break; + default: + /* do system reset */ + return 0; + } + + /* Set higher astate since we come out of power savings at 0 */ + restore_astate(hard_smp_processor_id()); + + /* everything handled */ + regs->msr |= MSR_RI; + return 1; +} + +static int __init pasemi_idle_init(void) +{ +#ifndef CONFIG_PPC_PASEMI_CPUFREQ + printk(KERN_WARNING "No cpufreq driver, powersavings modes disabled\n"); + current_mode = 0; +#endif + + ppc_md.system_reset_exception = pasemi_system_reset_exception; + ppc_md.power_save = modes[current_mode].entry; + printk(KERN_INFO "Using PA6T idle loop (%s)\n", modes[current_mode].name); + + return 0; +} +machine_late_initcall(pasemi, pasemi_idle_init); + +static int __init idle_param(char *p) +{ + int i; + for (i = 0; i < ARRAY_SIZE(modes); i++) { + if (!strcmp(modes[i].name, p)) { + current_mode = i; + break; + } + } + return 0; +} + +early_param("idle", idle_param); diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c new file mode 100644 index 000000000..b8f567b2e --- /dev/null +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -0,0 +1,269 @@ +/* + * Copyright (C) 2005-2008, PA Semi, Inc + * + * Maintained by: Olof Johansson <olof@lixom.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#undef DEBUG + +#include <linux/memblock.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <asm/iommu.h> +#include <asm/machdep.h> +#include <asm/firmware.h> + +#include "pasemi.h" + +#define IOBMAP_PAGE_SHIFT 12 +#define IOBMAP_PAGE_SIZE (1 << IOBMAP_PAGE_SHIFT) +#define IOBMAP_PAGE_MASK (IOBMAP_PAGE_SIZE - 1) + +#define IOB_BASE 0xe0000000 +#define IOB_SIZE 0x3000 +/* Configuration registers */ +#define IOBCAP_REG 0x40 +#define IOBCOM_REG 0x100 +/* Enable IOB address translation */ +#define IOBCOM_ATEN 0x00000100 + +/* Address decode configuration register */ +#define IOB_AD_REG 0x14c +/* IOBCOM_AD_REG fields */ +#define IOB_AD_VGPRT 0x00000e00 +#define IOB_AD_VGAEN 0x00000100 +/* Direct mapping settings */ +#define IOB_AD_MPSEL_MASK 0x00000030 +#define IOB_AD_MPSEL_B38 0x00000000 +#define IOB_AD_MPSEL_B40 0x00000010 +#define IOB_AD_MPSEL_B42 0x00000020 +/* Translation window size / enable */ +#define IOB_AD_TRNG_MASK 0x00000003 +#define IOB_AD_TRNG_256M 0x00000000 +#define IOB_AD_TRNG_2G 0x00000001 +#define IOB_AD_TRNG_128G 0x00000003 + +#define IOB_TABLEBASE_REG 0x154 + +/* Base of the 64 4-byte L1 registers */ +#define IOB_XLT_L1_REGBASE 0x2b00 + +/* Register to invalidate TLB entries */ +#define IOB_AT_INVAL_TLB_REG 0x2d00 + +/* The top two bits of the level 1 entry contains valid and type flags */ +#define IOBMAP_L1E_V 0x40000000 +#define IOBMAP_L1E_V_B 0x80000000 + +/* For big page entries, the bottom two bits contains flags */ +#define IOBMAP_L1E_BIG_CACHED 0x00000002 +#define IOBMAP_L1E_BIG_PRIORITY 0x00000001 + +/* For regular level 2 entries, top 2 bits contain valid and cache flags */ +#define IOBMAP_L2E_V 0x80000000 +#define IOBMAP_L2E_V_CACHED 0xc0000000 + +static void __iomem *iob; +static u32 iob_l1_emptyval; +static u32 iob_l2_emptyval; +static u32 *iob_l2_base; + +static struct iommu_table iommu_table_iobmap; +static int iommu_table_iobmap_inited; + +static int iobmap_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + u32 *ip; + u32 rpn; + unsigned long bus_addr; + + pr_debug("iobmap: build at: %lx, %lx, addr: %lx\n", index, npages, uaddr); + + bus_addr = (tbl->it_offset + index) << IOBMAP_PAGE_SHIFT; + + ip = ((u32 *)tbl->it_base) + index; + + while (npages--) { + rpn = __pa(uaddr) >> IOBMAP_PAGE_SHIFT; + + *(ip++) = IOBMAP_L2E_V | rpn; + /* invalidate tlb, can be optimized more */ + out_le32(iob+IOB_AT_INVAL_TLB_REG, bus_addr >> 14); + + uaddr += IOBMAP_PAGE_SIZE; + bus_addr += IOBMAP_PAGE_SIZE; + } + return 0; +} + + +static void iobmap_free(struct iommu_table *tbl, long index, + long npages) +{ + u32 *ip; + unsigned long bus_addr; + + pr_debug("iobmap: free at: %lx, %lx\n", index, npages); + + bus_addr = (tbl->it_offset + index) << IOBMAP_PAGE_SHIFT; + + ip = ((u32 *)tbl->it_base) + index; + + while (npages--) { + *(ip++) = iob_l2_emptyval; + /* invalidate tlb, can be optimized more */ + out_le32(iob+IOB_AT_INVAL_TLB_REG, bus_addr >> 14); + bus_addr += IOBMAP_PAGE_SIZE; + } +} + + +static void iommu_table_iobmap_setup(void) +{ + pr_debug(" -> %s\n", __func__); + iommu_table_iobmap.it_busno = 0; + iommu_table_iobmap.it_offset = 0; + iommu_table_iobmap.it_page_shift = IOBMAP_PAGE_SHIFT; + + /* it_size is in number 
of entries */ + iommu_table_iobmap.it_size = + 0x80000000 >> iommu_table_iobmap.it_page_shift; + + /* Initialize the common IOMMU code */ + iommu_table_iobmap.it_base = (unsigned long)iob_l2_base; + iommu_table_iobmap.it_index = 0; + /* XXXOJN tune this to avoid IOB cache invals. + * Should probably be 8 (64 bytes) + */ + iommu_table_iobmap.it_blocksize = 4; + iommu_init_table(&iommu_table_iobmap, 0); + pr_debug(" <- %s\n", __func__); +} + + + +static void pci_dma_bus_setup_pasemi(struct pci_bus *bus) +{ + pr_debug("pci_dma_bus_setup, bus %p, bus->self %p\n", bus, bus->self); + + if (!iommu_table_iobmap_inited) { + iommu_table_iobmap_inited = 1; + iommu_table_iobmap_setup(); + } +} + + +static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) +{ + pr_debug("pci_dma_dev_setup, dev %p (%s)\n", dev, pci_name(dev)); + +#if !defined(CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE) + /* For non-LPAR environment, don't translate anything for the DMA + * engine. The exception to this is if the user has enabled + * CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE at build time. + */ + if (dev->vendor == 0x1959 && dev->device == 0xa007 && + !firmware_has_feature(FW_FEATURE_LPAR)) { + dev->dev.archdata.dma_ops = &dma_direct_ops; + return; + } +#endif + + set_iommu_table_base(&dev->dev, &iommu_table_iobmap); +} + +int __init iob_init(struct device_node *dn) +{ + unsigned long tmp; + u32 regword; + int i; + + pr_debug(" -> %s\n", __func__); + + /* Allocate a spare page to map all invalid IOTLB pages. */ + tmp = memblock_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE); + if (!tmp) + panic("IOBMAP: Cannot allocate spare page!"); + /* Empty l1 is marked invalid */ + iob_l1_emptyval = 0; + /* Empty l2 is mapped to dummy page */ + iob_l2_emptyval = IOBMAP_L2E_V | (tmp >> IOBMAP_PAGE_SHIFT); + + iob = ioremap(IOB_BASE, IOB_SIZE); + if (!iob) + panic("IOBMAP: Cannot map registers!"); + + /* setup direct mapping of the L1 entries */ + for (i = 0; i < 64; i++) { + /* Each L1 covers 32MB, i.e. 8K entries = 32K of ram */ + regword = IOBMAP_L1E_V | (__pa(iob_l2_base + i*0x2000) >> 12); + out_le32(iob+IOB_XLT_L1_REGBASE+i*4, regword); + } + + /* set 2GB translation window, based at 0 */ + regword = in_le32(iob+IOB_AD_REG); + regword &= ~IOB_AD_TRNG_MASK; + regword |= IOB_AD_TRNG_2G; + out_le32(iob+IOB_AD_REG, regword); + + /* Enable translation */ + regword = in_le32(iob+IOBCOM_REG); + regword |= IOBCOM_ATEN; + out_le32(iob+IOBCOM_REG, regword); + + pr_debug(" <- %s\n", __func__); + + return 0; +} + + +/* These are called very early. 
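+ * iommu_init_early_pasemi() runs from pas_init_early() (the platform's
+ * ppc_md.init_early hook) before PCI is probed, so the dma_dev_setup and
+ * dma_bus_setup callbacks are in place by the time devices are discovered;
+ * alloc_iobmap_l2() runs even earlier, from pas_probe(), while the MMU is
+ * still off.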
*/
+void __init iommu_init_early_pasemi(void)
+{
+	int iommu_off;
+
+#ifndef CONFIG_PPC_PASEMI_IOMMU
+	iommu_off = 1;
+#else
+	iommu_off = of_chosen &&
+			of_get_property(of_chosen, "linux,iommu-off", NULL);
+#endif
+	if (iommu_off)
+		return;
+
+	iob_init(NULL);
+
+	pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi;
+	pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi;
+	ppc_md.tce_build = iobmap_build;
+	ppc_md.tce_free = iobmap_free;
+	set_pci_dma_ops(&dma_iommu_ops);
+}
+
+void __init alloc_iobmap_l2(void)
+{
+#ifndef CONFIG_PPC_PASEMI_IOMMU
+	return;
+#endif
+	/* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */
+	iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000));
+
+	printk(KERN_INFO "IOBMAP L2 allocated at: %p\n", iob_l2_base);
+}
diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c
new file mode 100644
index 000000000..e0ab29976
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/misc.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2007 PA Semi, Inc
+ *
+ * Parts based on arch/powerpc/sysdev/fsl_soc.c:
+ *
+ * 2006 (c) MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/i2c.h>
+
+#ifdef CONFIG_I2C_BOARDINFO
+/* The below is from fsl_soc.c. It's copied because, since there are no
+ * official bus bindings at this time, it doesn't make sense to share across
+ * the platforms, even though they happen to be common.
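+ *
+ * For illustration, a device tree fragment this code would pick up looks
+ * roughly like the following (hypothetical node; the DS1338 RTC answers
+ * at i2c address 0x68):
+ *
+ *	rtc@68 {
+ *		compatible = "dallas,ds1338";
+ *		reg = <0x68>;
+ *	};
+ *
+ * find_i2c_driver() below matches on the compatible string, and the "reg"
+ * property becomes the i2c_board_info address.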
+ */ +struct i2c_driver_device { + char *of_device; + char *i2c_type; +}; + +static struct i2c_driver_device i2c_devices[] __initdata = { + {"dallas,ds1338", "ds1338"}, +}; + +static int __init find_i2c_driver(struct device_node *node, + struct i2c_board_info *info) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(i2c_devices); i++) { + if (!of_device_is_compatible(node, i2c_devices[i].of_device)) + continue; + if (strlcpy(info->type, i2c_devices[i].i2c_type, + I2C_NAME_SIZE) >= I2C_NAME_SIZE) + return -ENOMEM; + return 0; + } + return -ENODEV; +} + +static int __init pasemi_register_i2c_devices(void) +{ + struct pci_dev *pdev; + struct device_node *adap_node; + struct device_node *node; + + pdev = NULL; + while ((pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa003, pdev))) { + adap_node = pci_device_to_OF_node(pdev); + + if (!adap_node) + continue; + + node = NULL; + while ((node = of_get_next_child(adap_node, node))) { + struct i2c_board_info info = {}; + const u32 *addr; + int len; + + addr = of_get_property(node, "reg", &len); + if (!addr || len < sizeof(int) || + *addr > (1 << 10) - 1) { + printk(KERN_WARNING + "pasemi_register_i2c_devices: " + "invalid i2c device entry\n"); + continue; + } + + info.irq = irq_of_parse_and_map(node, 0); + if (info.irq == NO_IRQ) + info.irq = -1; + + if (find_i2c_driver(node, &info) < 0) + continue; + + info.addr = *addr; + + i2c_register_board_info(PCI_FUNC(pdev->devfn), &info, + 1); + } + } + return 0; +} +device_initcall(pasemi_register_i2c_devices); +#endif diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h new file mode 100644 index 000000000..11f230a48 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/pasemi.h @@ -0,0 +1,35 @@ +#ifndef _PASEMI_PASEMI_H +#define _PASEMI_PASEMI_H + +extern unsigned long pas_get_boot_time(void); +extern void pas_pci_init(void); +extern void pas_pci_irq_fixup(struct pci_dev *dev); +extern void pas_pci_dma_dev_setup(struct pci_dev *dev); + +extern void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset); + +extern void __init alloc_iobmap_l2(void); +extern void __init pasemi_map_registers(void); + +/* Power savings modes, implemented in asm */ +extern void idle_spin(void); +extern void idle_doze(void); + +/* Restore astate to last set */ +#ifdef CONFIG_PPC_PASEMI_CPUFREQ +extern int check_astate(void); +extern void restore_astate(int cpu); +#else +static inline int check_astate(void) +{ + /* Always return >0 so we never power save */ + return 1; +} +static inline void restore_astate(int cpu) +{ +} +#endif + +extern struct pci_controller_ops pasemi_pci_controller_ops; + +#endif /* _PASEMI_PASEMI_H */ diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c new file mode 100644 index 000000000..f3a68a0fe --- /dev/null +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2006 PA Semi, Inc + * + * Authors: Kip Walker, PA Semi + * Olof Johansson, PA Semi + * + * Maintained by: Olof Johansson <olof@lixom.net> + * + * Based on arch/powerpc/platforms/maple/pci.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+
+#include <asm/ppc-pci.h>
+
+#include "pasemi.h"
+
+#define PA_PXP_CFA(bus, devfn, off) (((bus) << 20) | ((devfn) << 12) | (off))
+
+static inline int pa_pxp_offset_valid(u8 bus, u8 devfn, int offset)
+{
+	/* Device 0 Function 0 is special: its config space spans function 1 as
+	 * well, so allow a larger offset. It's really a two-function device but
+	 * the second function does not probe.
+	 */
+	if (bus == 0 && devfn == 0)
+		return offset < 8192;
+	else
+		return offset < 4096;
+}
+
+static void volatile __iomem *pa_pxp_cfg_addr(struct pci_controller *hose,
+					u8 bus, u8 devfn, int offset)
+{
+	return hose->cfg_data + PA_PXP_CFA(bus, devfn, offset);
+}
+
+static inline int is_root_port(int busno, int devfn)
+{
+	return ((busno == 0) && (PCI_FUNC(devfn) < 4) &&
+		((PCI_SLOT(devfn) == 16) || (PCI_SLOT(devfn) == 17)));
+}
+
+static inline int is_5945_reg(int reg)
+{
+	return (((reg >= 0x18) && (reg < 0x34)) ||
+		((reg >= 0x158) && (reg < 0x178)));
+}
+
+static int workaround_5945(struct pci_bus *bus, unsigned int devfn,
+			   int offset, int len, u32 *val)
+{
+	struct pci_controller *hose;
+	void volatile __iomem *addr, *dummy;
+	int byte;
+	u32 tmp;
+
+	if (!is_root_port(bus->number, devfn) || !is_5945_reg(offset))
+		return 0;
+
+	hose = pci_bus_to_host(bus);
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset & ~0x3);
+	byte = offset & 0x3;
+
+	/* Workaround bug 5945: write 0 to a dummy register before reading,
+	 * and write back what we read. We must read/write the full 32-bit
+	 * contents so we need to shift and mask by hand.
+	 */
+	dummy = pa_pxp_cfg_addr(hose, bus->number, devfn, 0x10);
+	out_le32(dummy, 0);
+	tmp = in_le32(addr);
+	out_le32(addr, tmp);
+
+	switch (len) {
+	case 1:
+		*val = (tmp >> (8*byte)) & 0xff;
+		break;
+	case 2:
+		if (byte == 0)
+			*val = tmp & 0xffff;
+		else
+			*val = (tmp >> 16) & 0xffff;
+		break;
+	default:
+		*val = tmp;
+		break;
+	}
+
+	return 1;
+}
+
+static int pa_pxp_read_config(struct pci_bus *bus, unsigned int devfn,
+			      int offset, int len, u32 *val)
+{
+	struct pci_controller *hose;
+	void volatile __iomem *addr;
+
+	hose = pci_bus_to_host(bus);
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!pa_pxp_offset_valid(bus->number, devfn, offset))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	if (workaround_5945(bus, devfn, offset, len, val))
+		return PCIBIOS_SUCCESSFUL;
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset);
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
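+	 * The default case in the switch below is therefore always the
+	 * len == 4 one.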
+ */ + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_le16(addr); + break; + default: + *val = in_le32(addr); + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int pa_pxp_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + void volatile __iomem *addr; + + hose = pci_bus_to_host(bus); + if (!hose) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (!pa_pxp_offset_valid(bus->number, devfn, offset)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset); + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8(addr, val); + break; + case 2: + out_le16(addr, val); + break; + default: + out_le32(addr, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops pa_pxp_ops = { + .read = pa_pxp_read_config, + .write = pa_pxp_write_config, +}; + +static void __init setup_pa_pxp(struct pci_controller *hose) +{ + hose->ops = &pa_pxp_ops; + hose->cfg_data = ioremap(0xe0000000, 0x10000000); +} + +static int __init pas_add_bridge(struct device_node *dev) +{ + struct pci_controller *hose; + + pr_debug("Adding PCI host bridge %s\n", dev->full_name); + + hose = pcibios_alloc_controller(dev); + if (!hose) + return -ENOMEM; + + hose->first_busno = 0; + hose->last_busno = 0xff; + hose->controller_ops = pasemi_pci_controller_ops; + + setup_pa_pxp(hose); + + printk(KERN_INFO "Found PA-PXP PCI host bridge.\n"); + + /* Interpret the "ranges" property */ + pci_process_bridge_OF_ranges(hose, dev, 1); + + return 0; +} + +void __init pas_pci_init(void) +{ + struct device_node *np, *root; + + root = of_find_node_by_path("/"); + if (!root) { + printk(KERN_CRIT "pas_pci_init: can't find root " + "of device tree\n"); + return; + } + + for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) + if (np->name && !strcmp(np->name, "pxp") && !pas_add_bridge(np)) + of_node_get(np); + + of_node_put(root); + + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); +} + +void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) +{ + struct pci_controller *hose; + + hose = pci_bus_to_host(dev->bus); + + return (void __iomem *)pa_pxp_cfg_addr(hose, dev->bus->number, dev->devfn, offset); +} + +struct pci_controller_ops pasemi_pci_controller_ops; diff --git a/arch/powerpc/platforms/pasemi/powersave.S b/arch/powerpc/platforms/pasemi/powersave.S new file mode 100644 index 000000000..81ab555aa --- /dev/null +++ b/arch/powerpc/platforms/pasemi/powersave.S @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Maintained by: Olof Johansson <olof@lixom.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/ppc_asm.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> + +/* Power savings opcodes since not all binutils have them at this time */ +#define DOZE .long 0x4c000324 +#define NAP .long 0x4c000364 +#define SLEEP .long 0x4c0003a4 +#define RVW .long 0x4c0003e4 + +/* Common sequence to do before going to any of the + * powersavings modes. + */ + +#define PRE_SLEEP_SEQUENCE \ + std r3,8(r1); \ + ptesync ; \ + ld r3,8(r1); \ +1: cmpd r3,r3; \ + bne 1b + +_doze: + PRE_SLEEP_SEQUENCE + DOZE + b . + + +_GLOBAL(idle_spin) + blr + +_GLOBAL(idle_doze) + LOAD_REG_ADDR(r3, _doze) + b sleep_common + +/* Add more modes here later */ + +sleep_common: + mflr r0 + std r0, 16(r1) + stdu r1,-64(r1) +#ifdef CONFIG_PPC_PASEMI_CPUFREQ + std r3, 48(r1) + + /* Only do power savings when in astate 0 */ + bl check_astate + cmpwi r3,0 + bne 1f + + ld r3, 48(r1) +#endif + LOAD_REG_IMMEDIATE(r6,MSR_DR|MSR_IR|MSR_ME|MSR_EE) + mfmsr r4 + andc r5,r4,r6 + mtmsrd r5,0 + + mtctr r3 + bctrl + + mtmsrd r4,0 + +1: addi r1,r1,64 + ld r0,16(r1) + mtlr r0 + blr + diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c new file mode 100644 index 000000000..d71b2c7e8 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -0,0 +1,448 @@ +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Authors: Kip Walker, PA Semi + * Olof Johansson, PA Semi + * + * Maintained by: Olof Johansson <olof@lixom.net> + * + * Based on arch/powerpc/platforms/maple/setup.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/console.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+#include <linux/gfp.h>
+
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/mpic.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+#include <asm/mmu.h>
+#include <asm/debug.h>
+
+#include <pcmcia/ss.h>
+#include <pcmcia/cistpl.h>
+#include <pcmcia/ds.h>
+
+#include "pasemi.h"
+
+/* SDC reset register, must be pre-mapped at reset time */
+static void __iomem *reset_reg;
+
+/* Various error status registers, must be pre-mapped at MCE time */
+
+#define MAX_MCE_REGS	32
+struct mce_regs {
+	char *name;
+	void __iomem *addr;
+};
+
+static struct mce_regs mce_regs[MAX_MCE_REGS];
+static int num_mce_regs;
+static int nmi_virq = NO_IRQ;
+
+
+static void pas_restart(char *cmd)
+{
+	/* Need to put other cpus in a hold loop so they're not sleeping */
+	smp_send_stop();
+	udelay(10000);
+	printk("Restarting...\n");
+	while (1)
+		out_le32(reset_reg, 0x6000000);
+}
+
+#ifdef CONFIG_SMP
+static arch_spinlock_t timebase_lock;
+static unsigned long timebase;
+
+static void pas_give_timebase(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	hard_irq_disable();
+	arch_spin_lock(&timebase_lock);
+	mtspr(SPRN_TBCTL, TBCTL_FREEZE);
+	isync();
+	timebase = get_tb();
+	arch_spin_unlock(&timebase_lock);
+
+	while (timebase)
+		barrier();
+	mtspr(SPRN_TBCTL, TBCTL_RESTART);
+	local_irq_restore(flags);
+}
+
+static void pas_take_timebase(void)
+{
+	while (!timebase)
+		smp_rmb();
+
+	arch_spin_lock(&timebase_lock);
+	set_tb(timebase >> 32, timebase & 0xffffffff);
+	timebase = 0;
+	arch_spin_unlock(&timebase_lock);
+}
+
+struct smp_ops_t pas_smp_ops = {
+	.probe		= smp_mpic_probe,
+	.message_pass	= smp_mpic_message_pass,
+	.kick_cpu	= smp_generic_kick_cpu,
+	.setup_cpu	= smp_mpic_setup_cpu,
+	.give_timebase	= pas_give_timebase,
+	.take_timebase	= pas_take_timebase,
+};
+#endif /* CONFIG_SMP */
+
+void __init pas_setup_arch(void)
+{
+#ifdef CONFIG_SMP
+	/* Setup SMP callback */
+	smp_ops = &pas_smp_ops;
+#endif
+	/* Lookup PCI hosts */
+	pas_pci_init();
+
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+
+	/* Remap SDC register for doing reset */
+	/* XXXOJN This should maybe come out of the device tree */
+	reset_reg = ioremap(0xfc101100, 4);
+}
+
+static int __init pas_setup_mce_regs(void)
+{
+	struct pci_dev *dev;
+	int reg;
+
+	/* Remap various SoC status registers for use by the MCE handler */
+
+	reg = 0;
+
+	dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa00a, NULL);
+	while (dev && reg < MAX_MCE_REGS) {
+		mce_regs[reg].name = kasprintf(GFP_KERNEL,
+						"mc%d_mcdebug_errsta", reg);
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x730);
+		dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa00a, dev);
+		reg++;
+	}
+
+	dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL);
+	if (dev && reg+4 < MAX_MCE_REGS) {
+		mce_regs[reg].name = "iobdbg_IntStatus1";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x438);
+		reg++;
+		mce_regs[reg].name = "iobdbg_IOCTbusIntDbgReg";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x454);
+		reg++;
+		mce_regs[reg].name = "iobiom_IntStatus";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0xc10);
+		reg++;
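+		/* One more IOB register pair follows; everything collected
+		 * here is dumped by pas_machine_check_handler() below when a
+		 * machine check hits.
+		 */
+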
mce_regs[reg].name = "iobiom_IntDbgReg"; + mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0xc1c); + reg++; + } + + dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa009, NULL); + if (dev && reg+2 < MAX_MCE_REGS) { + mce_regs[reg].name = "l2csts_IntStatus"; + mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x200); + reg++; + mce_regs[reg].name = "l2csts_Cnt"; + mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x214); + reg++; + } + + num_mce_regs = reg; + + return 0; +} +machine_device_initcall(pasemi, pas_setup_mce_regs); + +static __init void pas_init_IRQ(void) +{ + struct device_node *np; + struct device_node *root, *mpic_node; + unsigned long openpic_addr; + const unsigned int *opprop; + int naddr, opplen; + int mpic_flags; + const unsigned int *nmiprop; + struct mpic *mpic; + + mpic_node = NULL; + + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "open-pic")) { + mpic_node = np; + break; + } + if (!mpic_node) + for_each_node_by_type(np, "open-pic") { + mpic_node = np; + break; + } + if (!mpic_node) { + printk(KERN_ERR + "Failed to locate the MPIC interrupt controller\n"); + return; + } + + /* Find address list in /platform-open-pic */ + root = of_find_node_by_path("/"); + naddr = of_n_addr_cells(root); + opprop = of_get_property(root, "platform-open-pic", &opplen); + if (!opprop) { + printk(KERN_ERR "No platform-open-pic property.\n"); + of_node_put(root); + return; + } + openpic_addr = of_read_number(opprop, naddr); + printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); + + mpic_flags = MPIC_LARGE_VECTORS | MPIC_NO_BIAS | MPIC_NO_RESET; + + nmiprop = of_get_property(mpic_node, "nmi-source", NULL); + if (nmiprop) + mpic_flags |= MPIC_ENABLE_MCK; + + mpic = mpic_alloc(mpic_node, openpic_addr, + mpic_flags, 0, 0, "PASEMI-OPIC"); + BUG_ON(!mpic); + + mpic_assign_isu(mpic, 0, mpic->paddr + 0x10000); + mpic_init(mpic); + /* The NMI/MCK source needs to be prio 15 */ + if (nmiprop) { + nmi_virq = irq_create_mapping(NULL, *nmiprop); + mpic_irq_set_priority(nmi_virq, 15); + irq_set_irq_type(nmi_virq, IRQ_TYPE_EDGE_RISING); + mpic_unmask_irq(irq_get_irq_data(nmi_virq)); + } + + of_node_put(mpic_node); + of_node_put(root); +} + +static void __init pas_progress(char *s, unsigned short hex) +{ + printk("[%04x] : %s\n", hex, s ? 
s : ""); +} + + +static int pas_machine_check_handler(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + unsigned long srr0, srr1, dsisr; + int dump_slb = 0; + int i; + + srr0 = regs->nip; + srr1 = regs->msr; + + if (nmi_virq != NO_IRQ && mpic_get_mcirq() == nmi_virq) { + printk(KERN_ERR "NMI delivered\n"); + debugger(regs); + mpic_end_irq(irq_get_irq_data(nmi_virq)); + goto out; + } + + dsisr = mfspr(SPRN_DSISR); + printk(KERN_ERR "Machine Check on CPU %d\n", cpu); + printk(KERN_ERR "SRR0 0x%016lx SRR1 0x%016lx\n", srr0, srr1); + printk(KERN_ERR "DSISR 0x%016lx DAR 0x%016lx\n", dsisr, regs->dar); + printk(KERN_ERR "BER 0x%016lx MER 0x%016lx\n", mfspr(SPRN_PA6T_BER), + mfspr(SPRN_PA6T_MER)); + printk(KERN_ERR "IER 0x%016lx DER 0x%016lx\n", mfspr(SPRN_PA6T_IER), + mfspr(SPRN_PA6T_DER)); + printk(KERN_ERR "Cause:\n"); + + if (srr1 & 0x200000) + printk(KERN_ERR "Signalled by SDC\n"); + + if (srr1 & 0x100000) { + printk(KERN_ERR "Load/Store detected error:\n"); + if (dsisr & 0x8000) + printk(KERN_ERR "D-cache ECC double-bit error or bus error\n"); + if (dsisr & 0x4000) + printk(KERN_ERR "LSU snoop response error\n"); + if (dsisr & 0x2000) { + printk(KERN_ERR "MMU SLB multi-hit or invalid B field\n"); + dump_slb = 1; + } + if (dsisr & 0x1000) + printk(KERN_ERR "Recoverable Duptags\n"); + if (dsisr & 0x800) + printk(KERN_ERR "Recoverable D-cache parity error count overflow\n"); + if (dsisr & 0x400) + printk(KERN_ERR "TLB parity error count overflow\n"); + } + + if (srr1 & 0x80000) + printk(KERN_ERR "Bus Error\n"); + + if (srr1 & 0x40000) { + printk(KERN_ERR "I-side SLB multiple hit\n"); + dump_slb = 1; + } + + if (srr1 & 0x20000) + printk(KERN_ERR "I-cache parity error hit\n"); + + if (num_mce_regs == 0) + printk(KERN_ERR "No MCE registers mapped yet, can't dump\n"); + else + printk(KERN_ERR "SoC debug registers:\n"); + + for (i = 0; i < num_mce_regs; i++) + printk(KERN_ERR "%s: 0x%08x\n", mce_regs[i].name, + in_le32(mce_regs[i].addr)); + + if (dump_slb) { + unsigned long e, v; + int i; + + printk(KERN_ERR "slb contents:\n"); + for (i = 0; i < mmu_slb_size; i++) { + asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i)); + printk(KERN_ERR "%02d %016lx %016lx\n", i, e, v); + } + } + +out: + /* SRR1[62] is from MSR[62] if recoverable, so pass that back */ + return !!(srr1 & 0x2); +} + +static void __init pas_init_early(void) +{ + iommu_init_early_pasemi(); +} + +#ifdef CONFIG_PCMCIA +static int pcmcia_notify(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct device *dev = data; + struct device *parent; + struct pcmcia_device *pdev = to_pcmcia_dev(dev); + + /* We are only intereted in device addition */ + if (action != BUS_NOTIFY_ADD_DEVICE) + return 0; + + parent = pdev->socket->dev.parent; + + /* We know electra_cf devices will always have of_node set, since + * electra_cf is an of_platform driver. 
+ */ + if (!parent->of_node) + return 0; + + if (!of_device_is_compatible(parent->of_node, "electra-cf")) + return 0; + + /* We use the direct ops for localbus */ + dev->archdata.dma_ops = &dma_direct_ops; + + return 0; +} + +static struct notifier_block pcmcia_notifier = { + .notifier_call = pcmcia_notify, +}; + +static inline void pasemi_pcmcia_init(void) +{ + extern struct bus_type pcmcia_bus_type; + + bus_register_notifier(&pcmcia_bus_type, &pcmcia_notifier); +} + +#else + +static inline void pasemi_pcmcia_init(void) +{ +} + +#endif + + +static const struct of_device_id pasemi_bus_ids[] = { + /* Unfortunately needed for legacy firmwares */ + { .type = "localbus", }, + { .type = "sdc", }, + /* These are the proper entries, which newer firmware uses */ + { .compatible = "pasemi,localbus", }, + { .compatible = "pasemi,sdc", }, + {}, +}; + +static int __init pasemi_publish_devices(void) +{ + pasemi_pcmcia_init(); + + /* Publish OF platform devices for SDC and other non-PCI devices */ + of_platform_bus_probe(NULL, pasemi_bus_ids, NULL); + + return 0; +} +machine_device_initcall(pasemi, pasemi_publish_devices); + + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init pas_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "PA6T-1682M") && + !of_flat_dt_is_compatible(root, "pasemi,pwrficient")) + return 0; + + hpte_init_native(); + + alloc_iobmap_l2(); + + return 1; +} + +define_machine(pasemi) { + .name = "PA Semi PWRficient", + .probe = pas_probe, + .setup_arch = pas_setup_arch, + .init_early = pas_init_early, + .init_IRQ = pas_init_IRQ, + .get_irq = mpic_get_irq, + .restart = pas_restart, + .get_boot_time = pas_get_boot_time, + .calibrate_decr = generic_calibrate_decr, + .progress = pas_progress, + .machine_check_exception = pas_machine_check_handler, +}; diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c new file mode 100644 index 000000000..fa54351ac --- /dev/null +++ b/arch/powerpc/platforms/pasemi/time.c @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2006 PA Semi, Inc + * + * Maintained by: Olof Johansson <olof@lixom.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/time.h> + +#include <asm/time.h> + +unsigned long __init pas_get_boot_time(void) +{ + /* Let's just return a fake date right now */ + return mktime(2006, 1, 1, 12, 0, 0); +} diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig new file mode 100644 index 000000000..607124bae --- /dev/null +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -0,0 +1,30 @@ +config PPC_PMAC + bool "Apple PowerMac based machines" + depends on PPC_BOOK3S + select MPIC + select PCI + select PPC_INDIRECT_PCI if PPC32 + select PPC_MPC106 if PPC32 + select PPC_NATIVE + default y + +config PPC_PMAC64 + bool + depends on PPC_PMAC && PPC64 + select MPIC + select U3_DART + select MPIC_U3_HT_IRQS + select GENERIC_TBSYNC + select PPC_970_NAP + default y + +config PPC_PMAC32_PSURGE + bool "Support for powersurge upgrade cards" if EXPERT + depends on SMP && PPC32 && PPC_PMAC + select PPC_SMP_MUXED_IPI + default y + help + The powersurge cpu boards can be used in the generation + of powermacs that have a socket for an upgradeable cpu card, + including the 7500, 8500, 9500, 9600. Support exists for + both dual and quad socket upgrade cards. diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile new file mode 100644 index 000000000..52c6ce1cc --- /dev/null +++ b/arch/powerpc/platforms/powermac/Makefile @@ -0,0 +1,19 @@ +CFLAGS_bootx_init.o += -fPIC + +ifdef CONFIG_FUNCTION_TRACER +# Do not trace early boot code +CFLAGS_REMOVE_bootx_init.o = -pg -mno-sched-epilog +endif + +obj-y += pic.o setup.o time.o feature.o pci.o \ + sleep.o low_i2c.o cache.o pfunc_core.o \ + pfunc_base.o udbg_scc.o udbg_adb.o +obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o +# CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really +# need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really +# CONFIG_NVRAM=y +obj-$(CONFIG_NVRAM:m=y) += nvram.o +# ppc64 pmac doesn't define CONFIG_NVRAM but needs nvram stuff +obj-$(CONFIG_PPC64) += nvram.o +obj-$(CONFIG_PPC32) += bootx_init.o +obj-$(CONFIG_SMP) += smp.o diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c new file mode 100644 index 000000000..a00096b1c --- /dev/null +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -0,0 +1,220 @@ +/* + * Miscellaneous procedures for dealing with the PowerMac hardware. + * Contains support for the backlight. + * + * Copyright (C) 2000 Benjamin Herrenschmidt + * Copyright (C) 2006 Michael Hanselmann <linux-kernel@hansmi.ch> + * + */ + +#include <linux/kernel.h> +#include <linux/fb.h> +#include <linux/backlight.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/atomic.h> +#include <linux/export.h> +#include <asm/prom.h> +#include <asm/backlight.h> + +#define OLD_BACKLIGHT_MAX 15 + +static void pmac_backlight_key_worker(struct work_struct *work); +static void pmac_backlight_set_legacy_worker(struct work_struct *work); + +static DECLARE_WORK(pmac_backlight_key_work, pmac_backlight_key_worker); +static DECLARE_WORK(pmac_backlight_set_legacy_work, pmac_backlight_set_legacy_worker); + +/* Although these variables are used in interrupt context, it makes no sense to + * protect them. 
No user is able to produce enough key events per second and + * notice the errors that might happen. + */ +static int pmac_backlight_key_queued; +static int pmac_backlight_set_legacy_queued; + +/* The via-pmu code allows the backlight to be grabbed, in which case the + * in-kernel control of the brightness needs to be disabled. This should + * only be used by really old PowerBooks. + */ +static atomic_t kernel_backlight_disabled = ATOMIC_INIT(0); + +/* Protect the pmac_backlight variable below. + You should hold this lock when using the pmac_backlight pointer to + prevent its potential removal. */ +DEFINE_MUTEX(pmac_backlight_mutex); + +/* Main backlight storage + * + * Backlight drivers in this variable are required to have the "ops" + * attribute set and to have an update_status function. + * + * We can only store one backlight here, but since Apple laptops have only one + * internal display, it doesn't matter. Other backlight drivers can be used + * independently. + * + */ +struct backlight_device *pmac_backlight; + +int pmac_has_backlight_type(const char *type) +{ + struct device_node* bk_node = of_find_node_by_name(NULL, "backlight"); + + if (bk_node) { + const char *prop = of_get_property(bk_node, + "backlight-control", NULL); + if (prop && strncmp(prop, type, strlen(type)) == 0) { + of_node_put(bk_node); + return 1; + } + of_node_put(bk_node); + } + + return 0; +} + +int pmac_backlight_curve_lookup(struct fb_info *info, int value) +{ + int level = (FB_BACKLIGHT_LEVELS - 1); + + if (info && info->bl_dev) { + int i, max = 0; + + /* Look for biggest value */ + for (i = 0; i < FB_BACKLIGHT_LEVELS; i++) + max = max((int)info->bl_curve[i], max); + + /* Look for nearest value */ + for (i = 0; i < FB_BACKLIGHT_LEVELS; i++) { + int diff = abs(info->bl_curve[i] - value); + if (diff < max) { + max = diff; + level = i; + } + } + + } + + return level; +} + +static void pmac_backlight_key_worker(struct work_struct *work) +{ + if (atomic_read(&kernel_backlight_disabled)) + return; + + mutex_lock(&pmac_backlight_mutex); + if (pmac_backlight) { + struct backlight_properties *props; + int brightness; + + props = &pmac_backlight->props; + + brightness = props->brightness + + ((pmac_backlight_key_queued?-1:1) * + (props->max_brightness / 15)); + + if (brightness < 0) + brightness = 0; + else if (brightness > props->max_brightness) + brightness = props->max_brightness; + + props->brightness = brightness; + backlight_update_status(pmac_backlight); + } + mutex_unlock(&pmac_backlight_mutex); +} + +/* This function is called in interrupt context */ +void pmac_backlight_key(int direction) +{ + if (atomic_read(&kernel_backlight_disabled)) + return; + + /* we can receive multiple interrupts here, but the scheduled work + * will run only once, with the last value + */ + pmac_backlight_key_queued = direction; + schedule_work(&pmac_backlight_key_work); +} + +static int __pmac_backlight_set_legacy_brightness(int brightness) +{ + int error = -ENXIO; + + mutex_lock(&pmac_backlight_mutex); + if (pmac_backlight) { + struct backlight_properties *props; + + props = &pmac_backlight->props; + props->brightness = brightness * + (props->max_brightness + 1) / + (OLD_BACKLIGHT_MAX + 1); + + if (props->brightness > props->max_brightness) + props->brightness = props->max_brightness; + else if (props->brightness < 0) + props->brightness = 0; + + backlight_update_status(pmac_backlight); + + error = 0; + } + mutex_unlock(&pmac_backlight_mutex); + + return error; +} + +static void pmac_backlight_set_legacy_worker(struct work_struct 
*work) +{ + if (atomic_read(&kernel_backlight_disabled)) + return; + + __pmac_backlight_set_legacy_brightness(pmac_backlight_set_legacy_queued); +} + +/* This function is called in interrupt context */ +void pmac_backlight_set_legacy_brightness_pmu(int brightness) { + if (atomic_read(&kernel_backlight_disabled)) + return; + + pmac_backlight_set_legacy_queued = brightness; + schedule_work(&pmac_backlight_set_legacy_work); +} + +int pmac_backlight_set_legacy_brightness(int brightness) +{ + return __pmac_backlight_set_legacy_brightness(brightness); +} + +int pmac_backlight_get_legacy_brightness() +{ + int result = -ENXIO; + + mutex_lock(&pmac_backlight_mutex); + if (pmac_backlight) { + struct backlight_properties *props; + + props = &pmac_backlight->props; + + result = props->brightness * + (OLD_BACKLIGHT_MAX + 1) / + (props->max_brightness + 1); + } + mutex_unlock(&pmac_backlight_mutex); + + return result; +} + +void pmac_backlight_disable() +{ + atomic_inc(&kernel_backlight_disabled); +} + +void pmac_backlight_enable() +{ + atomic_dec(&kernel_backlight_disabled); +} + +EXPORT_SYMBOL_GPL(pmac_backlight); +EXPORT_SYMBOL_GPL(pmac_backlight_mutex); +EXPORT_SYMBOL_GPL(pmac_has_backlight_type); diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c new file mode 100644 index 000000000..76f5013c3 --- /dev/null +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -0,0 +1,595 @@ +/* + * Early boot support code for BootX bootloader + * + * Copyright (C) 2005 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/init.h> +#include <generated/utsrelease.h> +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/page.h> +#include <asm/bootx.h> +#include <asm/btext.h> +#include <asm/io.h> +#include <asm/setup.h> + +#undef DEBUG +#define SET_BOOT_BAT + +#ifdef DEBUG +#define DBG(fmt...) do { bootx_printf(fmt); } while(0) +#else +#define DBG(fmt...) do { } while(0) +#endif + +extern void __start(unsigned long r3, unsigned long r4, unsigned long r5); + +static unsigned long __initdata bootx_dt_strbase; +static unsigned long __initdata bootx_dt_strend; +static unsigned long __initdata bootx_node_chosen; +static boot_infos_t * __initdata bootx_info; +static char __initdata bootx_disp_path[256]; + +/* Is boot-info compatible ? */ +#define BOOT_INFO_IS_COMPATIBLE(bi) \ + ((bi)->compatible_version <= BOOT_INFO_VERSION) +#define BOOT_INFO_IS_V2_COMPATIBLE(bi) ((bi)->version >= 2) +#define BOOT_INFO_IS_V4_COMPATIBLE(bi) ((bi)->version >= 4) + +#ifdef CONFIG_BOOTX_TEXT +static void __init bootx_printf(const char *format, ...) 
+{ + const char *p, *q, *s; + va_list args; + unsigned long v; + + va_start(args, format); + for (p = format; *p != 0; p = q) { + for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q) + ; + if (q > p) + btext_drawtext(p, q - p); + if (*q == 0) + break; + if (*q == '\n') { + ++q; + btext_flushline(); + btext_drawstring("\r\n"); + btext_flushline(); + continue; + } + ++q; + if (*q == 0) + break; + switch (*q) { + case 's': + ++q; + s = va_arg(args, const char *); + if (s == NULL) + s = "<NULL>"; + btext_drawstring(s); + break; + case 'x': + ++q; + v = va_arg(args, unsigned long); + btext_drawhex(v); + break; + } + } +} +#else /* CONFIG_BOOTX_TEXT */ +static void __init bootx_printf(const char *format, ...) {} +#endif /* CONFIG_BOOTX_TEXT */ + +static void * __init bootx_early_getprop(unsigned long base, + unsigned long node, + char *prop) +{ + struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node); + u32 *ppp = &np->properties; + + while(*ppp) { + struct bootx_dt_prop *pp = + (struct bootx_dt_prop *)(base + *ppp); + + if (strcmp((char *)((unsigned long)pp->name + base), + prop) == 0) { + return (void *)((unsigned long)pp->value + base); + } + ppp = &pp->next; + } + return NULL; +} + +#define dt_push_token(token, mem) \ + do { \ + *(mem) = _ALIGN_UP(*(mem),4); \ + *((u32 *)*(mem)) = token; \ + *(mem) += 4; \ + } while(0) + +static unsigned long __init bootx_dt_find_string(char *str) +{ + char *s, *os; + + s = os = (char *)bootx_dt_strbase; + s += 4; + while (s < (char *)bootx_dt_strend) { + if (strcmp(s, str) == 0) + return s - os; + s += strlen(s) + 1; + } + return 0; +} + +static void __init bootx_dt_add_prop(char *name, void *data, int size, + unsigned long *mem_end) +{ + unsigned long soff = bootx_dt_find_string(name); + if (data == NULL) + size = 0; + if (soff == 0) { + bootx_printf("WARNING: Can't find string index for <%s>\n", + name); + return; + } + if (size > 0x20000) { + bootx_printf("WARNING: ignoring large property "); + bootx_printf("%s length 0x%x\n", name, size); + return; + } + dt_push_token(OF_DT_PROP, mem_end); + dt_push_token(size, mem_end); + dt_push_token(soff, mem_end); + + /* push property content */ + if (size && data) { + memcpy((void *)*mem_end, data, size); + *mem_end = _ALIGN_UP(*mem_end + size, 4); + } +} + +static void __init bootx_add_chosen_props(unsigned long base, + unsigned long *mem_end) +{ + u32 val; + + bootx_dt_add_prop("linux,bootx", NULL, 0, mem_end); + + if (bootx_info->kernelParamsOffset) { + char *args = (char *)((unsigned long)bootx_info) + + bootx_info->kernelParamsOffset; + bootx_dt_add_prop("bootargs", args, strlen(args) + 1, mem_end); + } + if (bootx_info->ramDisk) { + val = ((unsigned long)bootx_info) + bootx_info->ramDisk; + bootx_dt_add_prop("linux,initrd-start", &val, 4, mem_end); + val += bootx_info->ramDiskSize; + bootx_dt_add_prop("linux,initrd-end", &val, 4, mem_end); + } + if (strlen(bootx_disp_path)) + bootx_dt_add_prop("linux,stdout-path", bootx_disp_path, + strlen(bootx_disp_path) + 1, mem_end); +} + +static void __init bootx_add_display_props(unsigned long base, + unsigned long *mem_end, + int has_real_node) +{ + boot_infos_t *bi = bootx_info; + u32 tmp; + + if (has_real_node) { + bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end); + bootx_dt_add_prop("linux,opened", NULL, 0, mem_end); + } else + bootx_dt_add_prop("linux,bootx-noscreen", NULL, 0, mem_end); + + tmp = bi->dispDeviceDepth; + bootx_dt_add_prop("linux,bootx-depth", &tmp, 4, mem_end); + tmp = bi->dispDeviceRect[2] - bi->dispDeviceRect[0]; + 
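/* dispDeviceRect[] holds { left, top, right, bottom }, so the two
+	 * differences computed here are the visible width and height. */
+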
bootx_dt_add_prop("linux,bootx-width", &tmp, 4, mem_end); + tmp = bi->dispDeviceRect[3] - bi->dispDeviceRect[1]; + bootx_dt_add_prop("linux,bootx-height", &tmp, 4, mem_end); + tmp = bi->dispDeviceRowBytes; + bootx_dt_add_prop("linux,bootx-linebytes", &tmp, 4, mem_end); + tmp = (u32)bi->dispDeviceBase; + if (tmp == 0) + tmp = (u32)bi->logicalDisplayBase; + tmp += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes; + tmp += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8); + bootx_dt_add_prop("linux,bootx-addr", &tmp, 4, mem_end); +} + +static void __init bootx_dt_add_string(char *s, unsigned long *mem_end) +{ + unsigned int l = strlen(s) + 1; + memcpy((void *)*mem_end, s, l); + bootx_dt_strend = *mem_end = *mem_end + l; +} + +static void __init bootx_scan_dt_build_strings(unsigned long base, + unsigned long node, + unsigned long *mem_end) +{ + struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node); + u32 *cpp, *ppp = &np->properties; + unsigned long soff; + char *namep; + + /* Keep refs to known nodes */ + namep = np->full_name ? (char *)(base + np->full_name) : NULL; + if (namep == NULL) { + bootx_printf("Node without a full name !\n"); + namep = ""; + } + DBG("* strings: %s\n", namep); + + if (!strcmp(namep, "/chosen")) { + DBG(" detected /chosen ! adding properties names !\n"); + bootx_dt_add_string("linux,bootx", mem_end); + bootx_dt_add_string("linux,stdout-path", mem_end); + bootx_dt_add_string("linux,initrd-start", mem_end); + bootx_dt_add_string("linux,initrd-end", mem_end); + bootx_dt_add_string("bootargs", mem_end); + bootx_node_chosen = node; + } + if (node == bootx_info->dispDeviceRegEntryOffset) { + DBG(" detected display ! adding properties names !\n"); + bootx_dt_add_string("linux,boot-display", mem_end); + bootx_dt_add_string("linux,opened", mem_end); + strlcpy(bootx_disp_path, namep, sizeof(bootx_disp_path)); + } + + /* get and store all property names */ + while (*ppp) { + struct bootx_dt_prop *pp = + (struct bootx_dt_prop *)(base + *ppp); + + namep = pp->name ? (char *)(base + pp->name) : NULL; + if (namep == NULL || strcmp(namep, "name") == 0) + goto next; + /* get/create string entry */ + soff = bootx_dt_find_string(namep); + if (soff == 0) + bootx_dt_add_string(namep, mem_end); + next: + ppp = &pp->next; + } + + /* do all our children */ + cpp = &np->child; + while(*cpp) { + np = (struct bootx_dt_node *)(base + *cpp); + bootx_scan_dt_build_strings(base, *cpp, mem_end); + cpp = &np->sibling; + } +} + +static void __init bootx_scan_dt_build_struct(unsigned long base, + unsigned long node, + unsigned long *mem_end) +{ + struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node); + u32 *cpp, *ppp = &np->properties; + char *namep, *p, *ep, *lp; + int l; + + dt_push_token(OF_DT_BEGIN_NODE, mem_end); + + /* get the node's full name */ + namep = np->full_name ? (char *)(base + np->full_name) : NULL; + if (namep == NULL) + namep = ""; + l = strlen(namep); + + DBG("* struct: %s\n", namep); + + /* Fixup an Apple bug where they have bogus \0 chars in the + * middle of the path in some properties, and extract + * the unit name (everything after the last '/'). + */ + memcpy((void *)*mem_end, namep, l + 1); + namep = (char *)*mem_end; + for (lp = p = namep, ep = namep + l; p < ep; p++) { + if (*p == '/') + lp = namep; + else if (*p != 0) + *lp++ = *p; + } + *lp = 0; + *mem_end = _ALIGN_UP((unsigned long)lp + 1, 4); + + /* get and store all properties */ + while (*ppp) { + struct bootx_dt_prop *pp = + (struct bootx_dt_prop *)(base + *ppp); + + namep = pp->name ? 
(char *)(base + pp->name) : NULL; + /* Skip "name" */ + if (namep == NULL || !strcmp(namep, "name")) + goto next; + /* Skip "bootargs" in /chosen too as we replace it */ + if (node == bootx_node_chosen && !strcmp(namep, "bootargs")) + goto next; + + /* push property head */ + bootx_dt_add_prop(namep, + pp->value ? (void *)(base + pp->value): NULL, + pp->length, mem_end); + next: + ppp = &pp->next; + } + + if (node == bootx_node_chosen) { + bootx_add_chosen_props(base, mem_end); + if (bootx_info->dispDeviceRegEntryOffset == 0) + bootx_add_display_props(base, mem_end, 0); + } + else if (node == bootx_info->dispDeviceRegEntryOffset) + bootx_add_display_props(base, mem_end, 1); + + /* do all our children */ + cpp = &np->child; + while(*cpp) { + np = (struct bootx_dt_node *)(base + *cpp); + bootx_scan_dt_build_struct(base, *cpp, mem_end); + cpp = &np->sibling; + } + + dt_push_token(OF_DT_END_NODE, mem_end); +} + +static unsigned long __init bootx_flatten_dt(unsigned long start) +{ + boot_infos_t *bi = bootx_info; + unsigned long mem_start, mem_end; + struct boot_param_header *hdr; + unsigned long base; + u64 *rsvmap; + + /* Start using memory after the big blob passed by BootX, get + * some space for the header + */ + mem_start = mem_end = _ALIGN_UP(((unsigned long)bi) + start, 4); + DBG("Boot params header at: %x\n", mem_start); + hdr = (struct boot_param_header *)mem_start; + mem_end += sizeof(struct boot_param_header); + rsvmap = (u64 *)(_ALIGN_UP(mem_end, 8)); + hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - mem_start; + mem_end = ((unsigned long)rsvmap) + 8 * sizeof(u64); + + /* Get base of tree */ + base = ((unsigned long)bi) + bi->deviceTreeOffset; + + /* Build string array */ + DBG("Building string array at: %x\n", mem_end); + DBG("Device Tree Base=%x\n", base); + bootx_dt_strbase = mem_end; + mem_end += 4; + bootx_dt_strend = mem_end; + bootx_scan_dt_build_strings(base, 4, &mem_end); + /* Add some strings */ + bootx_dt_add_string("linux,bootx-noscreen", &mem_end); + bootx_dt_add_string("linux,bootx-depth", &mem_end); + bootx_dt_add_string("linux,bootx-width", &mem_end); + bootx_dt_add_string("linux,bootx-height", &mem_end); + bootx_dt_add_string("linux,bootx-linebytes", &mem_end); + bootx_dt_add_string("linux,bootx-addr", &mem_end); + /* Wrap up strings */ + hdr->off_dt_strings = bootx_dt_strbase - mem_start; + hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase; + + /* Build structure */ + mem_end = _ALIGN(mem_end, 16); + DBG("Building device tree structure at: %x\n", mem_end); + hdr->off_dt_struct = mem_end - mem_start; + bootx_scan_dt_build_struct(base, 4, &mem_end); + dt_push_token(OF_DT_END, &mem_end); + + /* Finish header */ + hdr->boot_cpuid_phys = 0; + hdr->magic = OF_DT_HEADER; + hdr->totalsize = mem_end - mem_start; + hdr->version = OF_DT_VERSION; + /* Version 16 is not backward compatible */ + hdr->last_comp_version = 0x10; + + /* Reserve the whole thing and copy the reserve map in, we + * also bump mem_reserve_cnt to cause further reservations to + * fail since it's too late. 
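+	 * (The reserve map written below is the flat tree's list of
+	 * physical ranges to keep away from the allocator: the first
+	 * pair covers this blob itself, the optional second one covers
+	 * the BootX ramdisk, and the list is terminated by a zero pair.)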
+ */ + mem_end = _ALIGN(mem_end, PAGE_SIZE); + DBG("End of boot params: %x\n", mem_end); + rsvmap[0] = mem_start; + rsvmap[1] = mem_end; + if (bootx_info->ramDisk) { + rsvmap[2] = ((unsigned long)bootx_info) + bootx_info->ramDisk; + rsvmap[3] = rsvmap[2] + bootx_info->ramDiskSize; + rsvmap[4] = 0; + rsvmap[5] = 0; + } else { + rsvmap[2] = 0; + rsvmap[3] = 0; + } + + return (unsigned long)hdr; +} + + +#ifdef CONFIG_BOOTX_TEXT +static void __init btext_welcome(boot_infos_t *bi) +{ + unsigned long flags; + unsigned long pvr; + + bootx_printf("Welcome to Linux, kernel " UTS_RELEASE "\n"); + bootx_printf("\nlinked at : 0x%x", KERNELBASE); + bootx_printf("\nframe buffer at : 0x%x", bi->dispDeviceBase); + bootx_printf(" (phys), 0x%x", bi->logicalDisplayBase); + bootx_printf(" (log)"); + bootx_printf("\nklimit : 0x%x",(unsigned long)klimit); + bootx_printf("\nboot_info at : 0x%x", bi); + __asm__ __volatile__ ("mfmsr %0" : "=r" (flags)); + bootx_printf("\nMSR : 0x%x", flags); + __asm__ __volatile__ ("mfspr %0, 287" : "=r" (pvr)); + bootx_printf("\nPVR : 0x%x", pvr); + pvr >>= 16; + if (pvr > 1) { + __asm__ __volatile__ ("mfspr %0, 1008" : "=r" (flags)); + bootx_printf("\nHID0 : 0x%x", flags); + } + if (pvr == 8 || pvr == 12 || pvr == 0x800c) { + __asm__ __volatile__ ("mfspr %0, 1019" : "=r" (flags)); + bootx_printf("\nICTC : 0x%x", flags); + } +#ifdef DEBUG + bootx_printf("\n\n"); + bootx_printf("bi->deviceTreeOffset : 0x%x\n", + bi->deviceTreeOffset); + bootx_printf("bi->deviceTreeSize : 0x%x\n", + bi->deviceTreeSize); +#endif + bootx_printf("\n\n"); +} +#endif /* CONFIG_BOOTX_TEXT */ + +void __init bootx_init(unsigned long r3, unsigned long r4) +{ + boot_infos_t *bi = (boot_infos_t *) r4; + unsigned long hdr; + unsigned long space; + unsigned long ptr, x; + char *model; + unsigned long offset = reloc_offset(); + + reloc_got2(offset); + + bootx_info = bi; + + /* We haven't cleared any bss at this point, make sure + * what we need is initialized + */ + bootx_dt_strbase = bootx_dt_strend = 0; + bootx_node_chosen = 0; + bootx_disp_path[0] = 0; + + if (!BOOT_INFO_IS_V2_COMPATIBLE(bi)) + bi->logicalDisplayBase = bi->dispDeviceBase; + + /* Fixup depth 16 -> 15 as that's what MacOS calls 16bpp */ + if (bi->dispDeviceDepth == 16) + bi->dispDeviceDepth = 15; + + +#ifdef CONFIG_BOOTX_TEXT + ptr = (unsigned long)bi->logicalDisplayBase; + ptr += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes; + ptr += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8); + btext_setup_display(bi->dispDeviceRect[2] - bi->dispDeviceRect[0], + bi->dispDeviceRect[3] - bi->dispDeviceRect[1], + bi->dispDeviceDepth, bi->dispDeviceRowBytes, + (unsigned long)bi->logicalDisplayBase); + btext_clearscreen(); + btext_flushscreen(); +#endif /* CONFIG_BOOTX_TEXT */ + + /* + * Test if boot-info is compatible. Done only in config + * CONFIG_BOOTX_TEXT since there is nothing much we can do + * with an incompatible version, except display a message + * and possibly hang the processor... + * + * I'll try to keep enough of boot-info compatible in the + * future to always allow display of this message. + */ + if (!BOOT_INFO_IS_COMPATIBLE(bi)) { + bootx_printf(" !!! WARNING - Incompatible version" + " of BootX !!!\n\n\n"); + for (;;) + ; + } + if (bi->architecture != BOOT_ARCH_PCI) { + bootx_printf(" !!! WARNING - Unsupported machine" + " architecture !\n"); + for (;;) + ; + } + +#ifdef CONFIG_BOOTX_TEXT + btext_welcome(bi); +#endif + + /* New BootX enters kernel with MMU off, i/os are not allowed + * here. This hack will have been done by the bootstrap anyway. + */
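	/*
	 * As an illustration (a sketch, not part of the original boot
	 * sequence): the raw, unflattened BootX tree can already be
	 * queried at this point, exactly as the version < 4 hack below
	 * does for the "model" property.  Node offset 4 is the root
	 * node, and a missing property simply returns NULL.
	 */
#ifdef DEBUG
	{
		char *compat = (char *)bootx_early_getprop(r4 + bi->deviceTreeOffset,
							   4, "compatible");
		if (compat)
			bootx_printf("root compatible : %s\n", compat);
	}
#endif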
+ if (bi->version < 4) { + /* + * XXX If this is an iMac, turn off the USB controller. + */ + model = (char *) bootx_early_getprop(r4 + bi->deviceTreeOffset, + 4, "model"); + if (model + && (strcmp(model, "iMac,1") == 0 + || strcmp(model, "PowerMac1,1") == 0)) { + bootx_printf("iMac,1 detected, shutting down USB\n"); + out_le32((unsigned __iomem *)0x80880008, 1); /* XXX */ + } + } + + /* Get a pointer that points above the device tree, args, ramdisk, + * etc... to use for generating the flattened tree + */ + if (bi->version < 5) { + space = bi->deviceTreeOffset + bi->deviceTreeSize; + if (bi->ramDisk >= space) + space = bi->ramDisk + bi->ramDiskSize; + } else + space = bi->totalParamsSize; + + bootx_printf("Total space used by parameters & ramdisk: 0x%x\n", space); + + /* New BootX will have flushed all TLBs and enters the kernel + * with the MMU switched OFF, so this should not be useful anymore. + */ + if (bi->version < 4) { + bootx_printf("Touching pages...\n"); + + /* + * Touch each page to make sure the PTEs for them + * are in the hash table - the aim is to try to avoid + * getting DSI exceptions while copying the kernel image. + */ + for (ptr = ((unsigned long) &_stext) & PAGE_MASK; + ptr < (unsigned long)bi + space; ptr += PAGE_SIZE) + x = *(volatile unsigned long *)ptr; + } + + /* Ok, now we need to generate a flattened device-tree to pass + * to the kernel + */ + bootx_printf("Preparing boot params...\n"); + + hdr = bootx_flatten_dt(space); + +#ifdef CONFIG_BOOTX_TEXT +#ifdef SET_BOOT_BAT + bootx_printf("Preparing BAT...\n"); + btext_prepare_BAT(); +#else + btext_unmap(); +#endif +#endif + + reloc_got2(-offset); + + __start(hdr, KERNELBASE + offset, 0); +} diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S new file mode 100644 index 000000000..6be1a4af3 --- /dev/null +++ b/arch/powerpc/platforms/powermac/cache.S @@ -0,0 +1,358 @@ +/* + * This file contains low-level cache management functions + * used for sleep and CPU speed changes on Apple machines. + * (In fact the only thing that is Apple-specific is that we assume + * that we can read from ROM at physical address 0xfff00000.) + * + * Copyright (C) 2004 Paul Mackerras (paulus@samba.org) and + * Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/cputable.h> + +/* + * Flush and disable all data caches (dL1, L2, L3). This is used + * when going to sleep, when doing a PMU based cpufreq transition, + * or when "offlining" a CPU on SMP machines. This code is overly + * paranoid, but I've had enough issues with various CPU revs and + * bugs that I decided it was worth being over-cautious. + */
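/*
 * A note on the technique (an illustrative aside, not from the original
 * file): a "displacement flush" works by reading a region at least as
 * large as the cache, so that every dirty line is evicted by ordinary
 * line replacement.  In C the same idea looks roughly like this,
 * assuming the 32-byte line size the loops below use:
 *
 *	volatile const char *p = region;	// region >= cache size
 *	unsigned long i;
 *
 *	for (i = 0; i < region_size; i += 32)
 *		(void)p[i];			// pull in a line, evict one
 *
 * The assembly below implements this with lwz/dcbf loops, reading from
 * ROM at 0xfff00000 so that the lines brought in are clean.
 */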
+ +_GLOBAL(flush_disable_caches) +#ifndef CONFIG_6xx + blr +#else +BEGIN_FTR_SECTION + b flush_disable_745x +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) +BEGIN_FTR_SECTION + b flush_disable_75x +END_FTR_SECTION_IFSET(CPU_FTR_L2CR) + b __flush_disable_L1 + +/* This is the code for G3 and 74[01]0 */ +flush_disable_75x: + mflr r10 + + /* Turn off EE and DR in MSR */ + mfmsr r11 + rlwinm r0,r11,0,~MSR_EE + rlwinm r0,r0,0,~MSR_DR + sync + mtmsr r0 + isync + + /* Stop DST streams */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + + /* Stop DPM */ + mfspr r8,SPRN_HID0 /* Save SPRN_HID0 in r8 */ + rlwinm r4,r8,0,12,10 /* Turn off HID0[DPM] */ + sync + mtspr SPRN_HID0,r4 /* Disable DPM */ + sync + + /* Disp-flush L1. We have a weird problem here that I never + * totally figured out. On 750FX, using the ROM for the flush + * results in a non-working flush. We use that workaround for + * now until I finally understand what's going on. --BenH + */ + + /* ROM base by default */ + lis r4,0xfff0 + mfpvr r3 + srwi r3,r3,16 + cmplwi cr0,r3,0x7000 + bne+ 1f + /* RAM base on 750FX */ + li r4,0 +1: li r4,0x4000 + mtctr r4 +1: lwz r0,0(r4) + addi r4,r4,32 + bdnz 1b + sync + isync + + /* Disable / invalidate / enable L1 data */ + mfspr r3,SPRN_HID0 + rlwinm r3,r3,0,~(HID0_DCE | HID0_ICE) + mtspr SPRN_HID0,r3 + sync + isync + ori r3,r3,(HID0_DCE|HID0_DCI|HID0_ICE|HID0_ICFI) + sync + isync + mtspr SPRN_HID0,r3 + xori r3,r3,(HID0_DCI|HID0_ICFI) + mtspr SPRN_HID0,r3 + sync + + /* Get the current enable bit of the L2CR into r4 */ + mfspr r5,SPRN_L2CR + /* Set to data-only (pre-745x bit) */ + oris r3,r5,L2CR_L2DO@h + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r3 +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: /* disp-flush L2. The interesting thing here is that the L2 can be + * up to 2Mb ... so using the ROM, we'll end up wrapping back to memory + * but that is probably fine. We disp-flush over 4Mb to be safe + */ + lis r4,2 + mtctr r4 + lis r4,0xfff0 +1: lwz r0,0(r4) + addi r4,r4,32 + bdnz 1b + sync + isync + lis r4,2 + mtctr r4 + lis r4,0xfff0 +1: dcbf 0,r4 + addi r4,r4,32 + bdnz 1b + sync + isync + + /* now disable L2 */ + rlwinm r5,r5,0,~L2CR_L2E + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r5 +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + /* Invalidate L2. This is pre-745x, we clear the L2I bit ourselves */ + oris r4,r5,L2CR_L2I@h + mtspr SPRN_L2CR,r4 + sync + isync + + /* Wait for the invalidation to complete */ +1: mfspr r3,SPRN_L2CR + rlwinm.
r0,r3,0,31,31 + bne 1b + + /* Clear L2I */ + xoris r4,r4,L2CR_L2I@h + sync + mtspr SPRN_L2CR,r4 + sync + + /* now disable the L1 data cache */ + mfspr r0,SPRN_HID0 + rlwinm r0,r0,0,~(HID0_DCE|HID0_ICE) + mtspr SPRN_HID0,r0 + sync + isync + + /* Restore HID0[DPM] to whatever it was before */ + sync + mfspr r0,SPRN_HID0 + rlwimi r0,r8,0,11,11 /* Turn back HID0[DPM] */ + mtspr SPRN_HID0,r0 + sync + + /* restore DR and EE */ + sync + mtmsr r11 + isync + + mtlr r10 + blr + +/* This code is for 745x processors */ +flush_disable_745x: + /* Turn off EE and DR in MSR */ + mfmsr r11 + rlwinm r0,r11,0,~MSR_EE + rlwinm r0,r0,0,~MSR_DR + sync + mtmsr r0 + isync + + /* Stop prefetch streams */ + DSSALL + sync + + /* Disable L2 prefetching */ + mfspr r0,SPRN_MSSCR0 + rlwinm r0,r0,0,0,29 + mtspr SPRN_MSSCR0,r0 + sync + isync + lis r4,0 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + + /* Due to a bug with the HW flush on some CPU revs, we occasionally + * experience data corruption. I'm adding a displacement flush along + * with a dcbf loop over a few Mb to "help". The problem isn't totally + * fixed by this in theory, but at least, in practice, I couldn't reproduce + * it even with a big hammer... + */ + + lis r4,0x0002 + mtctr r4 + li r4,0 +1: + lwz r0,0(r4) + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + isync + + /* Now, flush the first 4MB of memory */ + lis r4,0x0002 + mtctr r4 + li r4,0 + sync +1: + dcbf 0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + + /* Flush and disable the L1 data cache */ + mfspr r6,SPRN_LDSTCR + lis r3,0xfff0 /* read from ROM for displacement flush */ + li r4,0xfe /* start with only way 0 unlocked */ + li r5,128 /* 128 lines in each way */ +1: mtctr r5 + rlwimi r6,r4,0,24,31 + mtspr SPRN_LDSTCR,r6 + sync + isync +2: lwz r0,0(r3) /* touch each cache line */ + addi r3,r3,32 + bdnz 2b + rlwinm r4,r4,1,24,30 /* move on to the next way */ + ori r4,r4,1 + cmpwi r4,0xff /* all done? */ + bne 1b + /* now unlock the L1 data cache */ + li r4,0 + rlwimi r6,r4,0,24,31 + sync + mtspr SPRN_LDSTCR,r6 + sync + isync + + /* Flush the L2 cache using the hardware assist */ + mfspr r3,SPRN_L2CR + cmpwi r3,0 /* check if it is enabled first */ + bge 4f + oris r0,r3,(L2CR_L2IO_745x|L2CR_L2DO_745x)@h + b 2f + /* When disabling/locking L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r0 /* lock the L2 cache */ +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + ori r0,r3,L2CR_L2HWF_745x + sync + mtspr SPRN_L2CR,r0 /* set the hardware flush bit */ +3: mfspr r0,SPRN_L2CR /* wait for it to go to 0 */ + andi. r0,r0,L2CR_L2HWF_745x + bne 3b + sync + rlwinm r3,r3,0,~L2CR_L2E + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r3 /* disable the L2 cache */ +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + oris r4,r3,L2CR_L2I@h + mtspr SPRN_L2CR,r4 + sync + isync +1: mfspr r4,SPRN_L2CR + andis. r0,r4,L2CR_L2I@h + bne 1b + sync + +BEGIN_FTR_SECTION + /* Flush the L3 cache using the hardware assist */ +4: mfspr r3,SPRN_L3CR + cmpwi r3,0 /* check if it is enabled */ + bge 6f + oris r0,r3,L3CR_L3IO@h + ori r0,r0,L3CR_L3DO + sync + mtspr SPRN_L3CR,r0 /* lock the L3 cache */ + sync + isync + ori r0,r0,L3CR_L3HWF + sync + mtspr SPRN_L3CR,r0 /* set the hardware flush bit */ +5: mfspr r0,SPRN_L3CR /* wait for it to go to zero */ + andi. 
r0,r0,L3CR_L3HWF + bne 5b + rlwinm r3,r3,0,~L3CR_L3E + sync + mtspr SPRN_L3CR,r3 /* disable the L3 cache */ + sync + ori r4,r3,L3CR_L3I + mtspr SPRN_L3CR,r4 +1: mfspr r4,SPRN_L3CR + andi. r0,r4,L3CR_L3I + bne 1b + sync +END_FTR_SECTION_IFSET(CPU_FTR_L3CR) + +6: mfspr r0,SPRN_HID0 /* now disable the L1 data cache */ + rlwinm r0,r0,0,~HID0_DCE + mtspr SPRN_HID0,r0 + sync + isync + mtmsr r11 /* restore DR and EE */ + isync + blr +#endif /* CONFIG_6xx */ diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c new file mode 100644 index 000000000..4882bfd90 --- /dev/null +++ b/arch/powerpc/platforms/powermac/feature.c @@ -0,0 +1,3043 @@ +/* + * Copyright (C) 1996-2001 Paul Mackerras (paulus@cs.anu.edu.au) + * Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TODO: + * + * - Replace mdelay with some schedule loop if possible + * - Shorten some obfuscated delays on some routines (like modem + * power) + * - Refcount some clocks (see darwin) + * - Split split split... + * + */ +#include <linux/types.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/spinlock.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/ioport.h> +#include <linux/export.h> +#include <linux/pci.h> +#include <asm/sections.h> +#include <asm/errno.h> +#include <asm/ohare.h> +#include <asm/heathrow.h> +#include <asm/keylargo.h> +#include <asm/uninorth.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> +#include <asm/dbdma.h> +#include <asm/pci-bridge.h> +#include <asm/pmac_low_i2c.h> + +#undef DEBUG_FEATURE + +#ifdef DEBUG_FEATURE +#define DBG(fmt...) printk(KERN_DEBUG fmt) +#else +#define DBG(fmt...) +#endif + +#ifdef CONFIG_6xx +extern int powersave_lowspeed; +#endif + +extern int powersave_nap; +extern struct device_node *k2_skiplist[2]; + +/* + * We use a single global lock to protect accesses. Each driver has + * to take care of its own locking + */ +DEFINE_RAW_SPINLOCK(feature_lock); + +#define LOCK(flags) raw_spin_lock_irqsave(&feature_lock, flags); +#define UNLOCK(flags) raw_spin_unlock_irqrestore(&feature_lock, flags); + + +/* + * Instance of some macio stuffs + */ +struct macio_chip macio_chips[MAX_MACIO_CHIPS]; + +struct macio_chip *macio_find(struct device_node *child, int type) +{ + while(child) { + int i; + + for (i=0; i < MAX_MACIO_CHIPS && macio_chips[i].of_node; i++) + if (child == macio_chips[i].of_node && + (!type || macio_chips[i].type == type)) + return &macio_chips[i]; + child = child->parent; + } + return NULL; +} +EXPORT_SYMBOL_GPL(macio_find); + +static const char *macio_names[] = +{ + "Unknown", + "Grand Central", + "OHare", + "OHareII", + "Heathrow", + "Gatwick", + "Paddington", + "Keylargo", + "Pangea", + "Intrepid", + "K2", + "Shasta", +}; + + +struct device_node *uninorth_node; +u32 __iomem *uninorth_base; + +static u32 uninorth_rev; +static int uninorth_maj; +static void __iomem *u3_ht_base; + +/* + * For each motherboard family, we have a table of functions pointers + * that handle the various features. 
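+ */

/*
 * A sketch of how these tables are consumed (illustration only, with a
 * hypothetical helper name; the real dispatcher lives later in this
 * file and is not shown here).  Each table is an array of
 * { selector, handler } pairs terminated by { 0, NULL }, and the entry
 * point simply scans the table chosen for the detected motherboard:
 *
 *	static long do_feature_call(struct feature_table_entry *t,
 *				    int selector, struct device_node *node,
 *				    long param, long value)
 *	{
 *		for (; t->function != NULL; t++)
 *			if (t->selector == selector)
 *				return t->function(node, param, value);
 *		return -ENODEV;
 *	}
 */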
+typedef long (*feature_call)(struct device_node *node, long param, long value); + +struct feature_table_entry { + unsigned int selector; + feature_call function; +}; + +struct pmac_mb_def +{ + const char* model_string; + const char* model_name; + int model_id; + struct feature_table_entry* features; + unsigned long board_flags; +}; +static struct pmac_mb_def pmac_mb; + +/* + * Here are the chip specific feature functions + */ + +static inline int simple_feature_tweak(struct device_node *node, int type, + int reg, u32 mask, int value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, type); + if (!macio) + return -ENODEV; + LOCK(flags); + if (value) + MACIO_BIS(reg, mask); + else + MACIO_BIC(reg, mask); + (void)MACIO_IN32(reg); + UNLOCK(flags); + + return 0; +} + +#ifndef CONFIG_PPC64 + +static long ohare_htw_scc_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long chan_mask; + unsigned long fcr; + unsigned long flags; + int htw, trans; + unsigned long rmask; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (!strcmp(node->name, "ch-a")) + chan_mask = MACIO_FLAG_SCCA_ON; + else if (!strcmp(node->name, "ch-b")) + chan_mask = MACIO_FLAG_SCCB_ON; + else + return -ENODEV; + + htw = (macio->type == macio_heathrow || macio->type == macio_paddington + || macio->type == macio_gatwick); + /* On these machines, the HRW_SCC_TRANS_EN_N bit mustn't be touched */ + trans = (pmac_mb.model_id != PMAC_TYPE_YOSEMITE && + pmac_mb.model_id != PMAC_TYPE_YIKES); + if (value) { +#ifdef CONFIG_ADB_PMU + if ((param & 0xfff) == PMAC_SCC_IRDA) + pmu_enable_irled(1); +#endif /* CONFIG_ADB_PMU */ + LOCK(flags); + fcr = MACIO_IN32(OHARE_FCR); + /* Check if the SCC cell needs enabling */ + if (!(fcr & OH_SCC_ENABLE)) { + fcr |= OH_SCC_ENABLE; + if (htw) { + /* Side effect: this will also power up the + * modem, but it's too messy to figure out on which + * ports this controls the transceiver and on which + * it controls the modem + */ + if (trans) + fcr &= ~HRW_SCC_TRANS_EN_N; + MACIO_OUT32(OHARE_FCR, fcr); + fcr |= (rmask = HRW_RESET_SCC); + MACIO_OUT32(OHARE_FCR, fcr); + } else { + fcr |= (rmask = OH_SCC_RESET); + MACIO_OUT32(OHARE_FCR, fcr); + } + UNLOCK(flags); + (void)MACIO_IN32(OHARE_FCR); + mdelay(15); + LOCK(flags); + fcr &= ~rmask; + MACIO_OUT32(OHARE_FCR, fcr); + } + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr |= OH_SCCA_IO; + if (chan_mask & MACIO_FLAG_SCCB_ON) + fcr |= OH_SCCB_IO; + MACIO_OUT32(OHARE_FCR, fcr); + macio->flags |= chan_mask; + UNLOCK(flags); + if (param & PMAC_SCC_FLAG_XMON) + macio->flags |= MACIO_FLAG_SCC_LOCKED; + } else { + if (macio->flags & MACIO_FLAG_SCC_LOCKED) + return -EPERM; + LOCK(flags); + fcr = MACIO_IN32(OHARE_FCR); + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr &= ~OH_SCCA_IO; + if (chan_mask & MACIO_FLAG_SCCB_ON) + fcr &= ~OH_SCCB_IO; + MACIO_OUT32(OHARE_FCR, fcr); + if ((fcr & (OH_SCCA_IO | OH_SCCB_IO)) == 0) { + fcr &= ~OH_SCC_ENABLE; + if (htw && trans) + fcr |= HRW_SCC_TRANS_EN_N; + MACIO_OUT32(OHARE_FCR, fcr); + } + macio->flags &= ~(chan_mask); + UNLOCK(flags); + mdelay(10); +#ifdef CONFIG_ADB_PMU + if ((param & 0xfff) == PMAC_SCC_IRDA) + pmu_enable_irled(0); +#endif /* CONFIG_ADB_PMU */ + } + return 0; +} + +static long ohare_floppy_enable(struct device_node *node, long param, + long value) +{ + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_FLOPPY_ENABLE, value); +} + +static long ohare_mesh_enable(struct device_node *node, long param, long value) +{ +
return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_MESH_ENABLE, value); +} + +static long ohare_ide_enable(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + /* For some reason, setting the bit in set_initial_features() + * doesn't stick. I'm still investigating... --BenH. + */ + if (value) + simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IOBUS_ENABLE, 1); + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_BAY_IDE_ENABLE, value); + default: + return -ENODEV; + } +} + +static long ohare_ide_reset(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE1_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long ohare_sleep_state(struct device_node *node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + if (value == 1) { + MACIO_BIC(OHARE_FCR, OH_IOBUS_ENABLE); + } else if (value == 0) { + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + } + + return 0; +} + +static long heathrow_modem_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + macio = macio_find(node, macio_unknown); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1; + if (!value) { + LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + mdelay(250); + } + if (pmac_mb.model_id != PMAC_TYPE_YOSEMITE && + pmac_mb.model_id != PMAC_TYPE_YIKES) { + LOCK(flags); + if (value) + MACIO_BIC(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + else + MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(250); + } + if (value) { + LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long heathrow_floppy_enable(struct device_node *node, long param, + long value) +{ + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, + HRW_SWIM_ENABLE|HRW_BAY_FLOPPY_ENABLE, + value); +} + +static long heathrow_mesh_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, macio_unknown); + if (!macio) + return -ENODEV; + LOCK(flags); + /* Set/clear mesh cell enable */ + if (value) + MACIO_BIS(HEATHROW_FCR, HRW_MESH_ENABLE); + else + MACIO_BIC(HEATHROW_FCR, HRW_MESH_ENABLE); + (void)MACIO_IN32(HEATHROW_FCR); + udelay(10); + /* Set/Clear termination power */ + if (value) + MACIO_BIC(HEATHROW_MBCR, 0x04000000); + else + MACIO_BIS(HEATHROW_MBCR, 0x04000000); + (void)MACIO_IN32(HEATHROW_MBCR); + udelay(10); + UNLOCK(flags); + + return 0; +} + +static long heathrow_ide_enable(struct device_node *node, long param, + long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_BAY_IDE_ENABLE, value); + default: + return -ENODEV; + } +}
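/*
 * For reference (an illustrative sketch, not code from this file):
 * drivers do not call these handlers directly, they go through the
 * selector-based entry point declared in <asm/pmac_feature.h>.
 * Holding the first IDE bus in reset and then releasing it would look
 * roughly like this, np being the IDE controller's device node:
 *
 *	pmac_call_feature(PMAC_FTR_IDE_RESET, np, 0, 1);
 *	mdelay(10);
 *	pmac_call_feature(PMAC_FTR_IDE_RESET, np, 0, 0);
 *
 * Note that the handlers invert "value" for the ..._RESET_N bits (they
 * are active low), so value == 1 means "assert reset".
 */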
+ +static long heathrow_ide_reset(struct device_node *node, long param, + long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE1_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long heathrow_bmac_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (value) { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_BMAC_IO_ENABLE); + MACIO_BIS(HEATHROW_FCR, HRW_BMAC_RESET); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(10); + LOCK(flags); + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_RESET); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(10); + } else { + LOCK(flags); + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE); + UNLOCK(flags); + } + return 0; +} + +static long heathrow_sound_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + /* B&W G3 and Yikes don't support that properly (the + * sound appears to never come back after being shut down). + */ + if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE || + pmac_mb.model_id == PMAC_TYPE_YIKES) + return 0; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (value) { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + } else { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + UNLOCK(flags); + } + return 0; +} + +static u32 save_fcr[6]; +static u32 save_mbcr; +static struct dbdma_regs save_dbdma[13]; +static struct dbdma_regs save_alt_dbdma[13]; + +static void dbdma_save(struct macio_chip *macio, struct dbdma_regs *save) +{ + int i; + + /* Save state & config of DBDMA channels */ + for (i = 0; i < 13; i++) { + volatile struct dbdma_regs __iomem * chan = (void __iomem *) + (macio->base + ((0x8000+i*0x100)>>2)); + save[i].cmdptr_hi = in_le32(&chan->cmdptr_hi); + save[i].cmdptr = in_le32(&chan->cmdptr); + save[i].intr_sel = in_le32(&chan->intr_sel); + save[i].br_sel = in_le32(&chan->br_sel); + save[i].wait_sel = in_le32(&chan->wait_sel); + } +} + +static void dbdma_restore(struct macio_chip *macio, struct dbdma_regs *save) +{ + int i; + + /* Restore state & config of DBDMA channels */ + for (i = 0; i < 13; i++) { + volatile struct dbdma_regs __iomem * chan = (void __iomem *) + (macio->base + ((0x8000+i*0x100)>>2)); + out_le32(&chan->control, (ACTIVE|DEAD|WAKE|FLUSH|PAUSE|RUN)<<16); + while (in_le32(&chan->status) & ACTIVE) + mb(); + out_le32(&chan->cmdptr_hi, save[i].cmdptr_hi); + out_le32(&chan->cmdptr, save[i].cmdptr); + out_le32(&chan->intr_sel, save[i].intr_sel); + out_le32(&chan->br_sel, save[i].br_sel); + out_le32(&chan->wait_sel, save[i].wait_sel); + } +}
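/*
 * A note on the DBDMA programming model (illustration, not part of the
 * original file): writes to a channel's control register carry a 16-bit
 * mask in the upper half of the word and the new bit values in the
 * lower half, so selected bits can be updated in a single write.
 * Writing (bits << 16) with a zero lower half, as dbdma_restore() does
 * above, therefore clears all of the named bits; setting RUN while
 * leaving the other bits alone would look like:
 *
 *	out_le32(&chan->control, (RUN << 16) | RUN);
 */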
+ +static void heathrow_sleep(struct macio_chip *macio, int secondary) +{ + if (secondary) { + dbdma_save(macio, save_alt_dbdma); + save_fcr[2] = MACIO_IN32(0x38); + save_fcr[3] = MACIO_IN32(0x3c); + } else { + dbdma_save(macio, save_dbdma); + save_fcr[0] = MACIO_IN32(0x38); + save_fcr[1] = MACIO_IN32(0x3c); + save_mbcr = MACIO_IN32(0x34); + /* Make sure sound is shut down */ + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + /* This seems to be necessary as well or the fan + * keeps coming up and the battery drains fast */ + MACIO_BIC(HEATHROW_FCR, HRW_IOBUS_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_IDE0_RESET_N); + /* Make sure eth is down even if module or sleep + * won't work properly */ + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE | HRW_BMAC_RESET); + } + /* Make sure modem is shut down */ + MACIO_OUT8(HRW_GPIO_MODEM_RESET, + MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1); + MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + MACIO_BIC(HEATHROW_FCR, OH_SCCA_IO|OH_SCCB_IO|HRW_SCC_ENABLE); + + /* Let things settle */ + (void)MACIO_IN32(HEATHROW_FCR); +} + +static void heathrow_wakeup(struct macio_chip *macio, int secondary) +{ + if (secondary) { + MACIO_OUT32(0x38, save_fcr[2]); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x3c, save_fcr[3]); + (void)MACIO_IN32(0x38); + mdelay(10); + dbdma_restore(macio, save_alt_dbdma); + } else { + MACIO_OUT32(0x38, save_fcr[0] | HRW_IOBUS_ENABLE); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x3c, save_fcr[1]); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x34, save_mbcr); + (void)MACIO_IN32(0x38); + mdelay(10); + dbdma_restore(macio, save_dbdma); + } +} + +static long heathrow_sleep_state(struct device_node *node, long param, + long value) +{ + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + if (value == 1) { + if (macio_chips[1].type == macio_gatwick) + heathrow_sleep(&macio_chips[0], 1); + heathrow_sleep(&macio_chips[0], 0); + } else if (value == 0) { + heathrow_wakeup(&macio_chips[0], 0); + if (macio_chips[1].type == macio_gatwick) + heathrow_wakeup(&macio_chips[0], 1); + } + return 0; +} + +static long core99_scc_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + unsigned long chan_mask; + u32 fcr; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (!strcmp(node->name, "ch-a")) + chan_mask = MACIO_FLAG_SCCA_ON; + else if (!strcmp(node->name, "ch-b")) + chan_mask = MACIO_FLAG_SCCB_ON; + else + return -ENODEV; + + if (value) { + int need_reset_scc = 0; + int need_reset_irda = 0; + + LOCK(flags); + fcr = MACIO_IN32(KEYLARGO_FCR0); + /* Check if the SCC cell needs enabling */ + if (!(fcr & KL0_SCC_CELL_ENABLE)) { + fcr |= KL0_SCC_CELL_ENABLE; + need_reset_scc = 1; + } + if (chan_mask & MACIO_FLAG_SCCA_ON) { + fcr |= KL0_SCCA_ENABLE; + /* Don't enable line drivers for I2S modem */ + if ((param & 0xfff) == PMAC_SCC_I2S1) + fcr &= ~KL0_SCC_A_INTF_ENABLE; + else + fcr |= KL0_SCC_A_INTF_ENABLE; + } + if (chan_mask & MACIO_FLAG_SCCB_ON) { + fcr |= KL0_SCCB_ENABLE; + /* Perform irda specific inits */ + if ((param & 0xfff) == PMAC_SCC_IRDA) { + fcr &= ~KL0_SCC_B_INTF_ENABLE; + fcr |= KL0_IRDA_ENABLE; + fcr |= KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE; + fcr |= KL0_IRDA_SOURCE1_SEL; + fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0); + fcr &= ~(KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND); + need_reset_irda = 1; + } else + fcr |= KL0_SCC_B_INTF_ENABLE; + } + MACIO_OUT32(KEYLARGO_FCR0, fcr); + macio->flags |= chan_mask; + if (need_reset_scc) { + MACIO_BIS(KEYLARGO_FCR0, KL0_SCC_RESET); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(15); + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR0, KL0_SCC_RESET); + } + if (need_reset_irda) { + MACIO_BIS(KEYLARGO_FCR0, KL0_IRDA_RESET); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(15); + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR0,
KL0_IRDA_RESET); + } + UNLOCK(flags); + if (param & PMAC_SCC_FLAG_XMON) + macio->flags |= MACIO_FLAG_SCC_LOCKED; + } else { + if (macio->flags & MACIO_FLAG_SCC_LOCKED) + return -EPERM; + LOCK(flags); + fcr = MACIO_IN32(KEYLARGO_FCR0); + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr &= ~KL0_SCCA_ENABLE; + if (chan_mask & MACIO_FLAG_SCCB_ON) { + fcr &= ~KL0_SCCB_ENABLE; + /* Perform irda specific clears */ + if ((param & 0xfff) == PMAC_SCC_IRDA) { + fcr &= ~KL0_IRDA_ENABLE; + fcr &= ~(KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE); + fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0); + fcr &= ~(KL0_IRDA_SOURCE1_SEL|KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND); + } + } + MACIO_OUT32(KEYLARGO_FCR0, fcr); + if ((fcr & (KL0_SCCA_ENABLE | KL0_SCCB_ENABLE)) == 0) { + fcr &= ~KL0_SCC_CELL_ENABLE; + MACIO_OUT32(KEYLARGO_FCR0, fcr); + } + macio->flags &= ~(chan_mask); + UNLOCK(flags); + mdelay(10); + } + return 0; +} + +static long +core99_modem_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + /* Hack for internal USB modem */ + if (node == NULL) { + if (macio_chips[0].type != macio_keylargo) + return -ENODEV; + node = macio_chips[0].of_node; + } + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(KL_GPIO_MODEM_RESET); + gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE; + gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA; + + if (!value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + mdelay(250); + } + LOCK(flags); + if (value) { + MACIO_BIC(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR2); + mdelay(250); + } else { + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + UNLOCK(flags); + } + if (value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long +pangea_modem_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + /* Hack for internal USB modem */ + if (node == NULL) { + if (macio_chips[0].type != macio_pangea && + macio_chips[0].type != macio_intrepid) + return -ENODEV; + node = macio_chips[0].of_node; + } + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(KL_GPIO_MODEM_RESET); + gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE; + gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA; + + if (!value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + mdelay(250); + } + LOCK(flags); + if (value) { + MACIO_OUT8(KL_GPIO_MODEM_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR2); + mdelay(250); + } else { + MACIO_OUT8(KL_GPIO_MODEM_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA); + UNLOCK(flags); + } + if (value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + 
MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long +core99_ata100_enable(struct device_node *node, long value) +{ + unsigned long flags; + struct pci_dev *pdev = NULL; + u8 pbus, pid; + int rc; + + if (uninorth_rev < 0x24) + return -ENODEV; + + LOCK(flags); + if (value) + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100); + else + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100); + (void)UN_IN(UNI_N_CLOCK_CNTL); + UNLOCK(flags); + udelay(20); + + if (value) { + if (pci_device_from_OF_node(node, &pbus, &pid) == 0) + pdev = pci_get_bus_and_slot(pbus, pid); + if (pdev == NULL) + return 0; + rc = pci_enable_device(pdev); + if (rc == 0) + pci_set_master(pdev); + pci_dev_put(pdev); + if (rc) + return rc; + } + return 0; +} + +static long +core99_ide_enable(struct device_node *node, long param, long value) +{ + /* Bus ID 0 to 2 are KeyLargo based IDE, busID 3 is U2 + * based ata-100 + */ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE1_ENABLE, value); + case 2: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_UIDE_ENABLE, value); + case 3: + return core99_ata100_enable(node, value); + default: + return -ENODEV; + } +} + +static long +core99_ide_reset(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE1_RESET_N, !value); + case 2: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_UIDE_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long +core99_gmac_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + + LOCK(flags); + if (value) + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC); + else + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC); + (void)UN_IN(UNI_N_CLOCK_CNTL); + UNLOCK(flags); + udelay(20); + + return 0; +} + +static long +core99_gmac_phy_reset(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + LOCK(flags); + MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(KL_GPIO_ETH_PHY_RESET); + UNLOCK(flags); + mdelay(10); + LOCK(flags); + MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, /*KEYLARGO_GPIO_OUTPUT_ENABLE | */ + KEYLARGO_GPIO_OUTOUT_DATA); + UNLOCK(flags); + mdelay(10); + + return 0; +} + +static long +core99_sound_chip_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + + /* Do a better probe code, screamer G4 desktops & + * iMacs can do that too, add a recalibrate in + * the driver as well + */ + if (pmac_mb.model_id == PMAC_TYPE_PISMO || + pmac_mb.model_id == PMAC_TYPE_TITANIUM) { + LOCK(flags); + if (value) + MACIO_OUT8(KL_GPIO_SOUND_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE | + KEYLARGO_GPIO_OUTOUT_DATA); + else + MACIO_OUT8(KL_GPIO_SOUND_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(KL_GPIO_SOUND_POWER); + UNLOCK(flags); + } + return 0; +} + +static long 
+core99_airport_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + int state; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + + /* Hint: we allow passing of macio itself for the sake of the + * sleep code + */ + if (node != macio->of_node && + (!node->parent || node->parent != macio->of_node)) + return -ENODEV; + state = (macio->flags & MACIO_FLAG_AIRPORT_ON) != 0; + if (value == state) + return 0; + if (value) { + /* This code is a reproduction of OF enable-cardslot + * and init-wireless methods, slightly hacked until + * I got it working. + */ + LOCK(flags); + MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 5); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xf); + UNLOCK(flags); + mdelay(10); + LOCK(flags); + MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 4); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xf); + UNLOCK(flags); + + mdelay(10); + + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xb, 0); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xb); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xa, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xa); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xd, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xd); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_0+0xd, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xd); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_0+0xe, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xe); + UNLOCK(flags); + udelay(10); + MACIO_OUT32(0x1c000, 0); + mdelay(1); + MACIO_OUT8(0x1a3e0, 0x41); + (void)MACIO_IN8(0x1a3e0); + udelay(10); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + UNLOCK(flags); + mdelay(100); + + macio->flags |= MACIO_FLAG_AIRPORT_ON; + } else { + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + MACIO_OUT8(KL_GPIO_AIRPORT_0, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_1, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_2, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_3, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_4, 0); + (void)MACIO_IN8(KL_GPIO_AIRPORT_4); + UNLOCK(flags); + + macio->flags &= ~MACIO_FLAG_AIRPORT_ON; + } + return 0; +} + +#ifdef CONFIG_SMP +static long +core99_reset_cpu(struct device_node *node, long param, long value) +{ + unsigned int reset_io = 0; + unsigned long flags; + struct macio_chip *macio; + struct device_node *np; + struct device_node *cpus; + const int dflt_reset_lines[] = { KL_GPIO_RESET_CPU0, + KL_GPIO_RESET_CPU1, + KL_GPIO_RESET_CPU2, + KL_GPIO_RESET_CPU3 }; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo) + return -ENODEV; + + cpus = of_find_node_by_path("/cpus"); + if (cpus == NULL) + return -ENODEV; + for (np = cpus->child; np != NULL; np = np->sibling) { + const u32 *num = of_get_property(np, "reg", NULL); + const u32 *rst = of_get_property(np, "soft-reset", NULL); + if (num == NULL || rst == NULL) + continue; + if (param == *num) { + reset_io = *rst; + break; + } + } + of_node_put(cpus); + if (np == NULL || reset_io == 0) + reset_io = dflt_reset_lines[param]; + + LOCK(flags); + MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(reset_io); + udelay(1); + MACIO_OUT8(reset_io, 0); + (void)MACIO_IN8(reset_io); + UNLOCK(flags); + + return 0; +} +#endif /* CONFIG_SMP */ + +static long +core99_usb_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio; + unsigned long flags; + const char *prop; + int number; + u32 reg; + + macio = &macio_chips[0]; + if 
(macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + prop = of_get_property(node, "AAPL,clock-id", NULL); + if (!prop) + return -ENODEV; + if (strncmp(prop, "usb0u048", 8) == 0) + number = 0; + else if (strncmp(prop, "usb1u148", 8) == 0) + number = 2; + else if (strncmp(prop, "usb2u248", 8) == 0) + number = 4; + else + return -ENODEV; + + /* Sorry for the brute-force locking, but this is only used during + * sleep and the timing seem to be critical + */ + LOCK(flags); + if (value) { + /* Turn ON */ + if (number == 0) { + MACIO_BIC(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE); + } else if (number == 2) { + MACIO_BIC(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1)); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE); + } else if (number == 4) { + MACIO_BIC(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1)); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR1); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR1, KL1_USB2_CELL_ENABLE); + } + if (number < 4) { + reg = MACIO_IN32(KEYLARGO_FCR4); + reg &= ~(KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) | + KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number)); + reg &= ~(KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) | + KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1)); + MACIO_OUT32(KEYLARGO_FCR4, reg); + (void)MACIO_IN32(KEYLARGO_FCR4); + udelay(10); + } else { + reg = MACIO_IN32(KEYLARGO_FCR3); + reg &= ~(KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) | + KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0)); + reg &= ~(KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) | + KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1)); + MACIO_OUT32(KEYLARGO_FCR3, reg); + (void)MACIO_IN32(KEYLARGO_FCR3); + udelay(10); + } + if (macio->type == macio_intrepid) { + /* wait for clock stopped bits to clear */ + u32 test0 = 0, test1 = 0; + u32 status0, status1; + int timeout = 1000; + + UNLOCK(flags); + switch (number) { + case 0: + test0 = UNI_N_CLOCK_STOPPED_USB0; + test1 = UNI_N_CLOCK_STOPPED_USB0PCI; + break; + case 2: + test0 = UNI_N_CLOCK_STOPPED_USB1; + test1 = UNI_N_CLOCK_STOPPED_USB1PCI; + break; + case 4: + test0 = UNI_N_CLOCK_STOPPED_USB2; + test1 = UNI_N_CLOCK_STOPPED_USB2PCI; + break; + } + do { + if (--timeout <= 0) { + printk(KERN_ERR "core99_usb_enable: " + "Timeout waiting for clocks\n"); + break; + } + mdelay(1); + status0 = UN_IN(UNI_N_CLOCK_STOP_STATUS0); + status1 = UN_IN(UNI_N_CLOCK_STOP_STATUS1); + } while ((status0 & test0) | (status1 & test1)); + LOCK(flags); + } + } else { + /* Turn OFF */ + if (number < 4) { + reg = MACIO_IN32(KEYLARGO_FCR4); + reg |= KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) | + KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number); + reg |= KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) | + KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1); + MACIO_OUT32(KEYLARGO_FCR4, reg); + (void)MACIO_IN32(KEYLARGO_FCR4); + udelay(1); + } else { + reg = MACIO_IN32(KEYLARGO_FCR3); + reg |= KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) | + KL3_IT_PORT_CONNECT_WAKE_EN(0) | 
KL3_IT_PORT_DISCONNECT_WAKE_EN(0); + reg |= KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) | + KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1); + MACIO_OUT32(KEYLARGO_FCR3, reg); + (void)MACIO_IN32(KEYLARGO_FCR3); + udelay(1); + } + if (number == 0) { + if (macio->type != macio_intrepid) + MACIO_BIC(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE); + (void)MACIO_IN32(KEYLARGO_FCR0); + udelay(1); + MACIO_BIS(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + } else if (number == 2) { + if (macio->type != macio_intrepid) + MACIO_BIC(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE); + (void)MACIO_IN32(KEYLARGO_FCR0); + udelay(1); + MACIO_BIS(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + } else if (number == 4) { + udelay(1); + MACIO_BIS(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR1); + } + udelay(1); + } + UNLOCK(flags); + + return 0; +} + +static long +core99_firewire_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED)) + return -ENODEV; + + LOCK(flags); + if (value) { + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW); + (void)UN_IN(UNI_N_CLOCK_CNTL); + } else { + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW); + (void)UN_IN(UNI_N_CLOCK_CNTL); + } + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long +core99_firewire_cable_power(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + /* Trick: we allow NULL node */ + if ((pmac_mb.board_flags & PMAC_MB_HAS_FW_POWER) == 0) + return -ENODEV; + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED)) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_OUT8(KL_GPIO_FW_CABLE_POWER , 0); + MACIO_IN8(KL_GPIO_FW_CABLE_POWER); + udelay(10); + } else { + MACIO_OUT8(KL_GPIO_FW_CABLE_POWER , 4); + MACIO_IN8(KL_GPIO_FW_CABLE_POWER); udelay(10); + } + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long +intrepid_aack_delay_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + + if (uninorth_rev < 0xd2) + return -ENODEV; + + LOCK(flags); + if (param) + UN_BIS(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE); + else + UN_BIC(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE); + UNLOCK(flags); + + return 0; +} + + +#endif /* CONFIG_PPC64 */ + +static long +core99_read_gpio(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + + return MACIO_IN8(param); +} + + +static long +core99_write_gpio(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + + MACIO_OUT8(param, (u8)(value & 0xff)); + return 0; +} + +#ifdef CONFIG_PPC64 +static long g5_gmac_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + + if (node == NULL) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + mb(); + k2_skiplist[0] = NULL; + } else { + k2_skiplist[0] = node; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + } + + 
UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long g5_fw_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + + if (node == NULL) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + mb(); + k2_skiplist[1] = NULL; + } else { + k2_skiplist[1] = node; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long g5_mpic_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct device_node *parent = of_get_parent(node); + int is_u3; + + if (parent == NULL) + return 0; + is_u3 = strcmp(parent->name, "u3") == 0 || + strcmp(parent->name, "u4") == 0; + of_node_put(parent); + if (!is_u3) + return 0; + + LOCK(flags); + UN_BIS(U3_TOGGLE_REG, U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE); + UNLOCK(flags); + + return 0; +} + +static long g5_eth_phy_reset(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + struct device_node *phy; + int need_reset; + + /* + * We must not reset the combo PHYs, only the BCM5221 found in + * the iMac G5. + */ + phy = of_get_next_child(node, NULL); + if (!phy) + return -ENODEV; + need_reset = of_device_is_compatible(phy, "B5221"); + of_node_put(phy); + if (!need_reset) + return 0; + + /* PHY reset is GPIO 29, not in device-tree unfortunately */ + MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, + KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA); + /* Thankfully, this is now always called at a time when we can + * schedule by sungem. + */ + msleep(10); + MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, 0); + + return 0; +} + +static long g5_i2s_enable(struct device_node *node, long param, long value) +{ + /* Very crude implementation for now */ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + int cell; + u32 fcrs[3][3] = { + { 0, + K2_FCR1_I2S0_CELL_ENABLE | + K2_FCR1_I2S0_CLK_ENABLE_BIT | K2_FCR1_I2S0_ENABLE, + KL3_I2S0_CLK18_ENABLE + }, + { KL0_SCC_A_INTF_ENABLE, + K2_FCR1_I2S1_CELL_ENABLE | + K2_FCR1_I2S1_CLK_ENABLE_BIT | K2_FCR1_I2S1_ENABLE, + KL3_I2S1_CLK18_ENABLE + }, + { KL0_SCC_B_INTF_ENABLE, + SH_FCR1_I2S2_CELL_ENABLE | + SH_FCR1_I2S2_CLK_ENABLE_BIT | SH_FCR1_I2S2_ENABLE, + SH_FCR3_I2S2_CLK18_ENABLE + }, + }; + + if (macio->type != macio_keylargo2 && macio->type != macio_shasta) + return -ENODEV; + if (strncmp(node->name, "i2s-", 4)) + return -ENODEV; + cell = node->name[4] - 'a'; + switch(cell) { + case 0: + case 1: + break; + case 2: + if (macio->type == macio_shasta) + break; + default: + return -ENODEV; + } + + LOCK(flags); + if (value) { + MACIO_BIC(KEYLARGO_FCR0, fcrs[cell][0]); + MACIO_BIS(KEYLARGO_FCR1, fcrs[cell][1]); + MACIO_BIS(KEYLARGO_FCR3, fcrs[cell][2]); + } else { + MACIO_BIC(KEYLARGO_FCR3, fcrs[cell][2]); + MACIO_BIC(KEYLARGO_FCR1, fcrs[cell][1]); + MACIO_BIS(KEYLARGO_FCR0, fcrs[cell][0]); + } + udelay(10); + UNLOCK(flags); + + return 0; +} + + +#ifdef CONFIG_SMP +static long g5_reset_cpu(struct device_node *node, long param, long value) +{ + unsigned int reset_io = 0; + unsigned long flags; + struct macio_chip *macio; + struct device_node *np; + struct device_node *cpus; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo2 && macio->type != macio_shasta) + return -ENODEV; + + cpus = of_find_node_by_path("/cpus"); + if (cpus == NULL) + return -ENODEV; + for (np = cpus->child; np != NULL; np = np->sibling) { + const u32 *num = of_get_property(np, "reg", NULL); + 
const u32 *rst = of_get_property(np, "soft-reset", NULL); + if (num == NULL || rst == NULL) + continue; + if (param == *num) { + reset_io = *rst; + break; + } + } + of_node_put(cpus); + if (np == NULL || reset_io == 0) + return -ENODEV; + + LOCK(flags); + MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(reset_io); + udelay(1); + MACIO_OUT8(reset_io, 0); + (void)MACIO_IN8(reset_io); + UNLOCK(flags); + + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * This can be called from pmac_smp so isn't static + * + * This takes the second CPU off the bus on dual CPU machines + * running UP + */ +void g5_phy_disable_cpu1(void) +{ + if (uninorth_maj == 3) + UN_OUT(U3_API_PHY_CONFIG_1, 0); +} +#endif /* CONFIG_PPC64 */ + +#ifndef CONFIG_PPC64 + + +#ifdef CONFIG_PM +static u32 save_gpio_levels[2]; +static u8 save_gpio_extint[KEYLARGO_GPIO_EXTINT_CNT]; +static u8 save_gpio_normal[KEYLARGO_GPIO_CNT]; +static u32 save_unin_clock_ctl; + +static void keylargo_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + if (sleep_mode) { + mdelay(1); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB_REF_SUSPEND); + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(1); + } + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE | + KL0_IRDA_ENABLE | KL0_IRDA_CLK32_ENABLE | + KL0_IRDA_CLK19_ENABLE); + + MACIO_BIC(KEYLARGO_MBCR, KL_MBCR_MB0_DEV_MASK); + MACIO_BIS(KEYLARGO_MBCR, KL_MBCR_MB0_IDE_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT | + KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE | + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE | + KL1_EIDE0_ENABLE | KL1_EIDE0_RESET_N | + KL1_EIDE1_ENABLE | KL1_EIDE1_RESET_N | + KL1_UIDE_ENABLE); + + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + MACIO_BIC(KEYLARGO_FCR2, KL2_IOBUS_ENABLE); + + temp = MACIO_IN32(KEYLARGO_FCR3); + if (macio->rev >= 2) { + temp |= KL3_SHUTDOWN_PLL2X; + if (sleep_mode) + temp |= KL3_SHUTDOWN_PLL_TOTAL; + } + + temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 | + KL3_SHUTDOWN_PLLKW35; + if (sleep_mode) + temp |= KL3_SHUTDOWN_PLLKW12; + temp &= ~(KL3_CLK66_ENABLE | KL3_CLK49_ENABLE | KL3_CLK45_ENABLE + | KL3_CLK31_ENABLE | KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_VIA_CLK16_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1); +} + +static void pangea_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE | + KL0_USB0_CELL_ENABLE | KL0_USB1_CELL_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT | + KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE | + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE | + KL1_UIDE_ENABLE); + if (pmac_mb.board_flags & PMAC_MB_MOBILE) + MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N); + + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + + temp = MACIO_IN32(KEYLARGO_FCR3); + temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 | + KL3_SHUTDOWN_PLLKW35; + temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | KL3_CLK31_ENABLE + | KL3_I2S0_CLK18_ENABLE | KL3_I2S1_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_VIA_CLK16_ENABLE | KL3_TIMER_CLK18_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted 
writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1); +} + +static void intrepid_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE | + KL1_EIDE0_ENABLE); + if (pmac_mb.board_flags & PMAC_MB_MOBILE) + MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N); + + temp = MACIO_IN32(KEYLARGO_FCR3); + temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | + KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_IT_VIA_CLK32_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(10); +} + + +static int +core99_sleep(void) +{ + struct macio_chip *macio; + int i; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + /* We power off the wireless slot in case it was not done + * by the driver. We don't power it on automatically however + */ + if (macio->flags & MACIO_FLAG_AIRPORT_ON) + core99_airport_enable(macio->of_node, 0, 0); + + /* We power off the FW cable. Should be done by the driver... */ + if (macio->flags & MACIO_FLAG_FW_SUPPORTED) { + core99_firewire_enable(NULL, 0, 0); + core99_firewire_cable_power(NULL, 0, 0); + } + + /* We make sure int. modem is off (in case driver lost it) */ + if (macio->type == macio_keylargo) + core99_modem_enable(macio->of_node, 0, 0); + else + pangea_modem_enable(macio->of_node, 0, 0); + + /* We make sure the sound is off as well */ + core99_sound_chip_enable(macio->of_node, 0, 0); + + /* + * Save various bits of KeyLargo + */ + + /* Save the state of the various GPIOs */ + save_gpio_levels[0] = MACIO_IN32(KEYLARGO_GPIO_LEVELS0); + save_gpio_levels[1] = MACIO_IN32(KEYLARGO_GPIO_LEVELS1); + for (i=0; i<KEYLARGO_GPIO_EXTINT_CNT; i++) + save_gpio_extint[i] = MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+i); + for (i=0; i<KEYLARGO_GPIO_CNT; i++) + save_gpio_normal[i] = MACIO_IN8(KEYLARGO_GPIO_0+i); + + /* Save the FCRs */ + if (macio->type == macio_keylargo) + save_mbcr = MACIO_IN32(KEYLARGO_MBCR); + save_fcr[0] = MACIO_IN32(KEYLARGO_FCR0); + save_fcr[1] = MACIO_IN32(KEYLARGO_FCR1); + save_fcr[2] = MACIO_IN32(KEYLARGO_FCR2); + save_fcr[3] = MACIO_IN32(KEYLARGO_FCR3); + save_fcr[4] = MACIO_IN32(KEYLARGO_FCR4); + if (macio->type == macio_pangea || macio->type == macio_intrepid) + save_fcr[5] = MACIO_IN32(KEYLARGO_FCR5); + + /* Save state & config of DBDMA channels */ + dbdma_save(macio, save_dbdma); + + /* + * Turn off as much as we can + */ + if (macio->type == macio_pangea) + pangea_shutdown(macio, 1); + else if (macio->type == macio_intrepid) + intrepid_shutdown(macio, 1); + else if (macio->type == macio_keylargo) + keylargo_shutdown(macio, 1); + + /* + * Put the host bridge to sleep + */ + + save_unin_clock_ctl = UN_IN(UNI_N_CLOCK_CNTL); + /* Note: do not switch GMAC off, driver does it when necessary, WOL must keep it + * enabled ! 
+ */ + UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl & + ~(/*UNI_N_CLOCK_CNTL_GMAC|*/UNI_N_CLOCK_CNTL_FW/*|UNI_N_CLOCK_CNTL_PCI*/)); + udelay(100); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING); + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_SLEEP); + mdelay(10); + + /* + * FIXME: A bit of black magic with OpenPIC (don't ask me why) + */ + if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) { + MACIO_BIS(0x506e0, 0x00400000); + MACIO_BIS(0x506e0, 0x80000000); + } + return 0; +} + +static int +core99_wake_up(void) +{ + struct macio_chip *macio; + int i; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + /* + * Wakeup the host bridge + */ + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL); + udelay(10); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING); + udelay(10); + + /* + * Restore KeyLargo + */ + + if (macio->type == macio_keylargo) { + MACIO_OUT32(KEYLARGO_MBCR, save_mbcr); + (void)MACIO_IN32(KEYLARGO_MBCR); udelay(10); + } + MACIO_OUT32(KEYLARGO_FCR0, save_fcr[0]); + (void)MACIO_IN32(KEYLARGO_FCR0); udelay(10); + MACIO_OUT32(KEYLARGO_FCR1, save_fcr[1]); + (void)MACIO_IN32(KEYLARGO_FCR1); udelay(10); + MACIO_OUT32(KEYLARGO_FCR2, save_fcr[2]); + (void)MACIO_IN32(KEYLARGO_FCR2); udelay(10); + MACIO_OUT32(KEYLARGO_FCR3, save_fcr[3]); + (void)MACIO_IN32(KEYLARGO_FCR3); udelay(10); + MACIO_OUT32(KEYLARGO_FCR4, save_fcr[4]); + (void)MACIO_IN32(KEYLARGO_FCR4); udelay(10); + if (macio->type == macio_pangea || macio->type == macio_intrepid) { + MACIO_OUT32(KEYLARGO_FCR5, save_fcr[5]); + (void)MACIO_IN32(KEYLARGO_FCR5); udelay(10); + } + + dbdma_restore(macio, save_dbdma); + + MACIO_OUT32(KEYLARGO_GPIO_LEVELS0, save_gpio_levels[0]); + MACIO_OUT32(KEYLARGO_GPIO_LEVELS1, save_gpio_levels[1]); + for (i=0; i<KEYLARGO_GPIO_EXTINT_CNT; i++) + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+i, save_gpio_extint[i]); + for (i=0; i<KEYLARGO_GPIO_CNT; i++) + MACIO_OUT8(KEYLARGO_GPIO_0+i, save_gpio_normal[i]); + + /* FIXME more black magic with OpenPIC ... */ + if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) { + MACIO_BIC(0x506e0, 0x00400000); + MACIO_BIC(0x506e0, 0x80000000); + } + + UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl); + udelay(100); + + return 0; +} + +#endif /* CONFIG_PM */ + +static long +core99_sleep_state(struct device_node *node, long param, long value) +{ + /* Param == 1 means to enter the "fake sleep" mode that is + * used for CPU speed switch + */ + if (param == 1) { + if (value == 1) { + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING); + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_IDLE2); + } else { + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL); + udelay(10); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING); + udelay(10); + } + return 0; + } + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + +#ifdef CONFIG_PM + if (value == 1) + return core99_sleep(); + else if (value == 0) + return core99_wake_up(); + +#endif /* CONFIG_PM */ + return 0; +} + +#endif /* CONFIG_PPC64 */ + +static long +generic_dev_can_wake(struct device_node *node, long param, long value) +{ + /* Todo: eventually check we are really dealing with on-board + * video device ... 
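+ * For now, any device reporting wake capability simply promotes + * PMAC_MB_MAY_SLEEP to PMAC_MB_CAN_SLEEP below.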
+ */ + + if (pmac_mb.board_flags & PMAC_MB_MAY_SLEEP) + pmac_mb.board_flags |= PMAC_MB_CAN_SLEEP; + return 0; +} + +static long generic_get_mb_info(struct device_node *node, long param, long value) +{ + switch(param) { + case PMAC_MB_INFO_MODEL: + return pmac_mb.model_id; + case PMAC_MB_INFO_FLAGS: + return pmac_mb.board_flags; + case PMAC_MB_INFO_NAME: + /* hack hack hack... but should work */ + *((const char **)value) = pmac_mb.model_name; + return 0; + } + return -EINVAL; +} + + +/* + * Table definitions + */ + +/* Used on any machine + */ +static struct feature_table_entry any_features[] = { + { PMAC_FTR_GET_MB_INFO, generic_get_mb_info }, + { PMAC_FTR_DEVICE_CAN_WAKE, generic_dev_can_wake }, + { 0, NULL } +}; + +#ifndef CONFIG_PPC64 + +/* OHare based motherboards. Currently, we only use these on the + * 2400, 3400 and 3500 series PowerBooks. Some older desktops seem + * to have issues with turning on/off those ASIC cells + */ +static struct feature_table_entry ohare_features[] = { + { PMAC_FTR_SCC_ENABLE, ohare_htw_scc_enable }, + { PMAC_FTR_SWIM3_ENABLE, ohare_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, ohare_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, ohare_ide_enable}, + { PMAC_FTR_IDE_RESET, ohare_ide_reset}, + { PMAC_FTR_SLEEP_STATE, ohare_sleep_state }, + { 0, NULL } +}; + +/* Heathrow desktop machines (Beige G3). + * Separated as some features couldn't be properly tested + * and the serial port control bits appear to confuse it. + */ +static struct feature_table_entry heathrow_desktop_features[] = { + { PMAC_FTR_SWIM3_ENABLE, heathrow_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, heathrow_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, heathrow_ide_enable }, + { PMAC_FTR_IDE_RESET, heathrow_ide_reset }, + { PMAC_FTR_BMAC_ENABLE, heathrow_bmac_enable }, + { 0, NULL } +}; + +/* Heathrow based laptops, that is the Wallstreet and Mainstreet + * PowerBooks. + */ +static struct feature_table_entry heathrow_laptop_features[] = { + { PMAC_FTR_SCC_ENABLE, ohare_htw_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, heathrow_modem_enable }, + { PMAC_FTR_SWIM3_ENABLE, heathrow_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, heathrow_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, heathrow_ide_enable }, + { PMAC_FTR_IDE_RESET, heathrow_ide_reset }, + { PMAC_FTR_BMAC_ENABLE, heathrow_bmac_enable }, + { PMAC_FTR_SOUND_CHIP_ENABLE, heathrow_sound_enable }, + { PMAC_FTR_SLEEP_STATE, heathrow_sleep_state }, + { 0, NULL } +}; + +/* Paddington based machines + * The Lombard (101) PowerBook, first iMac models, B&W G3 and Yikes G4. + */ +static struct feature_table_entry paddington_features[] = { + { PMAC_FTR_SCC_ENABLE, ohare_htw_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, heathrow_modem_enable }, + { PMAC_FTR_SWIM3_ENABLE, heathrow_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, heathrow_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, heathrow_ide_enable }, + { PMAC_FTR_IDE_RESET, heathrow_ide_reset }, + { PMAC_FTR_BMAC_ENABLE, heathrow_bmac_enable }, + { PMAC_FTR_SOUND_CHIP_ENABLE, heathrow_sound_enable }, + { PMAC_FTR_SLEEP_STATE, heathrow_sleep_state }, + { 0, NULL } +}; + +/* Core99 & MacRISC 2 machines (all machines released since the + * iBook (included), that is all AGP machines, except pangea + * chipset. The pangea chipset is the "combo" UniNorth/KeyLargo + * used on iBook2 & iMac "Flower Power".
+ */ +static struct feature_table_entry core99_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, core99_modem_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, core99_sound_chip_enable }, + { PMAC_FTR_AIRPORT_ENABLE, core99_airport_enable }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, +#ifdef CONFIG_PM + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, +#endif +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, core99_reset_cpu }, +#endif /* CONFIG_SMP */ + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +/* RackMac + */ +static struct feature_table_entry rackmac_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, core99_reset_cpu }, +#endif /* CONFIG_SMP */ + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +/* Pangea features + */ +static struct feature_table_entry pangea_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, pangea_modem_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, core99_sound_chip_enable }, + { PMAC_FTR_AIRPORT_ENABLE, core99_airport_enable }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +/* Intrepid features + */ +static struct feature_table_entry intrepid_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, pangea_modem_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, core99_sound_chip_enable }, + { PMAC_FTR_AIRPORT_ENABLE, core99_airport_enable }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { PMAC_FTR_AACK_DELAY_ENABLE, intrepid_aack_delay_enable }, + { 0, NULL } +}; + +#else /* CONFIG_PPC64 */ + +/* G5 features + */ +static struct feature_table_entry g5_features[] = { + { PMAC_FTR_GMAC_ENABLE, g5_gmac_enable }, + { PMAC_FTR_1394_ENABLE, g5_fw_enable }, + { PMAC_FTR_ENABLE_MPIC, g5_mpic_enable }, + { PMAC_FTR_GMAC_PHY_RESET, 
g5_eth_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, g5_i2s_enable }, +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, g5_reset_cpu }, +#endif /* CONFIG_SMP */ + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +#endif /* CONFIG_PPC64 */ + +static struct pmac_mb_def pmac_mb_defs[] = { +#ifndef CONFIG_PPC64 + /* + * Desktops + */ + + { "AAPL,8500", "PowerMac 8500/8600", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,9500", "PowerMac 9500/9600", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,7200", "PowerMac 7200", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,7300", "PowerMac 7200/7300", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,7500", "PowerMac 7500", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,ShinerESB", "Apple Network Server", + PMAC_TYPE_ANS, NULL, + 0 + }, + { "AAPL,e407", "Alchemy", + PMAC_TYPE_ALCHEMY, NULL, + 0 + }, + { "AAPL,e411", "Gazelle", + PMAC_TYPE_GAZELLE, NULL, + 0 + }, + { "AAPL,Gossamer", "PowerMac G3 (Gossamer)", + PMAC_TYPE_GOSSAMER, heathrow_desktop_features, + 0 + }, + { "AAPL,PowerMac G3", "PowerMac G3 (Silk)", + PMAC_TYPE_SILK, heathrow_desktop_features, + 0 + }, + { "PowerMac1,1", "Blue&White G3", + PMAC_TYPE_YOSEMITE, paddington_features, + 0 + }, + { "PowerMac1,2", "PowerMac G4 PCI Graphics", + PMAC_TYPE_YIKES, paddington_features, + 0 + }, + { "PowerMac2,1", "iMac FireWire", + PMAC_TYPE_FW_IMAC, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac2,2", "iMac FireWire", + PMAC_TYPE_FW_IMAC, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,1", "PowerMac G4 AGP Graphics", + PMAC_TYPE_SAWTOOTH, core99_features, + PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,2", "PowerMac G4 AGP Graphics", + PMAC_TYPE_SAWTOOTH, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,3", "PowerMac G4 AGP Graphics", + PMAC_TYPE_SAWTOOTH, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,4", "PowerMac G4 Silver", + PMAC_TYPE_QUICKSILVER, core99_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac3,5", "PowerMac G4 Silver", + PMAC_TYPE_QUICKSILVER, core99_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac3,6", "PowerMac G4 Windtunnel", + PMAC_TYPE_WINDTUNNEL, core99_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac4,1", "iMac \"Flower Power\"", + PMAC_TYPE_PANGEA_IMAC, pangea_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac4,2", "Flat panel iMac", + PMAC_TYPE_FLAT_PANEL_IMAC, pangea_features, + PMAC_MB_CAN_SLEEP + }, + { "PowerMac4,4", "eMac", + PMAC_TYPE_EMAC, core99_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac5,1", "PowerMac G4 Cube", + PMAC_TYPE_CUBE, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac6,1", "Flat panel iMac", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac6,3", "Flat panel iMac", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac6,4", "eMac", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac10,1", "Mac mini", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac10,2", "Mac mini (Late 2005)", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "iMac,1", "iMac (first generation)", + PMAC_TYPE_ORIG_IMAC, paddington_features, + 0 + }, + + /* + * Xserve's + */ + + { "RackMac1,1", "XServe", + PMAC_TYPE_RACKMAC, rackmac_features, + 0, + }, + { "RackMac1,2", "XServe rev. 
2", + PMAC_TYPE_RACKMAC, rackmac_features, + 0, + }, + + /* + * Laptops + */ + + { "AAPL,3400/2400", "PowerBook 3400", + PMAC_TYPE_HOOPER, ohare_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "AAPL,3500", "PowerBook 3500", + PMAC_TYPE_KANGA, ohare_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "AAPL,PowerBook1998", "PowerBook Wallstreet", + PMAC_TYPE_WALLSTREET, heathrow_laptop_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "PowerBook1,1", "PowerBook 101 (Lombard)", + PMAC_TYPE_101_PBOOK, paddington_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "PowerBook2,1", "iBook (first generation)", + PMAC_TYPE_ORIG_IBOOK, core99_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE + }, + { "PowerBook2,2", "iBook FireWire", + PMAC_TYPE_FW_IBOOK, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | + PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE + }, + { "PowerBook3,1", "PowerBook Pismo", + PMAC_TYPE_PISMO, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | + PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE + }, + { "PowerBook3,2", "PowerBook Titanium", + PMAC_TYPE_TITANIUM, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook3,3", "PowerBook Titanium II", + PMAC_TYPE_TITANIUM2, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook3,4", "PowerBook Titanium III", + PMAC_TYPE_TITANIUM3, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook3,5", "PowerBook Titanium IV", + PMAC_TYPE_TITANIUM4, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook4,1", "iBook 2", + PMAC_TYPE_IBOOK2, pangea_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook4,2", "iBook 2", + PMAC_TYPE_IBOOK2, pangea_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook4,3", "iBook 2 rev. 
2", + PMAC_TYPE_IBOOK2, pangea_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook5,1", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,2", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,3", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,4", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,5", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,6", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,7", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,8", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_MOBILE, + }, + { "PowerBook5,9", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_MOBILE, + }, + { "PowerBook6,1", "PowerBook G4 12\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,2", "PowerBook G4", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,3", "iBook G4", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,4", "PowerBook G4 12\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,5", "iBook G4", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,7", "iBook G4", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,8", "PowerBook G4 12\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, +#else /* CONFIG_PPC64 */ + { "PowerMac7,2", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5, g5_features, + 0, + }, +#ifdef CONFIG_PPC64 + { "PowerMac7,3", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5, g5_features, + 0, + }, + { "PowerMac8,1", "iMac G5", + PMAC_TYPE_IMAC_G5, g5_features, + 0, + }, + { "PowerMac9,1", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5_U3L, g5_features, + 0, + }, + { "PowerMac11,2", "PowerMac G5 Dual Core", + PMAC_TYPE_POWERMAC_G5_U3L, g5_features, + 0, + }, + { "PowerMac12,1", "iMac G5 (iSight)", + PMAC_TYPE_POWERMAC_G5_U3L, g5_features, + 0, + }, + { "RackMac3,1", "XServe G5", + PMAC_TYPE_XSERVE_G5, g5_features, + 0, + }, +#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC64 */ +}; + +/* + * The toplevel feature_call callback + */ +long pmac_do_feature_call(unsigned int selector, ...) 
+{ + struct device_node *node; + long param, value; + int i; + feature_call func = NULL; + va_list args; + + if (pmac_mb.features) + for (i=0; pmac_mb.features[i].function; i++) + if (pmac_mb.features[i].selector == selector) { + func = pmac_mb.features[i].function; + break; + } + if (!func) + for (i=0; any_features[i].function; i++) + if (any_features[i].selector == selector) { + func = any_features[i].function; + break; + } + if (!func) + return -ENODEV; + + va_start(args, selector); + node = (struct device_node*)va_arg(args, void*); + param = va_arg(args, long); + value = va_arg(args, long); + va_end(args); + + return func(node, param, value); +} + +static int __init probe_motherboard(void) +{ + int i; + struct macio_chip *macio = &macio_chips[0]; + const char *model = NULL; + struct device_node *dt; + int ret = 0; + + /* Lookup known motherboard type in device-tree. First try an + * exact match on the "model" property, then try a "compatible" + * match if none is found. + */ + dt = of_find_node_by_name(NULL, "device-tree"); + if (dt != NULL) + model = of_get_property(dt, "model", NULL); + for(i=0; model && i<ARRAY_SIZE(pmac_mb_defs); i++) { + if (strcmp(model, pmac_mb_defs[i].model_string) == 0) { + pmac_mb = pmac_mb_defs[i]; + goto found; + } + } + for(i=0; i<ARRAY_SIZE(pmac_mb_defs); i++) { + if (of_machine_is_compatible(pmac_mb_defs[i].model_string)) { + pmac_mb = pmac_mb_defs[i]; + goto found; + } + } + + /* Fallback to selection depending on mac-io chip type */ + switch(macio->type) { +#ifndef CONFIG_PPC64 + case macio_grand_central: + pmac_mb.model_id = PMAC_TYPE_PSURGE; + pmac_mb.model_name = "Unknown PowerSurge"; + break; + case macio_ohare: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_OHARE; + pmac_mb.model_name = "Unknown OHare-based"; + break; + case macio_heathrow: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_HEATHROW; + pmac_mb.model_name = "Unknown Heathrow-based"; + pmac_mb.features = heathrow_desktop_features; + break; + case macio_paddington: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PADDINGTON; + pmac_mb.model_name = "Unknown Paddington-based"; + pmac_mb.features = paddington_features; + break; + case macio_keylargo: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_CORE99; + pmac_mb.model_name = "Unknown Keylargo-based"; + pmac_mb.features = core99_features; + break; + case macio_pangea: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PANGEA; + pmac_mb.model_name = "Unknown Pangea-based"; + pmac_mb.features = pangea_features; + break; + case macio_intrepid: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_INTREPID; + pmac_mb.model_name = "Unknown Intrepid-based"; + pmac_mb.features = intrepid_features; + break; +#else /* CONFIG_PPC64 */ + case macio_keylargo2: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2; + pmac_mb.model_name = "Unknown K2-based"; + pmac_mb.features = g5_features; + break; + case macio_shasta: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_SHASTA; + pmac_mb.model_name = "Unknown Shasta-based"; + pmac_mb.features = g5_features; + break; +#endif /* CONFIG_PPC64 */ + default: + ret = -ENODEV; + goto done; + } +found: +#ifndef CONFIG_PPC64 + /* Fixup Hooper vs. Comet */ + if (pmac_mb.model_id == PMAC_TYPE_HOOPER) { + u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4); + if (!mach_id_ptr) { + ret = -ENODEV; + goto done; + } + /* Here, I used to disable the media-bay on comet. It + * appears this is wrong, the floppy connector is actually + * a kind of media-bay and works with the current driver.
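+ * The machine-id register at 0xf3000034 tells the two boards apart: + * bit 0x20000000 reads as set on a Comet, which is what we test below.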
+ */ + if (__raw_readl(mach_id_ptr) & 0x20000000UL) + pmac_mb.model_id = PMAC_TYPE_COMET; + iounmap(mach_id_ptr); + } + + /* Set default value of powersave_nap on machines that support it. + * It appears that uninorth rev 3 has a problem with it, we don't + * enable it on those. In theory, the flush-on-lock property is + * supposed to be set when not supported, but I'm not very confident + * that all Apple OF revs did it properly, I do it the paranoid way. + */ + while (uninorth_base && uninorth_rev > 3) { + struct device_node *cpus = of_find_node_by_path("/cpus"); + struct device_node *np; + + if (!cpus || !cpus->child) { + printk(KERN_WARNING "Can't find CPU(s) in device tree !\n"); + of_node_put(cpus); + break; + } + np = cpus->child; + /* Nap mode not supported on SMP */ + if (np->sibling) { + of_node_put(cpus); + break; + } + /* Nap mode not supported if flush-on-lock property is present */ + if (of_get_property(np, "flush-on-lock", NULL)) { + of_node_put(cpus); + break; + } + of_node_put(cpus); + powersave_nap = 1; + printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n"); + break; + } + + /* On CPUs that support it (750FX), lowspeed by default during + * NAP mode + */ + powersave_lowspeed = 1; + +#else /* CONFIG_PPC64 */ + powersave_nap = 1; +#endif /* CONFIG_PPC64 */ + + /* Check for "mobile" machine */ + if (model && (strncmp(model, "PowerBook", 9) == 0 + || strncmp(model, "iBook", 5) == 0)) + pmac_mb.board_flags |= PMAC_MB_MOBILE; + + + printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name); +done: + of_node_put(dt); + return ret; +} + +/* Initialize the Core99 UniNorth host bridge and memory controller + */ +static void __init probe_uninorth(void) +{ + const u32 *addrp; + phys_addr_t address; + unsigned long actrl; + + /* Locate core99 Uni-N */ + uninorth_node = of_find_node_by_name(NULL, "uni-n"); + uninorth_maj = 1; + + /* Locate G5 u3 */ + if (uninorth_node == NULL) { + uninorth_node = of_find_node_by_name(NULL, "u3"); + uninorth_maj = 3; + } + /* Locate G5 u4 */ + if (uninorth_node == NULL) { + uninorth_node = of_find_node_by_name(NULL, "u4"); + uninorth_maj = 4; + } + if (uninorth_node == NULL) { + uninorth_maj = 0; + return; + } + + addrp = of_get_property(uninorth_node, "reg", NULL); + if (addrp == NULL) + return; + address = of_translate_address(uninorth_node, addrp); + if (address == 0) + return; + uninorth_base = ioremap(address, 0x40000); + if (uninorth_base == NULL) + return; + uninorth_rev = in_be32(UN_REG(UNI_N_VERSION)); + if (uninorth_maj == 3 || uninorth_maj == 4) { + u3_ht_base = ioremap(address + U3_HT_CONFIG_BASE, 0x1000); + if (u3_ht_base == NULL) { + iounmap(uninorth_base); + return; + } + } + + printk(KERN_INFO "Found %s memory controller & host bridge" + " @ 0x%08x revision: 0x%02x\n", uninorth_maj == 3 ? "U3" : + uninorth_maj == 4 ? "U4" : "UniNorth", + (unsigned int)address, uninorth_rev); + printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base); + + /* Set the arbiter QAck delay according to what Apple does + */ + if (uninorth_rev < 0x11) { + actrl = UN_IN(UNI_N_ARB_CTRL) & ~UNI_N_ARB_CTRL_QACK_DELAY_MASK; + actrl |= ((uninorth_rev < 3) ? UNI_N_ARB_CTRL_QACK_DELAY105 : + UNI_N_ARB_CTRL_QACK_DELAY) << + UNI_N_ARB_CTRL_QACK_DELAY_SHIFT; + UN_OUT(UNI_N_ARB_CTRL, actrl); + } + + /* Some more magic as done by them in recent MacOS X on UniNorth + * revs 1.5 to 2.0 and Pangea.
Seems to toggle the UniN Maxbus/PCI + * memory timeout + */ + if ((uninorth_rev >= 0x11 && uninorth_rev <= 0x24) || + uninorth_rev == 0xc0) + UN_OUT(0x2160, UN_IN(0x2160) & 0x00ffffff); +} + +static void __init probe_one_macio(const char *name, const char *compat, int type) +{ + struct device_node* node; + int i; + volatile u32 __iomem *base; + const u32 *addrp, *revp; + phys_addr_t addr; + u64 size; + + for (node = NULL; (node = of_find_node_by_name(node, name)) != NULL;) { + if (!compat) + break; + if (of_device_is_compatible(node, compat)) + break; + } + if (!node) + return; + for(i=0; i<MAX_MACIO_CHIPS; i++) { + if (!macio_chips[i].of_node) + break; + if (macio_chips[i].of_node == node) + return; + } + + if (i >= MAX_MACIO_CHIPS) { + printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n"); + printk(KERN_ERR "pmac_feature: %s skipped\n", node->full_name); + return; + } + addrp = of_get_pci_address(node, 0, &size, NULL); + if (addrp == NULL) { + printk(KERN_ERR "pmac_feature: %s: can't find base !\n", + node->full_name); + return; + } + addr = of_translate_address(node, addrp); + if (addr == 0) { + printk(KERN_ERR "pmac_feature: %s, can't translate base !\n", + node->full_name); + return; + } + base = ioremap(addr, (unsigned long)size); + if (!base) { + printk(KERN_ERR "pmac_feature: %s, can't map mac-io chip !\n", + node->full_name); + return; + } + if (type == macio_keylargo || type == macio_keylargo2) { + const u32 *did = of_get_property(node, "device-id", NULL); + if (*did == 0x00000025) + type = macio_pangea; + if (*did == 0x0000003e) + type = macio_intrepid; + if (*did == 0x0000004f) + type = macio_shasta; + } + macio_chips[i].of_node = node; + macio_chips[i].type = type; + macio_chips[i].base = base; + macio_chips[i].flags = MACIO_FLAG_SCCA_ON | MACIO_FLAG_SCCB_ON; + macio_chips[i].name = macio_names[type]; + revp = of_get_property(node, "revision-id", NULL); + if (revp) + macio_chips[i].rev = *revp; + printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n", + macio_names[type], macio_chips[i].rev, macio_chips[i].base); +} + +static int __init +probe_macios(void) +{ + /* Warning, ordering is important */ + probe_one_macio("gc", NULL, macio_grand_central); + probe_one_macio("ohare", NULL, macio_ohare); + probe_one_macio("pci106b,7", NULL, macio_ohareII); + probe_one_macio("mac-io", "keylargo", macio_keylargo); + probe_one_macio("mac-io", "paddington", macio_paddington); + probe_one_macio("mac-io", "gatwick", macio_gatwick); + probe_one_macio("mac-io", "heathrow", macio_heathrow); + probe_one_macio("mac-io", "K2-Keylargo", macio_keylargo2); + + /* Make sure the "main" macio chip appears first */ + if (macio_chips[0].type == macio_gatwick + && macio_chips[1].type == macio_heathrow) { + struct macio_chip temp = macio_chips[0]; + macio_chips[0] = macio_chips[1]; + macio_chips[1] = temp; + } + if (macio_chips[0].type == macio_ohareII + && macio_chips[1].type == macio_ohare) { + struct macio_chip temp = macio_chips[0]; + macio_chips[0] = macio_chips[1]; + macio_chips[1] = temp; + } + macio_chips[0].lbus.index = 0; + macio_chips[1].lbus.index = 1; + + return (macio_chips[0].of_node == NULL) ?
-ENODEV : 0; +} + +static void __init +initial_serial_shutdown(struct device_node *np) +{ + int len; + const struct slot_names_prop { + int count; + char name[1]; + } *slots; + const char *conn; + int port_type = PMAC_SCC_ASYNC; + int modem = 0; + + slots = of_get_property(np, "slot-names", &len); + conn = of_get_property(np, "AAPL,connector", &len); + if (conn && (strcmp(conn, "infrared") == 0)) + port_type = PMAC_SCC_IRDA; + else if (of_device_is_compatible(np, "cobalt")) + modem = 1; + else if (slots && slots->count > 0) { + if (strcmp(slots->name, "IrDA") == 0) + port_type = PMAC_SCC_IRDA; + else if (strcmp(slots->name, "Modem") == 0) + modem = 1; + } + if (modem) + pmac_call_feature(PMAC_FTR_MODEM_ENABLE, np, 0, 0); + pmac_call_feature(PMAC_FTR_SCC_ENABLE, np, port_type, 0); +} + +static void __init +set_initial_features(void) +{ + struct device_node *np; + + /* That hack appears to be necessary for some StarMax motherboards + * but I'm not too sure it was audited for side-effects on other + * ohare based machines... + * Since I still have difficulties figuring out the right way to + * differentiate them all and since that hack was there for a long + * time, I'll keep it around + */ + if (macio_chips[0].type == macio_ohare) { + struct macio_chip *macio = &macio_chips[0]; + np = of_find_node_by_name(NULL, "via-pmu"); + if (np) + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + else + MACIO_OUT32(OHARE_FCR, STARMAX_FEATURES); + of_node_put(np); + } else if (macio_chips[1].type == macio_ohare) { + struct macio_chip *macio = &macio_chips[1]; + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + } + +#ifdef CONFIG_PPC64 + if (macio_chips[0].type == macio_keylargo2 || + macio_chips[0].type == macio_shasta) { +#ifndef CONFIG_SMP + /* On SMP machines running UP, we have the second CPU eating + * bus cycles. We need to take it off the bus. This is done + * from pmac_smp for SMP kernels running on one CPU + */ + np = of_find_node_by_type(NULL, "cpu"); + if (np != NULL) + np = of_find_node_by_type(np, "cpu"); + if (np != NULL) { + g5_phy_disable_cpu1(); + of_node_put(np); + } +#endif /* CONFIG_SMP */ + /* Enable GMAC for now for PCI probing. It will be disabled + * later on after PCI probe + */ + for_each_node_by_name(np, "ethernet") + if (of_device_is_compatible(np, "K2-GMAC")) + g5_gmac_enable(np, 0, 1); + + /* Enable FW before PCI probe. Will be disabled later on + * Note: We should have a better way to check that we are + * dealing with uninorth internal cell and not a PCI cell + * on the external PCI. The code below works though. + */ + for_each_node_by_name(np, "firewire") { + if (of_device_is_compatible(np, "pci106b,5811")) { + macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; + g5_fw_enable(np, 0, 1); + } + } + } +#else /* CONFIG_PPC64 */ + + if (macio_chips[0].type == macio_keylargo || + macio_chips[0].type == macio_pangea || + macio_chips[0].type == macio_intrepid) { + /* Enable GMAC for now for PCI probing. It will be disabled + * later on after PCI probe + */ + for_each_node_by_name(np, "ethernet") { + if (np->parent + && of_device_is_compatible(np->parent, "uni-north") + && of_device_is_compatible(np, "gmac")) + core99_gmac_enable(np, 0, 1); + } + + /* Enable FW before PCI probe. Will be disabled later on + * Note: We should have a better way to check that we are + * dealing with uninorth internal cell and not a PCI cell + * on the external PCI. The code below works though.
+ */ + for_each_node_by_name(np, "firewire") { + if (np->parent + && of_device_is_compatible(np->parent, "uni-north") + && (of_device_is_compatible(np, "pci106b,18") || + of_device_is_compatible(np, "pci106b,30") || + of_device_is_compatible(np, "pci11c1,5811"))) { + macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; + core99_firewire_enable(np, 0, 1); + } + } + + /* Enable ATA-100 before PCI probe. Note: for_each_node_by_name() + * performs the initial lookup itself, so no separate + * of_find_node_by_name() is needed (it would leak a node reference). + */ + for_each_node_by_name(np, "ata-6") { + if (np->parent + && of_device_is_compatible(np->parent, "uni-north") + && of_device_is_compatible(np, "kauai-ata")) { + core99_ata100_enable(np, 1); + } + } + + /* Switch airport off */ + for_each_node_by_name(np, "radio") { + if (np->parent == macio_chips[0].of_node) { + macio_chips[0].flags |= MACIO_FLAG_AIRPORT_ON; + core99_airport_enable(np, 0, 0); + } + } + } + + /* On all machines that support sound PM, switch sound off */ + if (macio_chips[0].of_node) + pmac_do_feature_call(PMAC_FTR_SOUND_CHIP_ENABLE, + macio_chips[0].of_node, 0, 0); + + /* While on some desktop G3s, we turn it back on */ + if (macio_chips[0].of_node && macio_chips[0].type == macio_heathrow + && (pmac_mb.model_id == PMAC_TYPE_GOSSAMER || + pmac_mb.model_id == PMAC_TYPE_SILK)) { + struct macio_chip *macio = &macio_chips[0]; + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); + } + +#endif /* CONFIG_PPC64 */ + + /* On all machines, switch modem & serial ports off */ + for_each_node_by_name(np, "ch-a") + initial_serial_shutdown(np); + of_node_put(np); + for_each_node_by_name(np, "ch-b") + initial_serial_shutdown(np); + of_node_put(np); +} + +void __init +pmac_feature_init(void) +{ + /* Detect the UniNorth memory controller */ + probe_uninorth(); + + /* Probe mac-io controllers */ + if (probe_macios()) { + printk(KERN_WARNING "No mac-io chip found\n"); + return; + } + + /* Probe machine type */ + if (probe_motherboard()) + printk(KERN_WARNING "Unknown PowerMac !\n"); + + /* Set some initial features (turn off some chips that will + * be later turned on) + */ + set_initial_features(); +} + +#if 0 +static void dump_HT_speeds(char *name, u32 cfg, u32 frq) +{ + int freqs[16] = { 200,300,400,500,600,800,1000,0,0,0,0,0,0,0,0,0 }; + int bits[8] = { 8,16,0,32,2,4,0,0 }; + int freq = (frq >> 8) & 0xf; + + if (freqs[freq] == 0) + printk("%s: Unknown HT link frequency %x\n", name, freq); + else + printk("%s: %d MHz on main link, (%d in / %d out) bits width\n", + name, freqs[freq], + bits[(cfg >> 28) & 0x7], bits[(cfg >> 24) & 0x7]); +} + +void __init pmac_check_ht_link(void) +{ + u32 ufreq, freq, ucfg, cfg; + struct device_node *pcix_node; + u8 px_bus, px_devfn; + struct pci_controller *px_hose; + + (void)in_be32(u3_ht_base + U3_HT_LINK_COMMAND); + ucfg = cfg = in_be32(u3_ht_base + U3_HT_LINK_CONFIG); + ufreq = freq = in_be32(u3_ht_base + U3_HT_LINK_FREQ); + dump_HT_speeds("U3 HyperTransport", cfg, freq); + + pcix_node = of_find_compatible_node(NULL, "pci", "pci-x"); + if (pcix_node == NULL) { + printk("No PCI-X bridge found\n"); + return; + } + if (pci_device_from_OF_node(pcix_node, &px_bus, &px_devfn) != 0) { + printk("PCI-X bridge found but not matched to pci\n"); + return; + } + px_hose = pci_find_hose_for_OF_device(pcix_node); + if (px_hose == NULL) { + printk("PCI-X bridge found but not matched to host\n"); + return; + } + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc4, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xcc, &freq); + dump_HT_speeds("PCI-X HT Uplink", cfg, freq); +
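+ /* Same decode for the downlink register pair: config at 0xc8, frequency at 0xd0 */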
early_read_config_dword(px_hose, px_bus, px_devfn, 0xc8, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xd0, &freq); + dump_HT_speeds("PCI-X HT Downlink", cfg, freq); +} +#endif /* 0 */ + +/* + * Early video resume hook + */ + +static void (*pmac_early_vresume_proc)(void *data); +static void *pmac_early_vresume_data; + +void pmac_set_early_video_resume(void (*proc)(void *data), void *data) +{ + if (!machine_is(powermac)) + return; + preempt_disable(); + pmac_early_vresume_proc = proc; + pmac_early_vresume_data = data; + preempt_enable(); +} +EXPORT_SYMBOL(pmac_set_early_video_resume); + +void pmac_call_early_video_resume(void) +{ + if (pmac_early_vresume_proc) + pmac_early_vresume_proc(pmac_early_vresume_data); +} + +/* + * AGP related suspend/resume code + */ + +static struct pci_dev *pmac_agp_bridge; +static int (*pmac_agp_suspend)(struct pci_dev *bridge); +static int (*pmac_agp_resume)(struct pci_dev *bridge); + +void pmac_register_agp_pm(struct pci_dev *bridge, + int (*suspend)(struct pci_dev *bridge), + int (*resume)(struct pci_dev *bridge)) +{ + if (suspend || resume) { + pmac_agp_bridge = bridge; + pmac_agp_suspend = suspend; + pmac_agp_resume = resume; + return; + } + if (bridge != pmac_agp_bridge) + return; + pmac_agp_suspend = pmac_agp_resume = NULL; + return; +} +EXPORT_SYMBOL(pmac_register_agp_pm); + +void pmac_suspend_agp_for_card(struct pci_dev *dev) +{ + if (pmac_agp_bridge == NULL || pmac_agp_suspend == NULL) + return; + if (pmac_agp_bridge->bus != dev->bus) + return; + pmac_agp_suspend(pmac_agp_bridge); +} +EXPORT_SYMBOL(pmac_suspend_agp_for_card); + +void pmac_resume_agp_for_card(struct pci_dev *dev) +{ + if (pmac_agp_bridge == NULL || pmac_agp_resume == NULL) + return; + if (pmac_agp_bridge->bus != dev->bus) + return; + pmac_agp_resume(pmac_agp_bridge); +} +EXPORT_SYMBOL(pmac_resume_agp_for_card); + +int pmac_get_uninorth_variant(void) +{ + return uninorth_maj; +} diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c new file mode 100644 index 000000000..7553b6a77 --- /dev/null +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -0,0 +1,1515 @@ +/* + * arch/powerpc/platforms/powermac/low_i2c.c + * + * Copyright (C) 2003-2005 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * The Linux i2c layer isn't completely suitable for our needs for various + * reasons ranging from too late initialisation to semantics not perfectly + * matching some requirements of the Apple platform functions etc... + * + * This file thus provides a simple low level unified i2c interface for + * powermac that covers the various types of i2c busses used in Apple machines. + * For now, keywest, PMU and SMU, though we could add Cuda, or other bit + * banging busses found on older chipsets in earlier machines if we ever need + * one of them. + * + * The drivers in this file are synchronous/blocking. In addition, the + * keywest one is fairly slow due to the use of msleep instead of interrupts + * as the interrupt is currently used by i2c-keywest. In the long run, we + * might want to get rid of those high-level interfaces to the Linux i2c layer + * either completely (converting all drivers) or replacing them all with a + * single stub driver on top of this one.
Once done, the interrupt will be + * available for our use. + */ + +#undef DEBUG +#undef DEBUG_LOW + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/delay.h> +#include <linux/completion.h> +#include <linux/platform_device.h> +#include <linux/interrupt.h> +#include <linux/timer.h> +#include <linux/mutex.h> +#include <linux/i2c.h> +#include <linux/slab.h> +#include <asm/keylargo.h> +#include <asm/uninorth.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/smu.h> +#include <asm/pmac_pfunc.h> +#include <asm/pmac_low_i2c.h> + +#ifdef DEBUG +#define DBG(x...) do {\ + printk(KERN_DEBUG "low_i2c:" x); \ + } while(0) +#else +#define DBG(x...) +#endif + +#ifdef DEBUG_LOW +#define DBG_LOW(x...) do {\ + printk(KERN_DEBUG "low_i2c:" x); \ + } while(0) +#else +#define DBG_LOW(x...) +#endif + + +static int pmac_i2c_force_poll = 1; + +/* + * A bus structure. Each bus in the system has such a structure associated. + */ +struct pmac_i2c_bus +{ + struct list_head link; + struct device_node *controller; + struct device_node *busnode; + int type; + int flags; + struct i2c_adapter adapter; + void *hostdata; + int channel; /* some hosts have multiple */ + int mode; /* current mode */ + struct mutex mutex; + int opened; + int polled; /* open mode */ + struct platform_device *platform_dev; + + /* ops */ + int (*open)(struct pmac_i2c_bus *bus); + void (*close)(struct pmac_i2c_bus *bus); + int (*xfer)(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len); +}; + +static LIST_HEAD(pmac_i2c_busses); + +/* + * Keywest implementation + */ + +struct pmac_i2c_host_kw +{ + struct mutex mutex; /* Access mutex for use by + * i2c-keywest */ + void __iomem *base; /* register base address */ + int bsteps; /* register stepping */ + int speed; /* speed */ + int irq; + u8 *data; + unsigned len; + int state; + int rw; + int polled; + int result; + struct completion complete; + spinlock_t lock; + struct timer_list timeout_timer; +}; + +/* Register indices */ +typedef enum { + reg_mode = 0, + reg_control, + reg_status, + reg_isr, + reg_ier, + reg_addr, + reg_subaddr, + reg_data +} reg_t; + +/* The Tumbler audio equalizer can be really slow sometimes */ +#define KW_POLL_TIMEOUT (2*HZ) + +/* Mode register */ +#define KW_I2C_MODE_100KHZ 0x00 +#define KW_I2C_MODE_50KHZ 0x01 +#define KW_I2C_MODE_25KHZ 0x02 +#define KW_I2C_MODE_DUMB 0x00 +#define KW_I2C_MODE_STANDARD 0x04 +#define KW_I2C_MODE_STANDARDSUB 0x08 +#define KW_I2C_MODE_COMBINED 0x0C +#define KW_I2C_MODE_MODE_MASK 0x0C +#define KW_I2C_MODE_CHAN_MASK 0xF0 + +/* Control register */ +#define KW_I2C_CTL_AAK 0x01 +#define KW_I2C_CTL_XADDR 0x02 +#define KW_I2C_CTL_STOP 0x04 +#define KW_I2C_CTL_START 0x08 + +/* Status register */ +#define KW_I2C_STAT_BUSY 0x01 +#define KW_I2C_STAT_LAST_AAK 0x02 +#define KW_I2C_STAT_LAST_RW 0x04 +#define KW_I2C_STAT_SDA 0x08 +#define KW_I2C_STAT_SCL 0x10 + +/* IER & ISR registers */ +#define KW_I2C_IRQ_DATA 0x01 +#define KW_I2C_IRQ_ADDR 0x02 +#define KW_I2C_IRQ_STOP 0x04 +#define KW_I2C_IRQ_START 0x08 +#define KW_I2C_IRQ_MASK 0x0F + +/* State machine states */ +enum { + state_idle, + state_addr, + state_read, + state_write, + state_stop, + state_dead +}; + +#define WRONG_STATE(name) do {\ + printk(KERN_DEBUG "KW: wrong state. 
Got %s, state: %s " \ + "(isr: %02x)\n", \ + name, __kw_state_names[host->state], isr); \ + } while(0) + +static const char *__kw_state_names[] = { + "state_idle", + "state_addr", + "state_read", + "state_write", + "state_stop", + "state_dead" +}; + +static inline u8 __kw_read_reg(struct pmac_i2c_host_kw *host, reg_t reg) +{ + return readb(host->base + (((unsigned int)reg) << host->bsteps)); +} + +static inline void __kw_write_reg(struct pmac_i2c_host_kw *host, + reg_t reg, u8 val) +{ + writeb(val, host->base + (((unsigned)reg) << host->bsteps)); + (void)__kw_read_reg(host, reg_subaddr); +} + +#define kw_write_reg(reg, val) __kw_write_reg(host, reg, val) +#define kw_read_reg(reg) __kw_read_reg(host, reg) + +static u8 kw_i2c_wait_interrupt(struct pmac_i2c_host_kw *host) +{ + int i, j; + u8 isr; + + for (i = 0; i < 1000; i++) { + isr = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK; + if (isr != 0) + return isr; + + /* This code is used with the timebase frozen, we cannot rely + * on udelay nor schedule when in polled mode ! + * For now, just use a bogus loop.... + */ + if (host->polled) { + for (j = 1; j < 100000; j++) + mb(); + } else + msleep(1); + } + return isr; +} + +static void kw_i2c_do_stop(struct pmac_i2c_host_kw *host, int result) +{ + kw_write_reg(reg_control, KW_I2C_CTL_STOP); + host->state = state_stop; + host->result = result; +} + + +static void kw_i2c_handle_interrupt(struct pmac_i2c_host_kw *host, u8 isr) +{ + u8 ack; + + DBG_LOW("kw_handle_interrupt(%s, isr: %x)\n", + __kw_state_names[host->state], isr); + + if (host->state == state_idle) { + printk(KERN_WARNING "low_i2c: Keywest got an out of state" + " interrupt, ignoring\n"); + kw_write_reg(reg_isr, isr); + return; + } + + if (isr == 0) { + printk(KERN_WARNING "low_i2c: Timeout in i2c transfer" + " on keywest !\n"); + if (host->state != state_stop) { + kw_i2c_do_stop(host, -EIO); + return; + } + ack = kw_read_reg(reg_status); + if (ack & KW_I2C_STAT_BUSY) + kw_write_reg(reg_status, 0); + host->state = state_idle; + kw_write_reg(reg_ier, 0x00); + if (!host->polled) + complete(&host->complete); + return; + } + + if (isr & KW_I2C_IRQ_ADDR) { + ack = kw_read_reg(reg_status); + if (host->state != state_addr) { + WRONG_STATE("KW_I2C_IRQ_ADDR"); + kw_i2c_do_stop(host, -EIO); + } + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + host->result = -ENXIO; + host->state = state_stop; + DBG_LOW("KW: NAK on address\n"); + } else { + if (host->len == 0) + kw_i2c_do_stop(host, 0); + else if (host->rw) { + host->state = state_read; + if (host->len > 1) + kw_write_reg(reg_control, + KW_I2C_CTL_AAK); + } else { + host->state = state_write; + kw_write_reg(reg_data, *(host->data++)); + host->len--; + } + } + kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR); + } + + if (isr & KW_I2C_IRQ_DATA) { + if (host->state == state_read) { + *(host->data++) = kw_read_reg(reg_data); + host->len--; + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + if (host->len == 0) + host->state = state_stop; + else if (host->len == 1) + kw_write_reg(reg_control, 0); + } else if (host->state == state_write) { + ack = kw_read_reg(reg_status); + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + DBG_LOW("KW: nack on data write\n"); + host->result = -EFBIG; + host->state = state_stop; + } else if (host->len) { + kw_write_reg(reg_data, *(host->data++)); + host->len--; + } else + kw_i2c_do_stop(host, 0); + } else { + WRONG_STATE("KW_I2C_IRQ_DATA"); + if (host->state != state_stop) + kw_i2c_do_stop(host, -EIO); + } + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + } + + if (isr & KW_I2C_IRQ_STOP) { + 
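+ /* Ack the STOP condition and bring the state machine back to idle */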
kw_write_reg(reg_isr, KW_I2C_IRQ_STOP); + if (host->state != state_stop) { + WRONG_STATE("KW_I2C_IRQ_STOP"); + host->result = -EIO; + } + host->state = state_idle; + if (!host->polled) + complete(&host->complete); + } + + /* Below should only happen in manual mode which we don't use ... */ + if (isr & KW_I2C_IRQ_START) + kw_write_reg(reg_isr, KW_I2C_IRQ_START); + +} + +/* Interrupt handler */ +static irqreturn_t kw_i2c_irq(int irq, void *dev_id) +{ + struct pmac_i2c_host_kw *host = dev_id; + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + del_timer(&host->timeout_timer); + kw_i2c_handle_interrupt(host, kw_read_reg(reg_isr)); + if (host->state != state_idle) { + host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT; + add_timer(&host->timeout_timer); + } + spin_unlock_irqrestore(&host->lock, flags); + return IRQ_HANDLED; +} + +static void kw_i2c_timeout(unsigned long data) +{ + struct pmac_i2c_host_kw *host = (struct pmac_i2c_host_kw *)data; + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + + /* + * If the timer is pending, that means we raced with the + * irq, in which case we just return + */ + if (timer_pending(&host->timeout_timer)) + goto skip; + + kw_i2c_handle_interrupt(host, kw_read_reg(reg_isr)); + if (host->state != state_idle) { + host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT; + add_timer(&host->timeout_timer); + } + skip: + spin_unlock_irqrestore(&host->lock, flags); +} + +static int kw_i2c_open(struct pmac_i2c_bus *bus) +{ + struct pmac_i2c_host_kw *host = bus->hostdata; + mutex_lock(&host->mutex); + return 0; +} + +static void kw_i2c_close(struct pmac_i2c_bus *bus) +{ + struct pmac_i2c_host_kw *host = bus->hostdata; + mutex_unlock(&host->mutex); +} + +static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len) +{ + struct pmac_i2c_host_kw *host = bus->hostdata; + u8 mode_reg = host->speed; + int use_irq = host->irq != NO_IRQ && !bus->polled; + + /* Setup mode & subaddress if any */ + switch(bus->mode) { + case pmac_i2c_mode_dumb: + return -EINVAL; + case pmac_i2c_mode_std: + mode_reg |= KW_I2C_MODE_STANDARD; + if (subsize != 0) + return -EINVAL; + break; + case pmac_i2c_mode_stdsub: + mode_reg |= KW_I2C_MODE_STANDARDSUB; + if (subsize != 1) + return -EINVAL; + break; + case pmac_i2c_mode_combined: + mode_reg |= KW_I2C_MODE_COMBINED; + if (subsize != 1) + return -EINVAL; + break; + } + + /* Setup channel & clear pending irqs */ + kw_write_reg(reg_isr, kw_read_reg(reg_isr)); + kw_write_reg(reg_mode, mode_reg | (bus->channel << 4)); + kw_write_reg(reg_status, 0); + + /* Set up address and r/w bit, strip possible stale bus number from + * address top bits + */ + kw_write_reg(reg_addr, addrdir & 0xff); + + /* Set up the sub address */ + if ((mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_STANDARDSUB + || (mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_COMBINED) + kw_write_reg(reg_subaddr, subaddr); + + /* Prepare for async operations */ + host->data = data; + host->len = len; + host->state = state_addr; + host->result = 0; + host->rw = (addrdir & 1); + host->polled = bus->polled; + + /* Enable interrupt if not using polled mode and interrupt is + * available + */ + if (use_irq) { + /* Clear completion */ + reinit_completion(&host->complete); + /* Ack stale interrupts */ + kw_write_reg(reg_isr, kw_read_reg(reg_isr)); + /* Arm timeout */ + host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT; + add_timer(&host->timeout_timer); + /* Enable emission */ + kw_write_reg(reg_ier, 
KW_I2C_IRQ_MASK); + } + + /* Start sending address */ + kw_write_reg(reg_control, KW_I2C_CTL_XADDR); + + /* Wait for completion */ + if (use_irq) + wait_for_completion(&host->complete); + else { + while(host->state != state_idle) { + unsigned long flags; + + u8 isr = kw_i2c_wait_interrupt(host); + spin_lock_irqsave(&host->lock, flags); + kw_i2c_handle_interrupt(host, isr); + spin_unlock_irqrestore(&host->lock, flags); + } + } + + /* Disable emission */ + kw_write_reg(reg_ier, 0); + + return host->result; +} + +static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) +{ + struct pmac_i2c_host_kw *host; + const u32 *psteps, *prate, *addrp; + u32 steps; + + host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL); + if (host == NULL) { + printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", + np->full_name); + return NULL; + } + + /* Apple is kind enough to provide a valid AAPL,address property + * on all i2c keywest nodes so far ... we would have to fallback + * to macio parsing if that wasn't the case + */ + addrp = of_get_property(np, "AAPL,address", NULL); + if (addrp == NULL) { + printk(KERN_ERR "low_i2c: Can't find address for %s\n", + np->full_name); + kfree(host); + return NULL; + } + mutex_init(&host->mutex); + init_completion(&host->complete); + spin_lock_init(&host->lock); + init_timer(&host->timeout_timer); + host->timeout_timer.function = kw_i2c_timeout; + host->timeout_timer.data = (unsigned long)host; + + psteps = of_get_property(np, "AAPL,address-step", NULL); + steps = psteps ? (*psteps) : 0x10; + for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++) + steps >>= 1; + /* Select interface rate */ + host->speed = KW_I2C_MODE_25KHZ; + prate = of_get_property(np, "AAPL,i2c-rate", NULL); + if (prate) switch(*prate) { + case 100: + host->speed = KW_I2C_MODE_100KHZ; + break; + case 50: + host->speed = KW_I2C_MODE_50KHZ; + break; + case 25: + host->speed = KW_I2C_MODE_25KHZ; + break; + } + host->irq = irq_of_parse_and_map(np, 0); + if (host->irq == NO_IRQ) + printk(KERN_WARNING + "low_i2c: Failed to map interrupt for %s\n", + np->full_name); + + host->base = ioremap((*addrp), 0x1000); + if (host->base == NULL) { + printk(KERN_ERR "low_i2c: Can't map registers for %s\n", + np->full_name); + kfree(host); + return NULL; + } + + /* Make sure IRQ is disabled */ + kw_write_reg(reg_ier, 0); + + /* Request chip interrupt. We set IRQF_NO_SUSPEND because we don't + * want that interrupt disabled between the 2 passes of driver + * suspend or we'll have issues running the pfuncs + */ + if (request_irq(host->irq, kw_i2c_irq, IRQF_NO_SUSPEND, + "keywest i2c", host)) + host->irq = NO_IRQ; + + printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %s\n", + *addrp, host->irq, np->full_name); + + return host; +} + + +static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, + struct device_node *controller, + struct device_node *busnode, + int channel) +{ + struct pmac_i2c_bus *bus; + + bus = kzalloc(sizeof(struct pmac_i2c_bus), GFP_KERNEL); + if (bus == NULL) + return; + + bus->controller = of_node_get(controller); + bus->busnode = of_node_get(busnode); + bus->type = pmac_i2c_bus_keywest; + bus->hostdata = host; + bus->channel = channel; + bus->mode = pmac_i2c_mode_std; + bus->open = kw_i2c_open; + bus->close = kw_i2c_close; + bus->xfer = kw_i2c_xfer; + mutex_init(&bus->mutex); + if (controller == busnode) + bus->flags = pmac_i2c_multibus; + list_add(&bus->link, &pmac_i2c_busses); + + printk(KERN_INFO " channel %d bus %s\n", channel, + (controller == busnode) ? 
"<multibus>" : busnode->full_name); +} + +static void __init kw_i2c_probe(void) +{ + struct device_node *np, *child, *parent; + + /* Probe keywest-i2c busses */ + for_each_compatible_node(np, "i2c","keywest-i2c") { + struct pmac_i2c_host_kw *host; + int multibus; + + /* Found one, init a host structure */ + host = kw_i2c_host_init(np); + if (host == NULL) + continue; + + /* Now check if we have a multibus setup (old style) or if we + * have proper bus nodes. Note that the "new" way (proper bus + * nodes) might cause us to not create some busses that are + * kept hidden in the device-tree. In the future, we might + * want to work around that by creating busses without a node + * but not for now + */ + child = of_get_next_child(np, NULL); + multibus = !child || strcmp(child->name, "i2c-bus"); + of_node_put(child); + + /* For a multibus setup, we get the bus count based on the + * parent type + */ + if (multibus) { + int chans, i; + + parent = of_get_parent(np); + if (parent == NULL) + continue; + chans = parent->name[0] == 'u' ? 2 : 1; + for (i = 0; i < chans; i++) + kw_i2c_add(host, np, np, i); + } else { + for (child = NULL; + (child = of_get_next_child(np, child)) != NULL;) { + const u32 *reg = of_get_property(child, + "reg", NULL); + if (reg == NULL) + continue; + kw_i2c_add(host, np, child, *reg); + } + } + } +} + + +/* + * + * PMU implementation + * + */ + +#ifdef CONFIG_ADB_PMU + +/* + * i2c command block to the PMU + */ +struct pmu_i2c_hdr { + u8 bus; + u8 mode; + u8 bus2; + u8 address; + u8 sub_addr; + u8 comb_addr; + u8 count; + u8 data[]; +}; + +static void pmu_i2c_complete(struct adb_request *req) +{ + complete(req->arg); +} + +static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len) +{ + struct adb_request *req = bus->hostdata; + struct pmu_i2c_hdr *hdr = (struct pmu_i2c_hdr *)&req->data[1]; + struct completion comp; + int read = addrdir & 1; + int retry; + int rc = 0; + + /* For now, limit ourselves to 16 bytes transfers */ + if (len > 16) + return -EINVAL; + + init_completion(&comp); + + for (retry = 0; retry < 16; retry++) { + memset(req, 0, sizeof(struct adb_request)); + hdr->bus = bus->channel; + hdr->count = len; + + switch(bus->mode) { + case pmac_i2c_mode_std: + if (subsize != 0) + return -EINVAL; + hdr->address = addrdir; + hdr->mode = PMU_I2C_MODE_SIMPLE; + break; + case pmac_i2c_mode_stdsub: + case pmac_i2c_mode_combined: + if (subsize != 1) + return -EINVAL; + hdr->address = addrdir & 0xfe; + hdr->comb_addr = addrdir; + hdr->sub_addr = subaddr; + if (bus->mode == pmac_i2c_mode_stdsub) + hdr->mode = PMU_I2C_MODE_STDSUB; + else + hdr->mode = PMU_I2C_MODE_COMBINED; + break; + default: + return -EINVAL; + } + + reinit_completion(&comp); + req->data[0] = PMU_I2C_CMD; + req->reply[0] = 0xff; + req->nbytes = sizeof(struct pmu_i2c_hdr) + 1; + req->done = pmu_i2c_complete; + req->arg = ∁ + if (!read && len) { + memcpy(hdr->data, data, len); + req->nbytes += len; + } + rc = pmu_queue_request(req); + if (rc) + return rc; + wait_for_completion(&comp); + if (req->reply[0] == PMU_I2C_STATUS_OK) + break; + msleep(15); + } + if (req->reply[0] != PMU_I2C_STATUS_OK) + return -EIO; + + for (retry = 0; retry < 16; retry++) { + memset(req, 0, sizeof(struct adb_request)); + + /* I know that looks like a lot, slow as hell, but darwin + * does it so let's be on the safe side for now + */ + msleep(15); + + hdr->bus = PMU_I2C_BUS_STATUS; + + reinit_completion(&comp); + req->data[0] = PMU_I2C_CMD; + req->reply[0] = 0xff; + req->nbytes = 2; + 
+
+static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+ u32 subaddr, u8 *data, int len)
+{
+ struct adb_request *req = bus->hostdata;
+ struct pmu_i2c_hdr *hdr = (struct pmu_i2c_hdr *)&req->data[1];
+ struct completion comp;
+ int read = addrdir & 1;
+ int retry;
+ int rc = 0;
+
+ /* For now, limit ourselves to 16-byte transfers */
+ if (len > 16)
+ return -EINVAL;
+
+ init_completion(&comp);
+
+ for (retry = 0; retry < 16; retry++) {
+ memset(req, 0, sizeof(struct adb_request));
+ hdr->bus = bus->channel;
+ hdr->count = len;
+
+ switch(bus->mode) {
+ case pmac_i2c_mode_std:
+ if (subsize != 0)
+ return -EINVAL;
+ hdr->address = addrdir;
+ hdr->mode = PMU_I2C_MODE_SIMPLE;
+ break;
+ case pmac_i2c_mode_stdsub:
+ case pmac_i2c_mode_combined:
+ if (subsize != 1)
+ return -EINVAL;
+ hdr->address = addrdir & 0xfe;
+ hdr->comb_addr = addrdir;
+ hdr->sub_addr = subaddr;
+ if (bus->mode == pmac_i2c_mode_stdsub)
+ hdr->mode = PMU_I2C_MODE_STDSUB;
+ else
+ hdr->mode = PMU_I2C_MODE_COMBINED;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ reinit_completion(&comp);
+ req->data[0] = PMU_I2C_CMD;
+ req->reply[0] = 0xff;
+ req->nbytes = sizeof(struct pmu_i2c_hdr) + 1;
+ req->done = pmu_i2c_complete;
+ req->arg = &comp;
+ if (!read && len) {
+ memcpy(hdr->data, data, len);
+ req->nbytes += len;
+ }
+ rc = pmu_queue_request(req);
+ if (rc)
+ return rc;
+ wait_for_completion(&comp);
+ if (req->reply[0] == PMU_I2C_STATUS_OK)
+ break;
+ msleep(15);
+ }
+ if (req->reply[0] != PMU_I2C_STATUS_OK)
+ return -EIO;
+
+ for (retry = 0; retry < 16; retry++) {
+ memset(req, 0, sizeof(struct adb_request));
+
+ /* I know that looks like a lot, slow as hell, but darwin
+ * does it so let's be on the safe side for now
+ */
+ msleep(15);
+
+ hdr->bus = PMU_I2C_BUS_STATUS;
+
+ reinit_completion(&comp);
+ req->data[0] = PMU_I2C_CMD;
+ req->reply[0] = 0xff;
+ req->nbytes = 2;
+ req->done = pmu_i2c_complete;
+ req->arg = &comp;
+ rc = pmu_queue_request(req);
+ if (rc)
+ return rc;
+ wait_for_completion(&comp);
+
+ if (req->reply[0] == PMU_I2C_STATUS_OK && !read)
+ return 0;
+ if (req->reply[0] == PMU_I2C_STATUS_DATAREAD && read) {
+ int rlen = req->reply_len - 1;
+
+ if (rlen != len) {
+ printk(KERN_WARNING "low_i2c: PMU returned %d"
+ " bytes, expected %d !\n", rlen, len);
+ return -EIO;
+ }
+ if (len)
+ memcpy(data, &req->reply[1], len);
+ return 0;
+ }
+ }
+ return -EIO;
+}
+
+static void __init pmu_i2c_probe(void)
+{
+ struct pmac_i2c_bus *bus;
+ struct device_node *busnode;
+ int channel, sz;
+
+ if (!pmu_present())
+ return;
+
+ /* There might or might not be a "pmu-i2c" node, we use that
+ * or via-pmu itself, whatever we find. I haven't seen a machine
+ * with separate bus nodes, so we assume a multibus setup
+ */
+ busnode = of_find_node_by_name(NULL, "pmu-i2c");
+ if (busnode == NULL)
+ busnode = of_find_node_by_name(NULL, "via-pmu");
+ if (busnode == NULL)
+ return;
+
+ printk(KERN_INFO "PMU i2c %s\n", busnode->full_name);
+
+ /*
+ * We add bus 1 and 2 only for now, bus 0 is "special"
+ */
+ for (channel = 1; channel <= 2; channel++) {
+ sz = sizeof(struct pmac_i2c_bus) + sizeof(struct adb_request);
+ bus = kzalloc(sz, GFP_KERNEL);
+ if (bus == NULL)
+ return;
+
+ bus->controller = busnode;
+ bus->busnode = busnode;
+ bus->type = pmac_i2c_bus_pmu;
+ bus->channel = channel;
+ bus->mode = pmac_i2c_mode_std;
+ bus->hostdata = bus + 1;
+ bus->xfer = pmu_i2c_xfer;
+ mutex_init(&bus->mutex);
+ bus->flags = pmac_i2c_multibus;
+ list_add(&bus->link, &pmac_i2c_busses);
+
+ printk(KERN_INFO " channel %d bus <multibus>\n", channel);
+ }
+}
+
+#endif /* CONFIG_ADB_PMU */
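The `bus->hostdata = bus + 1` assignment above relies on the bus structure and its per-bus request block having been kzalloc'd in one shot. A small userspace sketch of the same co-allocation pattern (names are illustrative, not kernel API):

#include <stdio.h>
#include <stdlib.h>

struct request { int nbytes; };              /* stands in for struct adb_request */
struct bus { int channel; void *hostdata; };

int main(void)
{
    /* One allocation holds the bus followed by its request block, so a
     * single free() releases both and they stay adjacent in memory. */
    size_t sz = sizeof(struct bus) + sizeof(struct request);
    struct bus *b = calloc(1, sz);

    if (b == NULL)
        return 1;
    b->hostdata = b + 1;    /* request block lives right after the bus */

    struct request *req = b->hostdata;
    req->nbytes = 2;
    printf("bus=%p req=%p\n", (void *)b, (void *)req);
    free(b);
    return 0;
}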
+
+
+/*
+ *
+ * SMU implementation
+ *
+ */
+
+#ifdef CONFIG_PMAC_SMU
+
+static void smu_i2c_complete(struct smu_i2c_cmd *cmd, void *misc)
+{
+ complete(misc);
+}
+
+static int smu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+ u32 subaddr, u8 *data, int len)
+{
+ struct smu_i2c_cmd *cmd = bus->hostdata;
+ struct completion comp;
+ int read = addrdir & 1;
+ int rc = 0;
+
+ if ((read && len > SMU_I2C_READ_MAX) ||
+ ((!read) && len > SMU_I2C_WRITE_MAX))
+ return -EINVAL;
+
+ memset(cmd, 0, sizeof(struct smu_i2c_cmd));
+ cmd->info.bus = bus->channel;
+ cmd->info.devaddr = addrdir;
+ cmd->info.datalen = len;
+
+ switch(bus->mode) {
+ case pmac_i2c_mode_std:
+ if (subsize != 0)
+ return -EINVAL;
+ cmd->info.type = SMU_I2C_TRANSFER_SIMPLE;
+ break;
+ case pmac_i2c_mode_stdsub:
+ case pmac_i2c_mode_combined:
+ if (subsize > 3 || subsize < 1)
+ return -EINVAL;
+ cmd->info.sublen = subsize;
+ /* that's big-endian only but heh ! */
+ memcpy(&cmd->info.subaddr, ((char *)&subaddr) + (4 - subsize),
+ subsize);
+ if (bus->mode == pmac_i2c_mode_stdsub)
+ cmd->info.type = SMU_I2C_TRANSFER_STDSUB;
+ else
+ cmd->info.type = SMU_I2C_TRANSFER_COMBINED;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (!read && len)
+ memcpy(cmd->info.data, data, len);
+
+ init_completion(&comp);
+ cmd->done = smu_i2c_complete;
+ cmd->misc = &comp;
+ rc = smu_queue_i2c(cmd);
+ if (rc < 0)
+ return rc;
+ wait_for_completion(&comp);
+ rc = cmd->status;
+
+ if (read && len)
+ memcpy(data, cmd->info.data, len);
+ return rc < 0 ? rc : 0;
+}
+
+static void __init smu_i2c_probe(void)
+{
+ struct device_node *controller, *busnode;
+ struct pmac_i2c_bus *bus;
+ const u32 *reg;
+ int sz;
+
+ if (!smu_present())
+ return;
+
+ controller = of_find_node_by_name(NULL, "smu-i2c-control");
+ if (controller == NULL)
+ controller = of_find_node_by_name(NULL, "smu");
+ if (controller == NULL)
+ return;
+
+ printk(KERN_INFO "SMU i2c %s\n", controller->full_name);
+
+ /* Look for children; note that they might not be of the right
+ * type as older device trees mix i2c busses and other things
+ * at the same level
+ */
+ for (busnode = NULL;
+ (busnode = of_get_next_child(controller, busnode)) != NULL;) {
+ if (strcmp(busnode->type, "i2c") &&
+ strcmp(busnode->type, "i2c-bus"))
+ continue;
+ reg = of_get_property(busnode, "reg", NULL);
+ if (reg == NULL)
+ continue;
+
+ sz = sizeof(struct pmac_i2c_bus) + sizeof(struct smu_i2c_cmd);
+ bus = kzalloc(sz, GFP_KERNEL);
+ if (bus == NULL)
+ return;
+
+ bus->controller = controller;
+ bus->busnode = of_node_get(busnode);
+ bus->type = pmac_i2c_bus_smu;
+ bus->channel = *reg;
+ bus->mode = pmac_i2c_mode_std;
+ bus->hostdata = bus + 1;
+ bus->xfer = smu_i2c_xfer;
+ mutex_init(&bus->mutex);
+ bus->flags = 0;
+ list_add(&bus->link, &pmac_i2c_busses);
+
+ printk(KERN_INFO " channel %x bus %s\n",
+ bus->channel, busnode->full_name);
+ }
+}
+
+#endif /* CONFIG_PMAC_SMU */
+
+/*
+ *
+ * Core code
+ *
+ */
+
+
+struct pmac_i2c_bus *pmac_i2c_find_bus(struct device_node *node)
+{
+ struct device_node *p = of_node_get(node);
+ struct device_node *prev = NULL;
+ struct pmac_i2c_bus *bus;
+
+ while(p) {
+ list_for_each_entry(bus, &pmac_i2c_busses, link) {
+ if (p == bus->busnode) {
+ if (prev && bus->flags & pmac_i2c_multibus) {
+ const u32 *reg;
+ reg = of_get_property(prev, "reg",
+ NULL);
+ if (!reg)
+ continue;
+ if (((*reg) >> 8) != bus->channel)
+ continue;
+ }
+ of_node_put(p);
+ of_node_put(prev);
+ return bus;
+ }
+ }
+ of_node_put(prev);
+ prev = p;
+ p = of_get_parent(p);
+ }
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_find_bus);
+
+u8 pmac_i2c_get_dev_addr(struct device_node *device)
+{
+ const u32 *reg = of_get_property(device, "reg", NULL);
+
+ if (reg == NULL)
+ return 0;
+
+ return (*reg) & 0xff;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_dev_addr);
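pmac_i2c_find_bus() and pmac_i2c_get_dev_addr() above both assume the device's "reg" cell packs the channel number above bit 8 and the device address (including the direction bit) in the low byte. A quick standalone check of that split, with a made-up sample value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Sample "reg" cell: channel 1, address 0xd0 (illustrative). */
    uint32_t reg = (1u << 8) | 0xd0;

    /* Same masks as the two helpers above. */
    printf("channel=%u addr=0x%02x\n", reg >> 8, reg & 0xff);
    return 0;
}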
+
+struct device_node *pmac_i2c_get_controller(struct pmac_i2c_bus *bus)
+{
+ return bus->controller;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_controller);
+
+struct device_node *pmac_i2c_get_bus_node(struct pmac_i2c_bus *bus)
+{
+ return bus->busnode;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_bus_node);
+
+int pmac_i2c_get_type(struct pmac_i2c_bus *bus)
+{
+ return bus->type;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_type);
+
+int pmac_i2c_get_flags(struct pmac_i2c_bus *bus)
+{
+ return bus->flags;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_flags);
+
+int pmac_i2c_get_channel(struct pmac_i2c_bus *bus)
+{
+ return bus->channel;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_channel);
+
+
+struct i2c_adapter *pmac_i2c_get_adapter(struct pmac_i2c_bus *bus)
+{
+ return &bus->adapter;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_adapter);
+
+struct pmac_i2c_bus *pmac_i2c_adapter_to_bus(struct i2c_adapter *adapter)
+{
+ struct pmac_i2c_bus *bus;
+
+ list_for_each_entry(bus, &pmac_i2c_busses, link)
+ if (&bus->adapter == adapter)
+ return bus;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_adapter_to_bus);
+
+int pmac_i2c_match_adapter(struct device_node *dev, struct i2c_adapter *adapter)
+{
+ struct pmac_i2c_bus *bus = pmac_i2c_find_bus(dev);
+
+ if (bus == NULL)
+ return 0;
+ return (&bus->adapter == adapter);
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_match_adapter);
+
+int pmac_low_i2c_lock(struct device_node *np)
+{
+ struct pmac_i2c_bus *bus, *found = NULL;
+
+ list_for_each_entry(bus, &pmac_i2c_busses, link) {
+ if (np == bus->controller) {
+ found = bus;
+ break;
+ }
+ }
+ if (!found)
+ return -ENODEV;
+ return pmac_i2c_open(found, 0);
+}
+EXPORT_SYMBOL_GPL(pmac_low_i2c_lock);
+
+int pmac_low_i2c_unlock(struct device_node *np)
+{
+ struct pmac_i2c_bus *bus, *found = NULL;
+
+ list_for_each_entry(bus, &pmac_i2c_busses, link) {
+ if (np == bus->controller) {
+ found = bus;
+ break;
+ }
+ }
+ if (!found)
+ return -ENODEV;
+ pmac_i2c_close(found);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pmac_low_i2c_unlock);
+
+
+int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled)
+{
+ int rc;
+
+ mutex_lock(&bus->mutex);
+ bus->polled = polled || pmac_i2c_force_poll;
+ bus->opened = 1;
+ bus->mode = pmac_i2c_mode_std;
+ if (bus->open && (rc = bus->open(bus)) != 0) {
+ bus->opened = 0;
+ mutex_unlock(&bus->mutex);
+ return rc;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_open);
+
+void pmac_i2c_close(struct pmac_i2c_bus *bus)
+{
+ WARN_ON(!bus->opened);
+ if (bus->close)
+ bus->close(bus);
+ bus->opened = 0;
+ mutex_unlock(&bus->mutex);
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_close);
+
+int pmac_i2c_setmode(struct pmac_i2c_bus *bus, int mode)
+{
+ WARN_ON(!bus->opened);
+
+ /* Report it to me if you see the error below, as there might be a new
+ * "combined4" mode that I need to implement for the SMU bus
+ */
+ if (mode < pmac_i2c_mode_dumb || mode > pmac_i2c_mode_combined) {
+ printk(KERN_ERR "low_i2c: Invalid mode %d requested on"
+ " bus %s !\n", mode, bus->busnode->full_name);
+ return -EINVAL;
+ }
+ bus->mode = mode;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_setmode);
+
+int pmac_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+ u32 subaddr, u8 *data, int len)
+{
+ int rc;
+
+ WARN_ON(!bus->opened);
+
+ DBG("xfer() chan=%d, addrdir=0x%x, mode=%d, subsize=%d, subaddr=0x%x,"
+ " %d bytes, bus %s\n", bus->channel, addrdir, bus->mode, subsize,
+ subaddr, len, bus->busnode->full_name);
+
+ rc = bus->xfer(bus, addrdir, subsize, subaddr, data, len);
+
+#ifdef DEBUG
+ if (rc)
+ DBG("xfer error %d\n", rc);
+#endif
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_xfer);
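Note that pmac_i2c_open() returns with the bus mutex still held, and pmac_i2c_close() is what releases it, so everything between the two runs with exclusive bus access. A minimal userspace sketch of that lock-across-open pattern using pthreads (names illustrative, not the kernel API):

#include <pthread.h>
#include <stdio.h>

struct bus { pthread_mutex_t mutex; int opened; };

/* Take the lock on open and keep holding it... */
static void bus_open(struct bus *b)
{
    pthread_mutex_lock(&b->mutex);
    b->opened = 1;
}

/* ...so everything until close runs with exclusive bus access. */
static void bus_close(struct bus *b)
{
    b->opened = 0;
    pthread_mutex_unlock(&b->mutex);
}

int main(void)
{
    struct bus b = { PTHREAD_MUTEX_INITIALIZER, 0 };

    bus_open(&b);
    printf("exclusive section, opened=%d\n", b.opened);
    bus_close(&b);
    return 0;
}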
+
+/* some quirks for platform function decoding */
+enum {
+ pmac_i2c_quirk_invmask = 0x00000001u,
+ pmac_i2c_quirk_skip = 0x00000002u,
+};
+
+static void pmac_i2c_devscan(void (*callback)(struct device_node *dev,
+ int quirks))
+{
+ struct pmac_i2c_bus *bus;
+ struct device_node *np;
+ static struct whitelist_ent {
+ char *name;
+ char *compatible;
+ int quirks;
+ } whitelist[] = {
+ /* XXX Study the device-trees & Apple drivers to get the quirks
+ * right !
+ */
+ /* Workaround: It seems that running the clockspreading
+ * properties on the eMac will cause lockups during boot.
+ * The machine seems to work fine without that. So for now,
+ * let's make sure i2c-hwclock doesn't match the "imic"
+ * clocks and we'll figure out if we really need to do
+ * something special about those later.
+ */
+ { "i2c-hwclock", "imic5002", pmac_i2c_quirk_skip },
+ { "i2c-hwclock", "imic5003", pmac_i2c_quirk_skip },
+ { "i2c-hwclock", NULL, pmac_i2c_quirk_invmask },
+ { "i2c-cpu-voltage", NULL, 0},
+ { "temp-monitor", NULL, 0 },
+ { "supply-monitor", NULL, 0 },
+ { NULL, NULL, 0 },
+ };
+
+ /* Only some devices need to have platform functions instantiated
+ * here. For now, we have a table. Others, like 9554 i2c GPIOs used
+ * on Xserve, if we ever do a driver for them, will use their own
+ * platform function instance
+ */
+ list_for_each_entry(bus, &pmac_i2c_busses, link) {
+ for (np = NULL;
+ (np = of_get_next_child(bus->busnode, np)) != NULL;) {
+ struct whitelist_ent *p;
+ /* If multibus, check if device is on that bus */
+ if (bus->flags & pmac_i2c_multibus)
+ if (bus != pmac_i2c_find_bus(np))
+ continue;
+ for (p = whitelist; p->name != NULL; p++) {
+ if (strcmp(np->name, p->name))
+ continue;
+ if (p->compatible &&
+ !of_device_is_compatible(np, p->compatible))
+ continue;
+ if (p->quirks & pmac_i2c_quirk_skip)
+ break;
+ callback(np, p->quirks);
+ break;
+ }
+ }
+ }
+}
+
+#define MAX_I2C_DATA 64
+
+struct pmac_i2c_pf_inst
+{
+ struct pmac_i2c_bus *bus;
+ u8 addr;
+ u8 buffer[MAX_I2C_DATA];
+ u8 scratch[MAX_I2C_DATA];
+ int bytes;
+ int quirks;
+};
+
+static void* pmac_i2c_do_begin(struct pmf_function *func, struct pmf_args *args)
+{
+ struct pmac_i2c_pf_inst *inst;
+ struct pmac_i2c_bus *bus;
+
+ bus = pmac_i2c_find_bus(func->node);
+ if (bus == NULL) {
+ printk(KERN_ERR "low_i2c: Can't find bus for %s (pfunc)\n",
+ func->node->full_name);
+ return NULL;
+ }
+ if (pmac_i2c_open(bus, 0)) {
+ printk(KERN_ERR "low_i2c: Can't open i2c bus for %s (pfunc)\n",
+ func->node->full_name);
+ return NULL;
+ }
+
+ /* XXX might need GFP_ATOMIC when called during the suspend process,
+ * but then, there are already lots of issues with suspending when
+ * near OOM that need to be resolved, the allocator itself should
+ * probably make GFP_NOIO implicit during suspend
+ */
+ inst = kzalloc(sizeof(struct pmac_i2c_pf_inst), GFP_KERNEL);
+ if (inst == NULL) {
+ pmac_i2c_close(bus);
+ return NULL;
+ }
+ inst->bus = bus;
+ inst->addr = pmac_i2c_get_dev_addr(func->node);
+ inst->quirks = (int)(long)func->driver_data;
+ return inst;
+}
+
+static void pmac_i2c_do_end(struct pmf_function *func, void *instdata)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ if (inst == NULL)
+ return;
+ pmac_i2c_close(inst->bus);
+ kfree(inst);
+}
+
+static int pmac_i2c_do_read(PMF_STD_ARGS, u32 len)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ inst->bytes = len;
+ return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_read, 0, 0,
+ inst->buffer, len);
+}
+
+static int pmac_i2c_do_write(PMF_STD_ARGS, u32 len, const u8 *data)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 0, 0,
+ (u8 *)data, len);
+}
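The whitelist above pairs a node name with an optional compatible string and per-device quirks, first match wins. A standalone sketch of the same table scan (data made up for illustration):

#include <stdio.h>
#include <string.h>

struct ent { const char *name, *compatible; int quirks; };

static const struct ent whitelist[] = {
    { "i2c-hwclock", "imic5002", 0x2 }, /* skip */
    { "i2c-hwclock", NULL, 0x1 },       /* invmask */
    { "temp-monitor", NULL, 0 },
    { NULL, NULL, 0 },
};

/* First entry whose name matches and whose compatible (if present)
 * matches wins, mirroring the inner loop of pmac_i2c_devscan() above. */
static const struct ent *match(const char *name, const char *compat)
{
    const struct ent *p;

    for (p = whitelist; p->name != NULL; p++) {
        if (strcmp(name, p->name))
            continue;
        if (p->compatible && strcmp(compat, p->compatible))
            continue;
        return p;
    }
    return NULL;
}

int main(void)
{
    const struct ent *e = match("i2c-hwclock", "imic5003");

    printf("quirks=0x%x\n", e ? e->quirks : 0); /* falls through to 0x1 */
    return 0;
}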
+
+/* This function is used to do the masking & OR'ing for the "rmw" type
+ * callbacks. We should apply the mask and OR in the values in the
+ * buffer before writing back. The problem is that it seems that
+ * various darwin drivers implement the mask/or differently, thus
+ * we need to check the quirks first
+ */
+static void pmac_i2c_do_apply_rmw(struct pmac_i2c_pf_inst *inst,
+ u32 len, const u8 *mask, const u8 *val)
+{
+ int i;
+
+ if (inst->quirks & pmac_i2c_quirk_invmask) {
+ for (i = 0; i < len; i ++)
+ inst->scratch[i] = (inst->buffer[i] & mask[i]) | val[i];
+ } else {
+ for (i = 0; i < len; i ++)
+ inst->scratch[i] = (inst->buffer[i] & ~mask[i])
+ | (val[i] & mask[i]);
+ }
+}
+
+static int pmac_i2c_do_rmw(PMF_STD_ARGS, u32 masklen, u32 valuelen,
+ u32 totallen, const u8 *maskdata,
+ const u8 *valuedata)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ if (masklen > inst->bytes || valuelen > inst->bytes ||
+ totallen > inst->bytes || valuelen > masklen)
+ return -EINVAL;
+
+ pmac_i2c_do_apply_rmw(inst, masklen, maskdata, valuedata);
+
+ return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 0, 0,
+ inst->scratch, totallen);
+}
+
+static int pmac_i2c_do_read_sub(PMF_STD_ARGS, u8 subaddr, u32 len)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ inst->bytes = len;
+ return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_read, 1, subaddr,
+ inst->buffer, len);
+}
+
+static int pmac_i2c_do_write_sub(PMF_STD_ARGS, u8 subaddr, u32 len,
+ const u8 *data)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 1,
+ subaddr, (u8 *)data, len);
+}
+
+static int pmac_i2c_do_set_mode(PMF_STD_ARGS, int mode)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ return pmac_i2c_setmode(inst->bus, mode);
+}
+
+static int pmac_i2c_do_rmw_sub(PMF_STD_ARGS, u8 subaddr, u32 masklen,
+ u32 valuelen, u32 totallen, const u8 *maskdata,
+ const u8 *valuedata)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ if (masklen > inst->bytes || valuelen > inst->bytes ||
+ totallen > inst->bytes || valuelen > masklen)
+ return -EINVAL;
+
+ pmac_i2c_do_apply_rmw(inst, masklen, maskdata, valuedata);
+
+ return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 1,
+ subaddr, inst->scratch, totallen);
+}
+
+static int pmac_i2c_do_mask_and_comp(PMF_STD_ARGS, u32 len,
+ const u8 *maskdata,
+ const u8 *valuedata)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+ int i, match;
+
+ /* Get return value pointer, it's assumed to be a u32 */
+ if (!args || !args->count || !args->u[0].p)
+ return -EINVAL;
+
+ /* Check buffer */
+ if (len > inst->bytes)
+ return -EINVAL;
+
+ for (i = 0, match = 1; match && i < len; i ++)
+ if ((inst->buffer[i] & maskdata[i]) != valuedata[i])
+ match = 0;
+ *args->u[0].p = match;
+ return 0;
+}
+
+static int pmac_i2c_do_delay(PMF_STD_ARGS, u32 duration)
+{
+ msleep((duration + 999) / 1000);
+ return 0;
+}
+
+
+static struct pmf_handlers pmac_i2c_pfunc_handlers = {
+ .begin = pmac_i2c_do_begin,
+ .end = pmac_i2c_do_end,
+ .read_i2c = pmac_i2c_do_read,
+ .write_i2c = pmac_i2c_do_write,
+ .rmw_i2c = pmac_i2c_do_rmw,
+ .read_i2c_sub = pmac_i2c_do_read_sub,
+ .write_i2c_sub = pmac_i2c_do_write_sub,
+ .rmw_i2c_sub = pmac_i2c_do_rmw_sub,
+ .set_i2c_mode = pmac_i2c_do_set_mode,
+ .mask_and_compare = pmac_i2c_do_mask_and_comp,
+ .delay = pmac_i2c_do_delay,
+};
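The two masking conventions in pmac_i2c_do_apply_rmw() above differ in whether the mask selects the bits being kept or the bits being replaced. A standalone demonstration of both on a single byte:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint8_t cur = 0x5a, mask = 0x0f, val = 0x03;

    /* Normal convention: mask selects the bits being replaced. */
    uint8_t normal = (cur & ~mask) | (val & mask);  /* 0x53 */
    /* "invmask" quirk: mask selects the bits being kept. */
    uint8_t inv = (cur & mask) | val;               /* 0x0b */

    printf("normal=0x%02x invmask=0x%02x\n", normal, inv);
    return 0;
}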
+
+static void __init pmac_i2c_dev_create(struct device_node *np, int quirks)
+{
+ DBG("dev_create(%s)\n", np->full_name);
+
+ pmf_register_driver(np, &pmac_i2c_pfunc_handlers,
+ (void *)(long)quirks);
+}
+
+static void __init pmac_i2c_dev_init(struct device_node *np, int quirks)
+{
+ DBG("dev_init(%s)\n", np->full_name);
+
+ pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
+}
+
+static void pmac_i2c_dev_suspend(struct device_node *np, int quirks)
+{
+ DBG("dev_suspend(%s)\n", np->full_name);
+ pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_SLEEP, NULL);
+}
+
+static void pmac_i2c_dev_resume(struct device_node *np, int quirks)
+{
+ DBG("dev_resume(%s)\n", np->full_name);
+ pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_WAKE, NULL);
+}
+
+void pmac_pfunc_i2c_suspend(void)
+{
+ pmac_i2c_devscan(pmac_i2c_dev_suspend);
+}
+
+void pmac_pfunc_i2c_resume(void)
+{
+ pmac_i2c_devscan(pmac_i2c_dev_resume);
+}
+
+/*
+ * Initialize us: probe all i2c busses on the machine, instantiate
+ * busses and platform functions as needed.
+ */
+/* This is non-static as it might be called early by smp code */
+int __init pmac_i2c_init(void)
+{
+ static int i2c_inited;
+
+ if (i2c_inited)
+ return 0;
+ i2c_inited = 1;
+
+ /* Probe keywest-i2c busses */
+ kw_i2c_probe();
+
+#ifdef CONFIG_ADB_PMU
+ /* Probe PMU i2c busses */
+ pmu_i2c_probe();
+#endif
+
+#ifdef CONFIG_PMAC_SMU
+ /* Probe SMU i2c busses */
+ smu_i2c_probe();
+#endif
+
+ /* Now add platform functions for some known devices */
+ pmac_i2c_devscan(pmac_i2c_dev_create);
+
+ return 0;
+}
+machine_arch_initcall(powermac, pmac_i2c_init);
+
+/* Since pmac_i2c_init can be called too early for the platform device
+ * registration, we need to do it at a later time. In our case, subsys
+ * happens to fit well, though I agree it's a bit of a hack...
+ */
+static int __init pmac_i2c_create_platform_devices(void)
+{
+ struct pmac_i2c_bus *bus;
+ int i = 0;
+
+ /* In the case where we are initialized from smp_init(), we must
+ * not use the timer (and thus the irq). It's safe from now on
+ * though
+ */
+ pmac_i2c_force_poll = 0;
+
+ /* Create platform devices */
+ list_for_each_entry(bus, &pmac_i2c_busses, link) {
+ bus->platform_dev =
+ platform_device_alloc("i2c-powermac", i++);
+ if (bus->platform_dev == NULL)
+ return -ENOMEM;
+ bus->platform_dev->dev.platform_data = bus;
+ bus->platform_dev->dev.of_node = bus->busnode;
+ platform_device_add(bus->platform_dev);
+ }
+
+ /* Now call platform "init" functions */
+ pmac_i2c_devscan(pmac_i2c_dev_init);
+
+ return 0;
+}
+machine_subsys_initcall(powermac, pmac_i2c_create_platform_devices);
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
new file mode 100644
index 000000000..60b03a170
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -0,0 +1,648 @@
+/*
+ * Copyright (C) 2002 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Todo: - add support for the OF persistent properties
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/nvram.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/bootmem.h>
+#include <linux/completion.h>
+#include <linux/spinlock.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/nvram.h>
+
+#include "pmac.h"
+
+#define DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif + +#define NVRAM_SIZE 0x2000 /* 8kB of non-volatile RAM */ + +#define CORE99_SIGNATURE 0x5a +#define CORE99_ADLER_START 0x14 + +/* On Core99, nvram is either a sharp, a micron or an AMD flash */ +#define SM_FLASH_STATUS_DONE 0x80 +#define SM_FLASH_STATUS_ERR 0x38 + +#define SM_FLASH_CMD_ERASE_CONFIRM 0xd0 +#define SM_FLASH_CMD_ERASE_SETUP 0x20 +#define SM_FLASH_CMD_RESET 0xff +#define SM_FLASH_CMD_WRITE_SETUP 0x40 +#define SM_FLASH_CMD_CLEAR_STATUS 0x50 +#define SM_FLASH_CMD_READ_STATUS 0x70 + +/* CHRP NVRAM header */ +struct chrp_header { + u8 signature; + u8 cksum; + u16 len; + char name[12]; + u8 data[0]; +}; + +struct core99_header { + struct chrp_header hdr; + u32 adler; + u32 generation; + u32 reserved[2]; +}; + +/* + * Read and write the non-volatile RAM on PowerMacs and CHRP machines. + */ +static int nvram_naddrs; +static volatile unsigned char __iomem *nvram_data; +static int is_core_99; +static int core99_bank = 0; +static int nvram_partitions[3]; +// XXX Turn that into a sem +static DEFINE_RAW_SPINLOCK(nv_lock); + +static int (*core99_write_bank)(int bank, u8* datas); +static int (*core99_erase_bank)(int bank); + +static char *nvram_image; + + +static unsigned char core99_nvram_read_byte(int addr) +{ + if (nvram_image == NULL) + return 0xff; + return nvram_image[addr]; +} + +static void core99_nvram_write_byte(int addr, unsigned char val) +{ + if (nvram_image == NULL) + return; + nvram_image[addr] = val; +} + +static ssize_t core99_nvram_read(char *buf, size_t count, loff_t *index) +{ + int i; + + if (nvram_image == NULL) + return -ENODEV; + if (*index > NVRAM_SIZE) + return 0; + + i = *index; + if (i + count > NVRAM_SIZE) + count = NVRAM_SIZE - i; + + memcpy(buf, &nvram_image[i], count); + *index = i + count; + return count; +} + +static ssize_t core99_nvram_write(char *buf, size_t count, loff_t *index) +{ + int i; + + if (nvram_image == NULL) + return -ENODEV; + if (*index > NVRAM_SIZE) + return 0; + + i = *index; + if (i + count > NVRAM_SIZE) + count = NVRAM_SIZE - i; + + memcpy(&nvram_image[i], buf, count); + *index = i + count; + return count; +} + +static ssize_t core99_nvram_size(void) +{ + if (nvram_image == NULL) + return -ENODEV; + return NVRAM_SIZE; +} + +#ifdef CONFIG_PPC32 +static volatile unsigned char __iomem *nvram_addr; +static int nvram_mult; + +static unsigned char direct_nvram_read_byte(int addr) +{ + return in_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult]); +} + +static void direct_nvram_write_byte(int addr, unsigned char val) +{ + out_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult], val); +} + + +static unsigned char indirect_nvram_read_byte(int addr) +{ + unsigned char val; + unsigned long flags; + + raw_spin_lock_irqsave(&nv_lock, flags); + out_8(nvram_addr, addr >> 5); + val = in_8(&nvram_data[(addr & 0x1f) << 4]); + raw_spin_unlock_irqrestore(&nv_lock, flags); + + return val; +} + +static void indirect_nvram_write_byte(int addr, unsigned char val) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&nv_lock, flags); + out_8(nvram_addr, addr >> 5); + out_8(&nvram_data[(addr & 0x1f) << 4], val); + raw_spin_unlock_irqrestore(&nv_lock, flags); +} + + +#ifdef CONFIG_ADB_PMU + +static void pmu_nvram_complete(struct adb_request *req) +{ + if (req->arg) + complete((struct completion *)req->arg); +} + +static unsigned char pmu_nvram_read_byte(int addr) +{ + struct adb_request req; + DECLARE_COMPLETION_ONSTACK(req_complete); + + req.arg = system_state == SYSTEM_RUNNING ? 
&req_complete : NULL;
+ if (pmu_request(&req, pmu_nvram_complete, 3, PMU_READ_NVRAM,
+ (addr >> 8) & 0xff, addr & 0xff))
+ return 0xff;
+ if (system_state == SYSTEM_RUNNING)
+ wait_for_completion(&req_complete);
+ while (!req.complete)
+ pmu_poll();
+ return req.reply[0];
+}
+
+static void pmu_nvram_write_byte(int addr, unsigned char val)
+{
+ struct adb_request req;
+ DECLARE_COMPLETION_ONSTACK(req_complete);
+
+ req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL;
+ if (pmu_request(&req, pmu_nvram_complete, 4, PMU_WRITE_NVRAM,
+ (addr >> 8) & 0xff, addr & 0xff, val))
+ return;
+ if (system_state == SYSTEM_RUNNING)
+ wait_for_completion(&req_complete);
+ while (!req.complete)
+ pmu_poll();
+}
+
+#endif /* CONFIG_ADB_PMU */
+#endif /* CONFIG_PPC32 */
+
+static u8 chrp_checksum(struct chrp_header* hdr)
+{
+ u8 *ptr;
+ u16 sum = hdr->signature;
+ for (ptr = (u8 *)&hdr->len; ptr < hdr->data; ptr++)
+ sum += *ptr;
+ while (sum > 0xFF)
+ sum = (sum & 0xFF) + (sum>>8);
+ return sum;
+}
+
+static u32 core99_calc_adler(u8 *buffer)
+{
+ int cnt;
+ u32 low, high;
+
+ buffer += CORE99_ADLER_START;
+ low = 1;
+ high = 0;
+ for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) {
+ if ((cnt % 5000) == 0) {
+ low %= 65521UL;
+ high %= 65521UL;
+ }
+ low += buffer[cnt];
+ high += low;
+ }
+ low %= 65521UL;
+ high %= 65521UL;
+
+ return (high << 16) | low;
+}
+
+static u32 core99_check(u8* datas)
+{
+ struct core99_header* hdr99 = (struct core99_header*)datas;
+
+ if (hdr99->hdr.signature != CORE99_SIGNATURE) {
+ DBG("Invalid signature\n");
+ return 0;
+ }
+ if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) {
+ DBG("Invalid checksum\n");
+ return 0;
+ }
+ if (hdr99->adler != core99_calc_adler(datas)) {
+ DBG("Invalid adler\n");
+ return 0;
+ }
+ return hdr99->generation;
+}
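chrp_checksum() above is a byte sum with end-around carry folding over the len and name fields. A standalone userspace version of the same fold, over a sample header whose bytes are made up for illustration:

#include <stdio.h>
#include <stdint.h>

/* Same folding as chrp_checksum() above: sum the signature plus the
 * len/name bytes, then fold the carry byte back in until the sum
 * fits in 8 bits. */
static uint8_t fold_sum(const uint8_t *bytes, int n)
{
    uint16_t sum = 0;
    int i;

    for (i = 0; i < n; i++)
        sum += bytes[i];
    while (sum > 0xff)
        sum = (sum & 0xff) + (sum >> 8);
    return (uint8_t)sum;
}

int main(void)
{
    /* signature, two length bytes, then a 12-byte name (sample values;
     * the cksum byte itself is excluded from the sum). */
    uint8_t hdr[15] = { 0x5a, 0x00, 0x10, 'c', 'o', 'm', 'm', 'o', 'n' };

    printf("cksum=0x%02x\n", fold_sum(hdr, sizeof(hdr)));
    return 0;
}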
+
+static int sm_erase_bank(int bank)
+{
+ int stat;
+ unsigned long timeout;
+
+ u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
+
+ DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank);
+
+ out_8(base, SM_FLASH_CMD_ERASE_SETUP);
+ out_8(base, SM_FLASH_CMD_ERASE_CONFIRM);
+ timeout = 0;
+ do {
+ if (++timeout > 1000000) {
+ printk(KERN_ERR "nvram: Sharp/Micron flash erase timeout !\n");
+ break;
+ }
+ out_8(base, SM_FLASH_CMD_READ_STATUS);
+ stat = in_8(base);
+ } while (!(stat & SM_FLASH_STATUS_DONE));
+
+ out_8(base, SM_FLASH_CMD_CLEAR_STATUS);
+ out_8(base, SM_FLASH_CMD_RESET);
+
+ if (memchr_inv(base, 0xff, NVRAM_SIZE)) {
+ printk(KERN_ERR "nvram: Sharp/Micron flash erase failed !\n");
+ return -ENXIO;
+ }
+ return 0;
+}
+
+static int sm_write_bank(int bank, u8* datas)
+{
+ int i, stat = 0;
+ unsigned long timeout;
+
+ u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
+
+ DBG("nvram: Sharp/Micron Writing bank %d...\n", bank);
+
+ for (i=0; i<NVRAM_SIZE; i++) {
+ out_8(base+i, SM_FLASH_CMD_WRITE_SETUP);
+ udelay(1);
+ out_8(base+i, datas[i]);
+ timeout = 0;
+ do {
+ if (++timeout > 1000000) {
+ printk(KERN_ERR "nvram: Sharp/Micron flash write timeout !\n");
+ break;
+ }
+ out_8(base, SM_FLASH_CMD_READ_STATUS);
+ stat = in_8(base);
+ } while (!(stat & SM_FLASH_STATUS_DONE));
+ if (!(stat & SM_FLASH_STATUS_DONE))
+ break;
+ }
+ out_8(base, SM_FLASH_CMD_CLEAR_STATUS);
+ out_8(base, SM_FLASH_CMD_RESET);
+ if (memcmp(base, datas, NVRAM_SIZE)) {
+ printk(KERN_ERR "nvram: Sharp/Micron flash write failed !\n");
+ return -ENXIO;
+ }
+ return 0;
+}
+
+static int amd_erase_bank(int bank)
+{
+ int stat = 0;
+ unsigned long timeout;
+
+ u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
+
+ DBG("nvram: AMD Erasing bank %d...\n", bank);
+
+ /* Unlock 1 */
+ out_8(base+0x555, 0xaa);
+ udelay(1);
+ /* Unlock 2 */
+ out_8(base+0x2aa, 0x55);
+ udelay(1);
+
+ /* Sector-Erase */
+ out_8(base+0x555, 0x80);
+ udelay(1);
+ out_8(base+0x555, 0xaa);
+ udelay(1);
+ out_8(base+0x2aa, 0x55);
+ udelay(1);
+ out_8(base, 0x30);
+ udelay(1);
+
+ timeout = 0;
+ do {
+ if (++timeout > 1000000) {
+ printk(KERN_ERR "nvram: AMD flash erase timeout !\n");
+ break;
+ }
+ stat = in_8(base) ^ in_8(base);
+ } while (stat != 0);
+
+ /* Reset */
+ out_8(base, 0xf0);
+ udelay(1);
+
+ if (memchr_inv(base, 0xff, NVRAM_SIZE)) {
+ printk(KERN_ERR "nvram: AMD flash erase failed !\n");
+ return -ENXIO;
+ }
+ return 0;
+}
+
+static int amd_write_bank(int bank, u8* datas)
+{
+ int i, stat = 0;
+ unsigned long timeout;
+
+ u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
+
+ DBG("nvram: AMD Writing bank %d...\n", bank);
+
+ for (i=0; i<NVRAM_SIZE; i++) {
+ /* Unlock 1 */
+ out_8(base+0x555, 0xaa);
+ udelay(1);
+ /* Unlock 2 */
+ out_8(base+0x2aa, 0x55);
+ udelay(1);
+
+ /* Write single word */
+ out_8(base+0x555, 0xa0);
+ udelay(1);
+ out_8(base+i, datas[i]);
+
+ timeout = 0;
+ do {
+ if (++timeout > 1000000) {
+ printk(KERN_ERR "nvram: AMD flash write timeout !\n");
+ break;
+ }
+ stat = in_8(base) ^ in_8(base);
+ } while (stat != 0);
+ if (stat != 0)
+ break;
+ }
+
+ /* Reset */
+ out_8(base, 0xf0);
+ udelay(1);
+
+ if (memcmp(base, datas, NVRAM_SIZE)) {
+ printk(KERN_ERR "nvram: AMD flash write failed !\n");
+ return -ENXIO;
+ }
+ return 0;
+}
+
+static void __init lookup_partitions(void)
+{
+ u8 buffer[17];
+ int i, offset;
+ struct chrp_header* hdr;
+
+ if (pmac_newworld) {
+ nvram_partitions[pmac_nvram_OF] = -1;
+ nvram_partitions[pmac_nvram_XPRAM] = -1;
+ nvram_partitions[pmac_nvram_NR] = -1;
+ hdr = (struct chrp_header *)buffer;
+
+ offset = 0;
+ buffer[16] = 0;
+ do {
+ for (i=0;i<16;i++)
+ buffer[i] = ppc_md.nvram_read_val(offset+i);
+ if (!strcmp(hdr->name, "common"))
+ nvram_partitions[pmac_nvram_OF] = offset + 0x10;
+ if (!strcmp(hdr->name, "APL,MacOS75")) {
+ nvram_partitions[pmac_nvram_XPRAM] = offset + 0x10;
+ nvram_partitions[pmac_nvram_NR] = offset + 0x110;
+ }
+ offset += (hdr->len * 0x10);
+ } while(offset < NVRAM_SIZE);
+ } else {
+ nvram_partitions[pmac_nvram_OF] = 0x1800;
+ nvram_partitions[pmac_nvram_XPRAM] = 0x1300;
+ nvram_partitions[pmac_nvram_NR] = 0x1400;
+ }
+ DBG("nvram: OF partition at 0x%x\n", nvram_partitions[pmac_nvram_OF]);
+ DBG("nvram: XP partition at 0x%x\n", nvram_partitions[pmac_nvram_XPRAM]);
+ DBG("nvram: NR partition at 0x%x\n", nvram_partitions[pmac_nvram_NR]);
+}
+
+static void core99_nvram_sync(void)
+{
+ struct core99_header* hdr99;
+ unsigned long flags;
+
+ if (!is_core_99 || !nvram_data || !nvram_image)
+ return;
+
+ raw_spin_lock_irqsave(&nv_lock, flags);
+ if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE,
+ NVRAM_SIZE))
+ goto bail;
+
+ DBG("Updating nvram...\n");
+
+ hdr99 = (struct core99_header*)nvram_image;
+ hdr99->generation++;
+ hdr99->hdr.signature = CORE99_SIGNATURE;
+ hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr);
+ hdr99->adler = core99_calc_adler(nvram_image);
+ core99_bank = core99_bank ?
0 : 1; + if (core99_erase_bank) + if (core99_erase_bank(core99_bank)) { + printk("nvram: Error erasing bank %d\n", core99_bank); + goto bail; + } + if (core99_write_bank) + if (core99_write_bank(core99_bank, nvram_image)) + printk("nvram: Error writing bank %d\n", core99_bank); + bail: + raw_spin_unlock_irqrestore(&nv_lock, flags); + +#ifdef DEBUG + mdelay(2000); +#endif +} + +static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr) +{ + int i; + u32 gen_bank0, gen_bank1; + + if (nvram_naddrs < 1) { + printk(KERN_ERR "nvram: no address\n"); + return -EINVAL; + } + nvram_image = memblock_virt_alloc(NVRAM_SIZE, 0); + nvram_data = ioremap(addr, NVRAM_SIZE*2); + nvram_naddrs = 1; /* Make sure we get the correct case */ + + DBG("nvram: Checking bank 0...\n"); + + gen_bank0 = core99_check((u8 *)nvram_data); + gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE); + core99_bank = (gen_bank0 < gen_bank1) ? 1 : 0; + + DBG("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1); + DBG("nvram: Active bank is: %d\n", core99_bank); + + for (i=0; i<NVRAM_SIZE; i++) + nvram_image[i] = nvram_data[i + core99_bank*NVRAM_SIZE]; + + ppc_md.nvram_read_val = core99_nvram_read_byte; + ppc_md.nvram_write_val = core99_nvram_write_byte; + ppc_md.nvram_read = core99_nvram_read; + ppc_md.nvram_write = core99_nvram_write; + ppc_md.nvram_size = core99_nvram_size; + ppc_md.nvram_sync = core99_nvram_sync; + ppc_md.machine_shutdown = core99_nvram_sync; + /* + * Maybe we could be smarter here though making an exclusive list + * of known flash chips is a bit nasty as older OF didn't provide us + * with a useful "compatible" entry. A solution would be to really + * identify the chip using flash id commands and base ourselves on + * a list of known chips IDs + */ + if (of_device_is_compatible(dp, "amd-0137")) { + core99_erase_bank = amd_erase_bank; + core99_write_bank = amd_write_bank; + } else { + core99_erase_bank = sm_erase_bank; + core99_write_bank = sm_write_bank; + } + return 0; +} + +int __init pmac_nvram_init(void) +{ + struct device_node *dp; + struct resource r1, r2; + unsigned int s1 = 0, s2 = 0; + int err = 0; + + nvram_naddrs = 0; + + dp = of_find_node_by_name(NULL, "nvram"); + if (dp == NULL) { + printk(KERN_ERR "Can't find NVRAM device\n"); + return -ENODEV; + } + + /* Try to obtain an address */ + if (of_address_to_resource(dp, 0, &r1) == 0) { + nvram_naddrs = 1; + s1 = resource_size(&r1); + if (of_address_to_resource(dp, 1, &r2) == 0) { + nvram_naddrs = 2; + s2 = resource_size(&r2); + } + } + + is_core_99 = of_device_is_compatible(dp, "nvram,flash"); + if (is_core_99) { + err = core99_nvram_setup(dp, r1.start); + goto bail; + } + +#ifdef CONFIG_PPC32 + if (machine_is(chrp) && nvram_naddrs == 1) { + nvram_data = ioremap(r1.start, s1); + nvram_mult = 1; + ppc_md.nvram_read_val = direct_nvram_read_byte; + ppc_md.nvram_write_val = direct_nvram_write_byte; + } else if (nvram_naddrs == 1) { + nvram_data = ioremap(r1.start, s1); + nvram_mult = (s1 + NVRAM_SIZE - 1) / NVRAM_SIZE; + ppc_md.nvram_read_val = direct_nvram_read_byte; + ppc_md.nvram_write_val = direct_nvram_write_byte; + } else if (nvram_naddrs == 2) { + nvram_addr = ioremap(r1.start, s1); + nvram_data = ioremap(r2.start, s2); + ppc_md.nvram_read_val = indirect_nvram_read_byte; + ppc_md.nvram_write_val = indirect_nvram_write_byte; + } else if (nvram_naddrs == 0 && sys_ctrler == SYS_CTRLER_PMU) { +#ifdef CONFIG_ADB_PMU + nvram_naddrs = -1; + ppc_md.nvram_read_val = pmu_nvram_read_byte; + ppc_md.nvram_write_val = pmu_nvram_write_byte; 
+#endif /* CONFIG_ADB_PMU */ + } else { + printk(KERN_ERR "Incompatible type of NVRAM\n"); + err = -ENXIO; + } +#endif /* CONFIG_PPC32 */ +bail: + of_node_put(dp); + if (err == 0) + lookup_partitions(); + return err; +} + +int pmac_get_partition(int partition) +{ + return nvram_partitions[partition]; +} + +u8 pmac_xpram_read(int xpaddr) +{ + int offset = pmac_get_partition(pmac_nvram_XPRAM); + + if (offset < 0 || xpaddr < 0 || xpaddr > 0x100) + return 0xff; + + return ppc_md.nvram_read_val(xpaddr + offset); +} + +void pmac_xpram_write(int xpaddr, u8 data) +{ + int offset = pmac_get_partition(pmac_nvram_XPRAM); + + if (offset < 0 || xpaddr < 0 || xpaddr > 0x100) + return; + + ppc_md.nvram_write_val(xpaddr + offset, data); +} + +EXPORT_SYMBOL(pmac_get_partition); +EXPORT_SYMBOL(pmac_xpram_read); +EXPORT_SYMBOL(pmac_xpram_write); diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c new file mode 100644 index 000000000..59ab16fa6 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pci.c @@ -0,0 +1,1255 @@ +/* + * Support for PCI bridges found on Power Macintoshes. + * + * Copyright (C) 2003-2005 Benjamin Herrenschmuidt (benh@kernel.crashing.org) + * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/of_pci.h> + +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> +#include <asm/grackle.h> +#include <asm/ppc-pci.h> + +#include "pmac.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +/* XXX Could be per-controller, but I don't think we risk anything by + * assuming we won't have both UniNorth and Bandit */ +static int has_uninorth; +#ifdef CONFIG_PPC64 +static struct pci_controller *u3_agp; +#else +static int has_second_ohare; +#endif /* CONFIG_PPC64 */ + +extern int pcibios_assign_bus_offset; + +struct device_node *k2_skiplist[2]; + +/* + * Magic constants for enabling cache coherency in the bandit/PSX bridge. 
+ */ +#define BANDIT_DEVID_2 8 +#define BANDIT_REVID 3 + +#define BANDIT_DEVNUM 11 +#define BANDIT_MAGIC 0x50 +#define BANDIT_COHERENT 0x40 + +static int __init fixup_one_level_bus_range(struct device_node *node, int higher) +{ + for (; node != 0;node = node->sibling) { + const int * bus_range; + const unsigned int *class_code; + int len; + + /* For PCI<->PCI bridges or CardBus bridges, we go down */ + class_code = of_get_property(node, "class-code", NULL); + if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && + (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) + continue; + bus_range = of_get_property(node, "bus-range", &len); + if (bus_range != NULL && len > 2 * sizeof(int)) { + if (bus_range[1] > higher) + higher = bus_range[1]; + } + higher = fixup_one_level_bus_range(node->child, higher); + } + return higher; +} + +/* This routine fixes the "bus-range" property of all bridges in the + * system since they tend to have their "last" member wrong on macs + * + * Note that the bus numbers manipulated here are OF bus numbers, they + * are not Linux bus numbers. + */ +static void __init fixup_bus_range(struct device_node *bridge) +{ + int *bus_range, len; + struct property *prop; + + /* Lookup the "bus-range" property for the hose */ + prop = of_find_property(bridge, "bus-range", &len); + if (prop == NULL || prop->length < 2 * sizeof(int)) + return; + + bus_range = prop->value; + bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); +} + +/* + * Apple MacRISC (U3, UniNorth, Bandit, Chaos) PCI controllers. + * + * The "Bandit" version is present in all early PCI PowerMacs, + * and up to the first ones using Grackle. Some machines may + * have 2 bandit controllers (2 PCI busses). + * + * "Chaos" is used in some "Bandit"-type machines as a bridge + * for the separate display bus. It is accessed the same + * way as bandit, but cannot be probed for devices. It therefore + * has its own config access functions. + * + * The "UniNorth" version is present in all Core99 machines + * (iBook, G4, new IMacs, and all the recent Apple machines). + * It contains 3 controllers in one ASIC. + * + * The U3 is the bridge used on G5 machines. It contains an + * AGP bus which is dealt with the old UniNorth access routines + * and a HyperTransport bus which uses its own set of access + * functions. + */ + +#define MACRISC_CFA0(devfn, off) \ + ((1 << (unsigned int)PCI_SLOT(dev_fn)) \ + | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \ + | (((unsigned int)(off)) & 0xFCUL)) + +#define MACRISC_CFA1(bus, devfn, off) \ + ((((unsigned int)(bus)) << 16) \ + |(((unsigned int)(devfn)) << 8) \ + |(((unsigned int)(off)) & 0xFCUL) \ + |1UL) + +static void __iomem *macrisc_cfg_map_bus(struct pci_bus *bus, + unsigned int dev_fn, + int offset) +{ + unsigned int caddr; + struct pci_controller *hose; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return NULL; + + if (bus->number == hose->first_busno) { + if (dev_fn < (11 << 3)) + return NULL; + caddr = MACRISC_CFA0(dev_fn, offset); + } else + caddr = MACRISC_CFA1(bus->number, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= has_uninorth ? 
0x07 : 0x03; + return hose->cfg_data + offset; +} + +static struct pci_ops macrisc_pci_ops = +{ + .map_bus = macrisc_cfg_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + +#ifdef CONFIG_PPC32 +/* + * Verify that a specific (bus, dev_fn) exists on chaos + */ +static void __iomem *chaos_map_bus(struct pci_bus *bus, unsigned int devfn, + int offset) +{ + struct device_node *np; + const u32 *vendor, *device; + + if (offset >= 0x100) + return NULL; + np = of_pci_find_child_device(bus->dev.of_node, devfn); + if (np == NULL) + return NULL; + + vendor = of_get_property(np, "vendor-id", NULL); + device = of_get_property(np, "device-id", NULL); + if (vendor == NULL || device == NULL) + return NULL; + + if ((*vendor == 0x106b) && (*device == 3) && (offset >= 0x10) + && (offset != 0x14) && (offset != 0x18) && (offset <= 0x24)) + return NULL; + + return macrisc_cfg_map_bus(bus, devfn, offset); +} + +static struct pci_ops chaos_pci_ops = +{ + .map_bus = chaos_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + +static void __init setup_chaos(struct pci_controller *hose, + struct resource *addr) +{ + /* assume a `chaos' bridge */ + hose->ops = &chaos_pci_ops; + hose->cfg_addr = ioremap(addr->start + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->start + 0xc00000, 0x1000); +} +#endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC64 +/* + * These versions of U3 HyperTransport config space access ops do not + * implement self-view of the HT host yet + */ + +/* + * This function deals with some "special cases" devices. + * + * 0 -> No special case + * 1 -> Skip the device but act as if the access was successful + * (return 0xff's on reads, eventually, cache config space + * accesses in a later version) + * -1 -> Hide the device (unsuccessful access) + */ +static int u3_ht_skip_device(struct pci_controller *hose, + struct pci_bus *bus, unsigned int devfn) +{ + struct device_node *busdn, *dn; + int i; + + /* We only allow config cycles to devices that are in OF device-tree + * as we are apparently having some weird things going on with some + * revs of K2 on recent G5s, except for the host bridge itself, which + * is missing from the tree but we know we can probe. + */ + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else if (devfn == 0) + return 0; + else + busdn = hose->dn; + for (dn = busdn->child; dn; dn = dn->sibling) + if (PCI_DN(dn) && PCI_DN(dn)->devfn == devfn) + break; + if (dn == NULL) + return -1; + + /* + * When a device in K2 is powered down, we die on config + * cycle accesses. Fix that here. 
+ */ + for (i=0; i<2; i++) + if (k2_skiplist[i] == dn) + return 1; + + return 0; +} + +#define U3_HT_CFA0(devfn, off) \ + ((((unsigned int)devfn) << 8) | offset) +#define U3_HT_CFA1(bus, devfn, off) \ + (U3_HT_CFA0(devfn, off) \ + + (((unsigned int)bus) << 16) \ + + 0x01000000UL) + +static void __iomem *u3_ht_cfg_access(struct pci_controller *hose, u8 bus, + u8 devfn, u8 offset, int *swap) +{ + *swap = 1; + if (bus == hose->first_busno) { + if (devfn != 0) + return hose->cfg_data + U3_HT_CFA0(devfn, offset); + *swap = 0; + return ((void __iomem *)hose->cfg_addr) + (offset << 2); + } else + return hose->cfg_data + U3_HT_CFA1(bus, devfn, offset); +} + +static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + void __iomem *addr; + int swap; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x100) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset, &swap); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (u3_ht_skip_device(hose, bus, devfn)) { + case 0: + break; + case 1: + switch (len) { + case 1: + *val = 0xff; break; + case 2: + *val = 0xffff; break; + default: + *val = 0xfffffffful; break; + } + return PCIBIOS_SUCCESSFUL; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = swap ? in_le16(addr) : in_be16(addr); + break; + default: + *val = swap ? in_le32(addr) : in_be32(addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + void __iomem *addr; + int swap; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x100) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset, &swap); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (u3_ht_skip_device(hose, bus, devfn)) { + case 0: + break; + case 1: + return PCIBIOS_SUCCESSFUL; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8(addr, val); + break; + case 2: + swap ? out_le16(addr, val) : out_be16(addr, val); + break; + default: + swap ? 
out_le32(addr, val) : out_be32(addr, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_ht_pci_ops = +{ + .read = u3_ht_read_config, + .write = u3_ht_write_config, +}; + +#define U4_PCIE_CFA0(devfn, off) \ + ((1 << ((unsigned int)PCI_SLOT(dev_fn))) \ + | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \ + | ((((unsigned int)(off)) >> 8) << 28) \ + | (((unsigned int)(off)) & 0xfcU)) + +#define U4_PCIE_CFA1(bus, devfn, off) \ + ((((unsigned int)(bus)) << 16) \ + |(((unsigned int)(devfn)) << 8) \ + | ((((unsigned int)(off)) >> 8) << 28) \ + |(((unsigned int)(off)) & 0xfcU) \ + |1UL) + +static void __iomem *u4_pcie_cfg_map_bus(struct pci_bus *bus, + unsigned int dev_fn, + int offset) +{ + struct pci_controller *hose; + unsigned int caddr; + + if (offset >= 0x1000) + return NULL; + + hose = pci_bus_to_host(bus); + if (!hose) + return NULL; + + if (bus->number == hose->first_busno) { + caddr = U4_PCIE_CFA0(dev_fn, offset); + } else + caddr = U4_PCIE_CFA1(bus->number, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= 0x03; + return hose->cfg_data + offset; +} + +static struct pci_ops u4_pcie_pci_ops = +{ + .map_bus = u4_pcie_cfg_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + +static void pmac_pci_fixup_u4_of_node(struct pci_dev *dev) +{ + /* Apple's device-tree "hides" the root complex virtual P2P bridge + * on U4. However, Linux sees it, causing the PCI <-> OF matching + * code to fail to properly match devices below it. This works around + * it by setting the node of the bridge to point to the PHB node, + * which is not entirely correct but fixes the matching code and + * doesn't break anything else. It's also the simplest possible fix. + */ + if (dev->dev.of_node == NULL) + dev->dev.of_node = pcibios_get_phb_of_node(dev->bus); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, 0x5b, pmac_pci_fixup_u4_of_node); + +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_PPC32 +/* + * For a bandit bridge, turn on cache coherency if necessary. + * N.B. we could clean this up using the hose ops directly. + */ +static void __init init_bandit(struct pci_controller *bp) +{ + unsigned int vendev, magic; + int rev; + + /* read the word at offset 0 in config space for device 11 */ + out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + PCI_VENDOR_ID); + udelay(2); + vendev = in_le32(bp->cfg_data); + if (vendev == (PCI_DEVICE_ID_APPLE_BANDIT << 16) + + PCI_VENDOR_ID_APPLE) { + /* read the revision id */ + out_le32(bp->cfg_addr, + (1UL << BANDIT_DEVNUM) + PCI_REVISION_ID); + udelay(2); + rev = in_8(bp->cfg_data); + if (rev != BANDIT_REVID) + printk(KERN_WARNING + "Unknown revision %d for bandit\n", rev); + } else if (vendev != (BANDIT_DEVID_2 << 16) + PCI_VENDOR_ID_APPLE) { + printk(KERN_WARNING "bandit isn't? (%x)\n", vendev); + return; + } + + /* read the word at offset 0x50 */ + out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + BANDIT_MAGIC); + udelay(2); + magic = in_le32(bp->cfg_data); + if ((magic & BANDIT_COHERENT) != 0) + return; + magic |= BANDIT_COHERENT; + udelay(2); + out_le32(bp->cfg_data, magic); + printk(KERN_INFO "Cache coherency enabled for bandit/PSX\n"); +} + +/* + * Tweak the PCI-PCI bridge chip on the blue & white G3s. 
+ */ +static void __init init_p2pbridge(void) +{ + struct device_node *p2pbridge; + struct pci_controller* hose; + u8 bus, devfn; + u16 val; + + /* XXX it would be better here to identify the specific + PCI-PCI bridge chip we have. */ + p2pbridge = of_find_node_by_name(NULL, "pci-bridge"); + if (p2pbridge == NULL + || p2pbridge->parent == NULL + || strcmp(p2pbridge->parent->name, "pci") != 0) + goto done; + if (pci_device_from_OF_node(p2pbridge, &bus, &devfn) < 0) { + DBG("Can't find PCI infos for PCI<->PCI bridge\n"); + goto done; + } + /* Warning: At this point, we have not yet renumbered all busses. + * So we must use OF walking to find out hose + */ + hose = pci_find_hose_for_OF_device(p2pbridge); + if (!hose) { + DBG("Can't find hose for PCI<->PCI bridge\n"); + goto done; + } + if (early_read_config_word(hose, bus, devfn, + PCI_BRIDGE_CONTROL, &val) < 0) { + printk(KERN_ERR "init_p2pbridge: couldn't read bridge" + " control\n"); + goto done; + } + val &= ~PCI_BRIDGE_CTL_MASTER_ABORT; + early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val); +done: + of_node_put(p2pbridge); +} + +static void __init init_second_ohare(void) +{ + struct device_node *np = of_find_node_by_name(NULL, "pci106b,7"); + unsigned char bus, devfn; + unsigned short cmd; + + if (np == NULL) + return; + + /* This must run before we initialize the PICs since the second + * ohare hosts a PIC that will be accessed there. + */ + if (pci_device_from_OF_node(np, &bus, &devfn) == 0) { + struct pci_controller* hose = + pci_find_hose_for_OF_device(np); + if (!hose) { + printk(KERN_ERR "Can't find PCI hose for OHare2 !\n"); + of_node_put(np); + return; + } + early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; + cmd &= ~PCI_COMMAND_IO; + early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd); + } + has_second_ohare = 1; + of_node_put(np); +} + +/* + * Some Apple desktop machines have a NEC PD720100A USB2 controller + * on the motherboard. Open Firmware, on these, will disable the + * EHCI part of it so it behaves like a pair of OHCI's. 
This fixup + * code re-enables it ;) + */ +static void __init fixup_nec_usb2(void) +{ + struct device_node *nec; + + for_each_node_by_name(nec, "usb") { + struct pci_controller *hose; + u32 data; + const u32 *prop; + u8 bus, devfn; + + prop = of_get_property(nec, "vendor-id", NULL); + if (prop == NULL) + continue; + if (0x1033 != *prop) + continue; + prop = of_get_property(nec, "device-id", NULL); + if (prop == NULL) + continue; + if (0x0035 != *prop) + continue; + prop = of_get_property(nec, "reg", NULL); + if (prop == NULL) + continue; + devfn = (prop[0] >> 8) & 0xff; + bus = (prop[0] >> 16) & 0xff; + if (PCI_FUNC(devfn) != 0) + continue; + hose = pci_find_hose_for_OF_device(nec); + if (!hose) + continue; + early_read_config_dword(hose, bus, devfn, 0xe4, &data); + if (data & 1UL) { + printk("Found NEC PD720100A USB2 chip with disabled" + " EHCI, fixing up...\n"); + data &= ~1UL; + early_write_config_dword(hose, bus, devfn, 0xe4, data); + } + } +} + +static void __init setup_bandit(struct pci_controller *hose, + struct resource *addr) +{ + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(addr->start + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->start + 0xc00000, 0x1000); + init_bandit(hose); +} + +static int __init setup_uninorth(struct pci_controller *hose, + struct resource *addr) +{ + pci_add_flags(PCI_REASSIGN_ALL_BUS); + has_uninorth = 1; + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(addr->start + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->start + 0xc00000, 0x1000); + /* We "know" that the bridge at f2000000 has the PCI slots. */ + return addr->start == 0xf2000000; +} +#endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC64 +static void __init setup_u3_agp(struct pci_controller* hose) +{ + /* On G5, we move AGP up to high bus number so we don't need + * to reassign bus numbers for HT. If we ever have P2P bridges + * on AGP, we'll have to move pci_assign_all_busses to the + * pci_controller structure so we enable it for AGP and not for + * HT childs. + * We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->first_busno = 0xf0; + hose->last_busno = 0xff; + has_uninorth = 1; + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + u3_agp = hose; +} + +static void __init setup_u4_pcie(struct pci_controller* hose) +{ + /* We currently only implement the "non-atomic" config space, to + * be optimised later. + */ + hose->ops = &u4_pcie_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + /* The bus contains a bridge from root -> device, we need to + * make it visible on bus 0 so that we pick the right type + * of config cycles. If we didn't, we would have to force all + * config cycles to be type 1. So we override the "bus-range" + * property here + */ + hose->first_busno = 0x00; + hose->last_busno = 0xff; +} + +static void __init parse_region_decode(struct pci_controller *hose, + u32 decode) +{ + unsigned long base, end, next = -1; + int i, cur = -1; + + /* Iterate through all bits. 
+
+static void __init parse_region_decode(struct pci_controller *hose,
+ u32 decode)
+{
+ unsigned long base, end, next = -1;
+ int i, cur = -1;
+
+ /* Iterate through all bits. We ignore the last bit as this region is
+ * reserved for the ROM among other niceties
+ */
+ for (i = 0; i < 31; i++) {
+ if ((decode & (0x80000000 >> i)) == 0)
+ continue;
+ if (i < 16) {
+ base = 0xf0000000 | (((u32)i) << 24);
+ end = base + 0x00ffffff;
+ } else {
+ base = ((u32)i-16) << 28;
+ end = base + 0x0fffffff;
+ }
+ if (base != next) {
+ if (++cur >= 3) {
+ printk(KERN_WARNING "PCI: Too many ranges !\n");
+ break;
+ }
+ hose->mem_resources[cur].flags = IORESOURCE_MEM;
+ hose->mem_resources[cur].name = hose->dn->full_name;
+ hose->mem_resources[cur].start = base;
+ hose->mem_resources[cur].end = end;
+ hose->mem_offset[cur] = 0;
+ DBG(" %d: 0x%08lx-0x%08lx\n", cur, base, end);
+ } else {
+ DBG(" : -0x%08lx\n", end);
+ hose->mem_resources[cur].end = end;
+ }
+ next = end + 1;
+ }
+}
+
+static void __init setup_u3_ht(struct pci_controller* hose)
+{
+ struct device_node *np = hose->dn;
+ struct resource cfg_res, self_res;
+ u32 decode;
+
+ hose->ops = &u3_ht_pci_ops;
+
+ /* Get base addresses from OF tree
+ */
+ if (of_address_to_resource(np, 0, &cfg_res) ||
+ of_address_to_resource(np, 1, &self_res)) {
+ printk(KERN_ERR "PCI: Failed to get U3/U4 HT resources !\n");
+ return;
+ }
+
+ /* Map external cfg space access into cfg_data and self registers
+ * into cfg_addr
+ */
+ hose->cfg_data = ioremap(cfg_res.start, 0x02000000);
+ hose->cfg_addr = ioremap(self_res.start, resource_size(&self_res));
+
+ /*
+ * /ht node doesn't expose a "ranges" property, we read the register
+ * that controls the decoding logic and use that for memory regions.
+ * The IO region is hard coded since it is fixed in HW as well.
+ */
+ hose->io_base_phys = 0xf4000000;
+ hose->pci_io_size = 0x00400000;
+ hose->io_resource.name = np->full_name;
+ hose->io_resource.start = 0;
+ hose->io_resource.end = 0x003fffff;
+ hose->io_resource.flags = IORESOURCE_IO;
+ hose->first_busno = 0;
+ hose->last_busno = 0xef;
+
+ /* Note: fix offset when cfg_addr becomes a void * */
+ decode = in_be32(hose->cfg_addr + 0x80);
+
+ DBG("PCI: Apple HT bridge decode register: 0x%08x\n", decode);
+
+ /* NOTE: The decode register setup is a bit weird... region
+ * 0xf8000000 for example is marked as enabled in there while it's
+ * actually the memory controller registers.
+ * That means that we are incorrectly attributing it to HT.
+ *
+ * In a similar vein, region 0xf4000000 is actually the HT IO space but
+ * also marked as enabled in here and 0xf9000000 is used by some other
+ * internal bits of the northbridge.
+ *
+ * Unfortunately, we can't just mask out those bits as we would end
+ * up with more regions than we can cope with (Linux can only cope with
+ * 3 memory regions for a PHB at this stage).
+ *
+ * So for now, we just do a little hack. We happen to -know- that
+ * Apple firmware doesn't assign things below 0xfa000000 for that
+ * bridge anyway so we mask out all bits we don't want.
+ */
+ decode &= 0x003fffff;
+
+ /* Now parse the resulting bits and build resources */
+ parse_region_decode(hose, decode);
+}
+#endif /* CONFIG_PPC64 */
+
+/*
+ * We assume that if we have a G3 powermac, we have one bridge called
+ * "pci" (a MPC106) and no bandit or chaos bridges, and contrariwise,
+ * if we have one or more bandit or chaos bridges, we don't have a MPC106.
+ */ +static int __init pmac_add_bridge(struct device_node *dev) +{ + int len; + struct pci_controller *hose; + struct resource rsrc; + char *disp_name; + const int *bus_range; + int primary = 1, has_address = 0; + + DBG("Adding PCI host bridge %s\n", dev->full_name); + + /* Fetch host bridge registers address */ + has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); + + /* Get bus range if any */ + bus_range = of_get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s, assume" + " bus 0\n", dev->full_name); + } + + hose = pcibios_alloc_controller(dev); + if (!hose) + return -ENOMEM; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + hose->controller_ops = pmac_pci_controller_ops; + + disp_name = NULL; + + /* 64 bits only bridges */ +#ifdef CONFIG_PPC64 + if (of_device_is_compatible(dev, "u3-agp")) { + setup_u3_agp(hose); + disp_name = "U3-AGP"; + primary = 0; + } else if (of_device_is_compatible(dev, "u3-ht")) { + setup_u3_ht(hose); + disp_name = "U3-HT"; + primary = 1; + } else if (of_device_is_compatible(dev, "u4-pcie")) { + setup_u4_pcie(hose); + disp_name = "U4-PCIE"; + primary = 0; + } + printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number:" + " %d->%d\n", disp_name, hose->first_busno, hose->last_busno); +#endif /* CONFIG_PPC64 */ + + /* 32 bits only bridges */ +#ifdef CONFIG_PPC32 + if (of_device_is_compatible(dev, "uni-north")) { + primary = setup_uninorth(hose, &rsrc); + disp_name = "UniNorth"; + } else if (strcmp(dev->name, "pci") == 0) { + /* XXX assume this is a mpc106 (grackle) */ + setup_grackle(hose); + disp_name = "Grackle (MPC106)"; + } else if (strcmp(dev->name, "bandit") == 0) { + setup_bandit(hose, &rsrc); + disp_name = "Bandit"; + } else if (strcmp(dev->name, "chaos") == 0) { + setup_chaos(hose, &rsrc); + disp_name = "Chaos"; + primary = 0; + } + printk(KERN_INFO "Found %s PCI host bridge at 0x%016llx. " + "Firmware bus number: %d->%d\n", + disp_name, (unsigned long long)rsrc.start, hose->first_busno, + hose->last_busno); +#endif /* CONFIG_PPC32 */ + + DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", + hose, hose->cfg_addr, hose->cfg_data); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, primary); + + /* Fixup "bus-range" OF property */ + fixup_bus_range(dev); + + return 0; +} + +void pmac_pci_irq_fixup(struct pci_dev *dev) +{ +#ifdef CONFIG_PPC32 + /* Fixup interrupt for the modem/ethernet combo controller. + * on machines with a second ohare chip. + * The number in the device tree (27) is bogus (correct for + * the ethernet-only board but not the combo ethernet/modem + * board). The real interrupt is 28 on the second controller + * -> 28+32 = 60. 
+ */ + if (has_second_ohare && + dev->vendor == PCI_VENDOR_ID_DEC && + dev->device == PCI_DEVICE_ID_DEC_TULIP_PLUS) { + dev->irq = irq_create_mapping(NULL, 60); + irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW); + } +#endif /* CONFIG_PPC32 */ +} + +void __init pmac_pci_init(void) +{ + struct device_node *np, *root; + struct device_node *ht = NULL; + + pci_set_flags(PCI_CAN_SKIP_ISA_ALIGN); + + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_CRIT "pmac_pci_init: can't find root " + "of device tree\n"); + return; + } + for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) { + if (np->name == NULL) + continue; + if (strcmp(np->name, "bandit") == 0 + || strcmp(np->name, "chaos") == 0 + || strcmp(np->name, "pci") == 0) { + if (pmac_add_bridge(np) == 0) + of_node_get(np); + } + if (strcmp(np->name, "ht") == 0) { + of_node_get(np); + ht = np; + } + } + of_node_put(root); + +#ifdef CONFIG_PPC64 + /* Probe HT last as it relies on the agp resources to be already + * setup + */ + if (ht && pmac_add_bridge(ht) != 0) + of_node_put(ht); + + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + + /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We + * assume there is no P2P bridge on the AGP bus, which should be a + * safe assumptions for now. We should do something better in the + * future though + */ + if (u3_agp) { + struct device_node *np = u3_agp->dn; + PCI_DN(np)->busno = 0xf0; + for (np = np->child; np; np = np->sibling) + PCI_DN(np)->busno = 0xf0; + } + /* pmac_check_ht_link(); */ + +#else /* CONFIG_PPC64 */ + init_p2pbridge(); + init_second_ohare(); + fixup_nec_usb2(); + + /* We are still having some issues with the Xserve G4, enabling + * some offset between bus number and domains for now when we + * assign all busses should help for now + */ + if (pci_has_flag(PCI_REASSIGN_ALL_BUS)) + pcibios_assign_bus_offset = 0x10; +#endif +} + +#ifdef CONFIG_PPC32 +static bool pmac_pci_enable_device_hook(struct pci_dev *dev) +{ + struct device_node* node; + int updatecfg = 0; + int uninorth_child; + + node = pci_device_to_OF_node(dev); + + /* We don't want to enable USB controllers absent from the OF tree + * (iBook second controller) + */ + if (dev->vendor == PCI_VENDOR_ID_APPLE + && dev->class == PCI_CLASS_SERIAL_USB_OHCI + && !node) { + printk(KERN_INFO "Apple USB OHCI %s disabled by firmware\n", + pci_name(dev)); + return false; + } + + if (!node) + return true; + + uninorth_child = node->parent && + of_device_is_compatible(node->parent, "uni-north"); + + /* Firewire & GMAC were disabled after PCI probe, the driver is + * claiming them, we must re-enable them now. + */ + if (uninorth_child && !strcmp(node->name, "firewire") && + (of_device_is_compatible(node, "pci106b,18") || + of_device_is_compatible(node, "pci106b,30") || + of_device_is_compatible(node, "pci11c1,5811"))) { + pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, node, 0, 1); + pmac_call_feature(PMAC_FTR_1394_ENABLE, node, 0, 1); + updatecfg = 1; + } + if (uninorth_child && !strcmp(node->name, "ethernet") && + of_device_is_compatible(node, "gmac")) { + pmac_call_feature(PMAC_FTR_GMAC_ENABLE, node, 0, 1); + updatecfg = 1; + } + + /* + * Fixup various header fields on 32 bits. We don't do that on + * 64 bits as some of these have strange values behind the HT + * bridge and we must not, for example, enable MWI or set the + * cache line size on them. 
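+	 *
+	 * (Aside, not part of the original: PCI_CACHE_LINE_SIZE is measured
+	 * in 32-bit words, hence the L1_CACHE_BYTES >> 2 below; a 32-byte
+	 * cache line is written as the value 8.)
+	 */
+#if 0
+	/* sketch of the unit conversion only */
+	u8 cls = L1_CACHE_BYTES >> 2;	/* bytes -> 32-bit words */
+#endif
+	/*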
+ */ + if (updatecfg) { + u16 cmd; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER + | PCI_COMMAND_INVALIDATE; + pci_write_config_word(dev, PCI_COMMAND, cmd); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 16); + + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, + L1_CACHE_BYTES >> 2); + } + + return true; +} + +void pmac_pci_fixup_ohci(struct pci_dev *dev) +{ + struct device_node *node = pci_device_to_OF_node(dev); + + /* We don't want to assign resources to USB controllers + * absent from the OF tree (iBook second controller) + */ + if (dev->class == PCI_CLASS_SERIAL_USB_OHCI && !node) + dev->resource[0].flags = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, PCI_ANY_ID, pmac_pci_fixup_ohci); + +/* We power down some devices after they have been probed. They'll + * be powered back on later on + */ +void __init pmac_pcibios_after_init(void) +{ + struct device_node* nd; + + for_each_node_by_name(nd, "firewire") { + if (nd->parent && (of_device_is_compatible(nd, "pci106b,18") || + of_device_is_compatible(nd, "pci106b,30") || + of_device_is_compatible(nd, "pci11c1,5811")) + && of_device_is_compatible(nd->parent, "uni-north")) { + pmac_call_feature(PMAC_FTR_1394_ENABLE, nd, 0, 0); + pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, nd, 0, 0); + } + } + for_each_node_by_name(nd, "ethernet") { + if (nd->parent && of_device_is_compatible(nd, "gmac") + && of_device_is_compatible(nd->parent, "uni-north")) + pmac_call_feature(PMAC_FTR_GMAC_ENABLE, nd, 0, 0); + } +} + +void pmac_pci_fixup_cardbus(struct pci_dev* dev) +{ + if (!machine_is(powermac)) + return; + /* + * Fix the interrupt routing on the various cardbus bridges + * used on powerbooks + */ + if (dev->vendor != PCI_VENDOR_ID_TI) + return; + if (dev->device == PCI_DEVICE_ID_TI_1130 || + dev->device == PCI_DEVICE_ID_TI_1131) { + u8 val; + /* Enable PCI interrupt */ + if (pci_read_config_byte(dev, 0x91, &val) == 0) + pci_write_config_byte(dev, 0x91, val | 0x30); + /* Disable ISA interrupt mode */ + if (pci_read_config_byte(dev, 0x92, &val) == 0) + pci_write_config_byte(dev, 0x92, val & ~0x06); + } + if (dev->device == PCI_DEVICE_ID_TI_1210 || + dev->device == PCI_DEVICE_ID_TI_1211 || + dev->device == PCI_DEVICE_ID_TI_1410 || + dev->device == PCI_DEVICE_ID_TI_1510) { + u8 val; + /* 0x8c == TI122X_IRQMUX, 2 says to route the INTA + signal out the MFUNC0 pin */ + if (pci_read_config_byte(dev, 0x8c, &val) == 0) + pci_write_config_byte(dev, 0x8c, (val & ~0x0f) | 2); + /* Disable ISA interrupt mode */ + if (pci_read_config_byte(dev, 0x92, &val) == 0) + pci_write_config_byte(dev, 0x92, val & ~0x06); + } +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus); + +void pmac_pci_fixup_pciata(struct pci_dev* dev) +{ + u8 progif = 0; + + /* + * On PowerMacs, we try to switch any PCI ATA controller to + * fully native mode + */ + if (!machine_is(powermac)) + return; + + /* Some controllers don't have the class IDE */ + if (dev->vendor == PCI_VENDOR_ID_PROMISE) + switch(dev->device) { + case PCI_DEVICE_ID_PROMISE_20246: + case PCI_DEVICE_ID_PROMISE_20262: + case PCI_DEVICE_ID_PROMISE_20263: + case PCI_DEVICE_ID_PROMISE_20265: + case PCI_DEVICE_ID_PROMISE_20267: + case PCI_DEVICE_ID_PROMISE_20268: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20270: + case PCI_DEVICE_ID_PROMISE_20271: + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20276: + case PCI_DEVICE_ID_PROMISE_20277: + goto good; + } + /* Others, check PCI class */ + if ((dev->class >> 
8) != PCI_CLASS_STORAGE_IDE) + return; + good: + pci_read_config_byte(dev, PCI_CLASS_PROG, &progif); + if ((progif & 5) != 5) { + printk(KERN_INFO "PCI: %s Forcing PCI IDE into native mode\n", + pci_name(dev)); + (void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5); + if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || + (progif & 5) != 5) + printk(KERN_ERR "Rewrite of PROGIF failed !\n"); + else { + /* Clear IO BARs, they will be reassigned */ + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, 0); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, 0); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_2, 0); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_3, 0); + } + } +} +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pmac_pci_fixup_pciata); +#endif /* CONFIG_PPC32 */ + +/* + * Disable second function on K2-SATA, it's broken + * and disable IO BARs on first one + */ +static void fixup_k2_sata(struct pci_dev* dev) +{ + int i; + u16 cmd; + + if (PCI_FUNC(dev->devfn) > 0) { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 6; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, + 0); + } + } else { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~PCI_COMMAND_IO; + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 5; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, + 0); + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS, 0x0240, fixup_k2_sata); + +/* + * On U4 (aka CPC945) the PCIe root complex "P2P" bridge resource ranges aren't + * configured by the firmware. The bridge itself seems to ignore them but it + * causes problems with Linux which then re-assigns devices below the bridge, + * thus changing addresses of those devices from what was in the device-tree, + * which sucks when those are video cards using offb + * + * We could just mark it transparent but I prefer fixing up the resources to + * properly show what's going on here, as I have some doubts about having them + * badly configured potentially being an issue for DMA. + * + * We leave PIO alone, it seems to be fine + * + * Oh and there's another funny bug. The OF properties advertize the region + * 0xf1000000..0xf1ffffff as being forwarded as memory space. But that's + * actually not true, this region is the memory mapped config space. So we + * also need to filter it out or we'll map things in the wrong place. + */ +static void fixup_u4_pcie(struct pci_dev* dev) +{ + struct pci_controller *host = pci_bus_to_host(dev->bus); + struct resource *region = NULL; + u32 reg; + int i; + + /* Only do that on PowerMac */ + if (!machine_is(powermac)) + return; + + /* Find the largest MMIO region */ + for (i = 0; i < 3; i++) { + struct resource *r = &host->mem_resources[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + /* Skip the 0xf0xxxxxx..f2xxxxxx regions, we know they + * are reserved by HW for other things + */ + if (r->start >= 0xf0000000 && r->start < 0xf3000000) + continue; + if (!region || resource_size(r) > resource_size(region)) + region = r; + } + /* Nothing found, bail */ + if (region == 0) + return; + + /* Print things out */ + printk(KERN_INFO "PCI: Fixup U4 PCIe bridge range: %pR\n", region); + + /* Fixup bridge config space. 
We know it's a Mac, resource aren't + * offset so let's just blast them as-is. We also know that they + * fit in 32 bits + */ + reg = ((region->start >> 16) & 0xfff0) | (region->end & 0xfff00000); + pci_write_config_dword(dev, PCI_MEMORY_BASE, reg); + pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_U4_PCIE, fixup_u4_pcie); + +#ifdef CONFIG_PPC64 +static int pmac_pci_probe_mode(struct pci_bus *bus) +{ + struct device_node *node = pci_bus_to_OF_node(bus); + + /* We need to use normal PCI probing for the AGP bus, + * since the device for the AGP bridge isn't in the tree. + * Same for the PCIe host on U4 and the HT host bridge. + */ + if (bus->self == NULL && (of_device_is_compatible(node, "u3-agp") || + of_device_is_compatible(node, "u4-pcie") || + of_device_is_compatible(node, "u3-ht"))) + return PCI_PROBE_NORMAL; + return PCI_PROBE_DEVTREE; +} +#endif /* CONFIG_PPC64 */ + +struct pci_controller_ops pmac_pci_controller_ops = { +#ifdef CONFIG_PPC64 + .probe_mode = pmac_pci_probe_mode, +#endif +#ifdef CONFIG_PPC32 + .enable_device_hook = pmac_pci_enable_device_hook, +#endif +}; + diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c new file mode 100644 index 000000000..e49d07f3d --- /dev/null +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -0,0 +1,410 @@ +#include <linux/types.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/of_irq.h> + +#include <asm/pmac_feature.h> +#include <asm/pmac_pfunc.h> + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +static irqreturn_t macio_gpio_irq(int irq, void *data) +{ + pmf_do_irq(data); + + return IRQ_HANDLED; +} + +static int macio_do_gpio_irq_enable(struct pmf_function *func) +{ + unsigned int irq = irq_of_parse_and_map(func->node, 0); + if (irq == NO_IRQ) + return -EINVAL; + return request_irq(irq, macio_gpio_irq, 0, func->node->name, func); +} + +static int macio_do_gpio_irq_disable(struct pmf_function *func) +{ + unsigned int irq = irq_of_parse_and_map(func->node, 0); + if (irq == NO_IRQ) + return -EINVAL; + free_irq(irq, func); + return 0; +} + +static int macio_do_gpio_write(PMF_STD_ARGS, u8 value, u8 mask) +{ + u8 __iomem *addr = (u8 __iomem *)func->driver_data; + unsigned long flags; + u8 tmp; + + /* Check polarity */ + if (args && args->count && !args->u[0].v) + value = ~value; + + /* Toggle the GPIO */ + raw_spin_lock_irqsave(&feature_lock, flags); + tmp = readb(addr); + tmp = (tmp & ~mask) | (value & mask); + DBG("Do write 0x%02x to GPIO %s (%p)\n", + tmp, func->node->full_name, addr); + writeb(tmp, addr); + raw_spin_unlock_irqrestore(&feature_lock, flags); + + return 0; +} + +static int macio_do_gpio_read(PMF_STD_ARGS, u8 mask, int rshift, u8 xor) +{ + u8 __iomem *addr = (u8 __iomem *)func->driver_data; + u32 value; + + /* Check if we have room for reply */ + if (args == NULL || args->count == 0 || args->u[0].p == NULL) + return -EINVAL; + + value = readb(addr); + *args->u[0].p = ((value & mask) >> rshift) ^ xor; + + return 0; +} + +static int macio_do_delay(PMF_STD_ARGS, u32 duration) +{ + /* assume we can sleep ! 
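+	 *
+	 * (Aside, not part of the original: duration is in microseconds and
+	 * msleep() takes milliseconds, so the line below rounds up. An
+	 * equivalent formulation would be:)
+	 */
+#if 0
+	msleep(DIV_ROUND_UP(duration, 1000));	/* same round-up behaviour */
+#endif
+	/*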
*/ + msleep((duration + 999) / 1000); + return 0; +} + +static struct pmf_handlers macio_gpio_handlers = { + .irq_enable = macio_do_gpio_irq_enable, + .irq_disable = macio_do_gpio_irq_disable, + .write_gpio = macio_do_gpio_write, + .read_gpio = macio_do_gpio_read, + .delay = macio_do_delay, +}; + +static void macio_gpio_init_one(struct macio_chip *macio) +{ + struct device_node *gparent, *gp; + + /* + * Find the "gpio" parent node + */ + + for (gparent = NULL; + (gparent = of_get_next_child(macio->of_node, gparent)) != NULL;) + if (strcmp(gparent->name, "gpio") == 0) + break; + if (gparent == NULL) + return; + + DBG("Installing GPIO functions for macio %s\n", + macio->of_node->full_name); + + /* + * Ok, got one, we dont need anything special to track them down, so + * we just create them all + */ + for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) { + const u32 *reg = of_get_property(gp, "reg", NULL); + unsigned long offset; + if (reg == NULL) + continue; + offset = *reg; + /* Deal with old style device-tree. We can safely hard code the + * offset for now too even if it's a bit gross ... + */ + if (offset < 0x50) + offset += 0x50; + offset += (unsigned long)macio->base; + pmf_register_driver(gp, &macio_gpio_handlers, (void *)offset); + } + + DBG("Calling initial GPIO functions for macio %s\n", + macio->of_node->full_name); + + /* And now we run all the init ones */ + for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) + pmf_do_functions(gp, NULL, 0, PMF_FLAGS_ON_INIT, NULL); + + /* Note: We do not at this point implement the "at sleep" or "at wake" + * functions. I yet to find any for GPIOs anyway + */ +} + +static int macio_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask) +{ + struct macio_chip *macio = func->driver_data; + unsigned long flags; + + raw_spin_lock_irqsave(&feature_lock, flags); + MACIO_OUT32(offset, (MACIO_IN32(offset) & ~mask) | (value & mask)); + raw_spin_unlock_irqrestore(&feature_lock, flags); + return 0; +} + +static int macio_do_read_reg32(PMF_STD_ARGS, u32 offset) +{ + struct macio_chip *macio = func->driver_data; + + /* Check if we have room for reply */ + if (args == NULL || args->count == 0 || args->u[0].p == NULL) + return -EINVAL; + + *args->u[0].p = MACIO_IN32(offset); + return 0; +} + +static int macio_do_write_reg8(PMF_STD_ARGS, u32 offset, u8 value, u8 mask) +{ + struct macio_chip *macio = func->driver_data; + unsigned long flags; + + raw_spin_lock_irqsave(&feature_lock, flags); + MACIO_OUT8(offset, (MACIO_IN8(offset) & ~mask) | (value & mask)); + raw_spin_unlock_irqrestore(&feature_lock, flags); + return 0; +} + +static int macio_do_read_reg8(PMF_STD_ARGS, u32 offset) +{ + struct macio_chip *macio = func->driver_data; + + /* Check if we have room for reply */ + if (args == NULL || args->count == 0 || args->u[0].p == NULL) + return -EINVAL; + + *((u8 *)(args->u[0].p)) = MACIO_IN8(offset); + return 0; +} + +static int macio_do_read_reg32_msrx(PMF_STD_ARGS, u32 offset, u32 mask, + u32 shift, u32 xor) +{ + struct macio_chip *macio = func->driver_data; + + /* Check if we have room for reply */ + if (args == NULL || args->count == 0 || args->u[0].p == NULL) + return -EINVAL; + + *args->u[0].p = ((MACIO_IN32(offset) & mask) >> shift) ^ xor; + return 0; +} + +static int macio_do_read_reg8_msrx(PMF_STD_ARGS, u32 offset, u32 mask, + u32 shift, u32 xor) +{ + struct macio_chip *macio = func->driver_data; + + /* Check if we have room for reply */ + if (args == NULL || args->count == 0 || args->u[0].p == NULL) + return -EINVAL; + + 
*((u8 *)(args->u[0].p)) = ((MACIO_IN8(offset) & mask) >> shift) ^ xor; + return 0; +} + +static int macio_do_write_reg32_slm(PMF_STD_ARGS, u32 offset, u32 shift, + u32 mask) +{ + struct macio_chip *macio = func->driver_data; + unsigned long flags; + u32 tmp, val; + + /* Check args */ + if (args == NULL || args->count == 0) + return -EINVAL; + + raw_spin_lock_irqsave(&feature_lock, flags); + tmp = MACIO_IN32(offset); + val = args->u[0].v << shift; + tmp = (tmp & ~mask) | (val & mask); + MACIO_OUT32(offset, tmp); + raw_spin_unlock_irqrestore(&feature_lock, flags); + return 0; +} + +static int macio_do_write_reg8_slm(PMF_STD_ARGS, u32 offset, u32 shift, + u32 mask) +{ + struct macio_chip *macio = func->driver_data; + unsigned long flags; + u32 tmp, val; + + /* Check args */ + if (args == NULL || args->count == 0) + return -EINVAL; + + raw_spin_lock_irqsave(&feature_lock, flags); + tmp = MACIO_IN8(offset); + val = args->u[0].v << shift; + tmp = (tmp & ~mask) | (val & mask); + MACIO_OUT8(offset, tmp); + raw_spin_unlock_irqrestore(&feature_lock, flags); + return 0; +} + +static struct pmf_handlers macio_mmio_handlers = { + .write_reg32 = macio_do_write_reg32, + .read_reg32 = macio_do_read_reg32, + .write_reg8 = macio_do_write_reg8, + .read_reg8 = macio_do_read_reg8, + .read_reg32_msrx = macio_do_read_reg32_msrx, + .read_reg8_msrx = macio_do_read_reg8_msrx, + .write_reg32_slm = macio_do_write_reg32_slm, + .write_reg8_slm = macio_do_write_reg8_slm, + .delay = macio_do_delay, +}; + +static void macio_mmio_init_one(struct macio_chip *macio) +{ + DBG("Installing MMIO functions for macio %s\n", + macio->of_node->full_name); + + pmf_register_driver(macio->of_node, &macio_mmio_handlers, macio); +} + +static struct device_node *unin_hwclock; + +static int unin_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&feature_lock, flags); + /* This is fairly bogus in darwin, but it should work for our needs + * implemeted that way: + */ + UN_OUT(offset, (UN_IN(offset) & ~mask) | (value & mask)); + raw_spin_unlock_irqrestore(&feature_lock, flags); + return 0; +} + + +static struct pmf_handlers unin_mmio_handlers = { + .write_reg32 = unin_do_write_reg32, + .delay = macio_do_delay, +}; + +static void uninorth_install_pfunc(void) +{ + struct device_node *np; + + DBG("Installing functions for UniN %s\n", + uninorth_node->full_name); + + /* + * Install handlers for the bridge itself + */ + pmf_register_driver(uninorth_node, &unin_mmio_handlers, NULL); + pmf_do_functions(uninorth_node, NULL, 0, PMF_FLAGS_ON_INIT, NULL); + + + /* + * Install handlers for the hwclock child if any + */ + for (np = NULL; (np = of_get_next_child(uninorth_node, np)) != NULL;) + if (strcmp(np->name, "hw-clock") == 0) { + unin_hwclock = np; + break; + } + if (unin_hwclock) { + DBG("Installing functions for UniN clock %s\n", + unin_hwclock->full_name); + pmf_register_driver(unin_hwclock, &unin_mmio_handlers, NULL); + pmf_do_functions(unin_hwclock, NULL, 0, PMF_FLAGS_ON_INIT, + NULL); + } +} + +/* We export this as the SMP code might init us early */ +int __init pmac_pfunc_base_install(void) +{ + static int pfbase_inited; + int i; + + if (pfbase_inited) + return 0; + pfbase_inited = 1; + + if (!machine_is(powermac)) + return 0; + + DBG("Installing base platform functions...\n"); + + /* + * Locate mac-io chips and install handlers + */ + for (i = 0 ; i < MAX_MACIO_CHIPS; i++) { + if (macio_chips[i].of_node) { + macio_mmio_init_one(&macio_chips[i]); + 
macio_gpio_init_one(&macio_chips[i]); + } + } + + /* + * Install handlers for northbridge and direct mapped hwclock + * if any. We do not implement the config space access callback + * which is only ever used for functions that we do not call in + * the current driver (enabling/disabling cells in U2, mostly used + * to restore the PCI settings, we do that differently) + */ + if (uninorth_node && uninorth_base) + uninorth_install_pfunc(); + + DBG("All base functions installed\n"); + + return 0; +} +machine_arch_initcall(powermac, pmac_pfunc_base_install); + +#ifdef CONFIG_PM + +/* Those can be called by pmac_feature. Ultimately, I should use a sysdev + * or a device, but for now, that's good enough until I sort out some + * ordering issues. Also, we do not bother with GPIOs, as so far I yet have + * to see a case where a GPIO function has the on-suspend or on-resume bit + */ +void pmac_pfunc_base_suspend(void) +{ + int i; + + for (i = 0 ; i < MAX_MACIO_CHIPS; i++) { + if (macio_chips[i].of_node) + pmf_do_functions(macio_chips[i].of_node, NULL, 0, + PMF_FLAGS_ON_SLEEP, NULL); + } + if (uninorth_node) + pmf_do_functions(uninorth_node, NULL, 0, + PMF_FLAGS_ON_SLEEP, NULL); + if (unin_hwclock) + pmf_do_functions(unin_hwclock, NULL, 0, + PMF_FLAGS_ON_SLEEP, NULL); +} + +void pmac_pfunc_base_resume(void) +{ + int i; + + if (unin_hwclock) + pmf_do_functions(unin_hwclock, NULL, 0, + PMF_FLAGS_ON_WAKE, NULL); + if (uninorth_node) + pmf_do_functions(uninorth_node, NULL, 0, + PMF_FLAGS_ON_WAKE, NULL); + for (i = 0 ; i < MAX_MACIO_CHIPS; i++) { + if (macio_chips[i].of_node) + pmf_do_functions(macio_chips[i].of_node, NULL, 0, + PMF_FLAGS_ON_WAKE, NULL); + } +} + +#endif /* CONFIG_PM */ diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c new file mode 100644 index 000000000..430750817 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -0,0 +1,1021 @@ +/* + * + * FIXME: Properly make this race free with refcounting etc... + * + * FIXME: LOCKING !!! + */ + +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/mutex.h> + +#include <asm/prom.h> +#include <asm/pmac_pfunc.h> + +/* Debug */ +#define LOG_PARSE(fmt...) +#define LOG_ERROR(fmt...) printk(fmt) +#define LOG_BLOB(t,b,c) + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) 
+#endif + +/* Command numbers */ +#define PMF_CMD_LIST 0 +#define PMF_CMD_WRITE_GPIO 1 +#define PMF_CMD_READ_GPIO 2 +#define PMF_CMD_WRITE_REG32 3 +#define PMF_CMD_READ_REG32 4 +#define PMF_CMD_WRITE_REG16 5 +#define PMF_CMD_READ_REG16 6 +#define PMF_CMD_WRITE_REG8 7 +#define PMF_CMD_READ_REG8 8 +#define PMF_CMD_DELAY 9 +#define PMF_CMD_WAIT_REG32 10 +#define PMF_CMD_WAIT_REG16 11 +#define PMF_CMD_WAIT_REG8 12 +#define PMF_CMD_READ_I2C 13 +#define PMF_CMD_WRITE_I2C 14 +#define PMF_CMD_RMW_I2C 15 +#define PMF_CMD_GEN_I2C 16 +#define PMF_CMD_SHIFT_BYTES_RIGHT 17 +#define PMF_CMD_SHIFT_BYTES_LEFT 18 +#define PMF_CMD_READ_CFG 19 +#define PMF_CMD_WRITE_CFG 20 +#define PMF_CMD_RMW_CFG 21 +#define PMF_CMD_READ_I2C_SUBADDR 22 +#define PMF_CMD_WRITE_I2C_SUBADDR 23 +#define PMF_CMD_SET_I2C_MODE 24 +#define PMF_CMD_RMW_I2C_SUBADDR 25 +#define PMF_CMD_READ_REG32_MASK_SHR_XOR 26 +#define PMF_CMD_READ_REG16_MASK_SHR_XOR 27 +#define PMF_CMD_READ_REG8_MASK_SHR_XOR 28 +#define PMF_CMD_WRITE_REG32_SHL_MASK 29 +#define PMF_CMD_WRITE_REG16_SHL_MASK 30 +#define PMF_CMD_WRITE_REG8_SHL_MASK 31 +#define PMF_CMD_MASK_AND_COMPARE 32 +#define PMF_CMD_COUNT 33 + +/* This structure holds the state of the parser while walking through + * a function definition + */ +struct pmf_cmd { + const void *cmdptr; + const void *cmdend; + struct pmf_function *func; + void *instdata; + struct pmf_args *args; + int error; +}; + +#if 0 +/* Debug output */ +static void print_blob(const char *title, const void *blob, int bytes) +{ + printk("%s", title); + while(bytes--) { + printk("%02x ", *((u8 *)blob)); + blob += 1; + } + printk("\n"); +} +#endif + +/* + * Parser helpers + */ + +static u32 pmf_next32(struct pmf_cmd *cmd) +{ + u32 value; + if ((cmd->cmdend - cmd->cmdptr) < 4) { + cmd->error = 1; + return 0; + } + value = *((u32 *)cmd->cmdptr); + cmd->cmdptr += 4; + return value; +} + +static const void* pmf_next_blob(struct pmf_cmd *cmd, int count) +{ + const void *value; + if ((cmd->cmdend - cmd->cmdptr) < count) { + cmd->error = 1; + return NULL; + } + value = cmd->cmdptr; + cmd->cmdptr += count; + return value; +} + +/* + * Individual command parsers + */ + +#define PMF_PARSE_CALL(name, cmd, handlers, p...) 
\ + do { \ + if (cmd->error) \ + return -ENXIO; \ + if (handlers == NULL) \ + return 0; \ + if (handlers->name) \ + return handlers->name(cmd->func, cmd->instdata, \ + cmd->args, p); \ + return -1; \ + } while(0) \ + + +static int pmf_parser_write_gpio(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u8 value = (u8)pmf_next32(cmd); + u8 mask = (u8)pmf_next32(cmd); + + LOG_PARSE("pmf: write_gpio(value: %02x, mask: %02x)\n", value, mask); + + PMF_PARSE_CALL(write_gpio, cmd, h, value, mask); +} + +static int pmf_parser_read_gpio(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u8 mask = (u8)pmf_next32(cmd); + int rshift = (int)pmf_next32(cmd); + u8 xor = (u8)pmf_next32(cmd); + + LOG_PARSE("pmf: read_gpio(mask: %02x, rshift: %d, xor: %02x)\n", + mask, rshift, xor); + + PMF_PARSE_CALL(read_gpio, cmd, h, mask, rshift, xor); +} + +static int pmf_parser_write_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 value = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg32(offset: %08x, value: %08x, mask: %08x)\n", + offset, value, mask); + + PMF_PARSE_CALL(write_reg32, cmd, h, offset, value, mask); +} + +static int pmf_parser_read_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg32(offset: %08x)\n", offset); + + PMF_PARSE_CALL(read_reg32, cmd, h, offset); +} + + +static int pmf_parser_write_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u16 value = (u16)pmf_next32(cmd); + u16 mask = (u16)pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg16(offset: %08x, value: %04x, mask: %04x)\n", + offset, value, mask); + + PMF_PARSE_CALL(write_reg16, cmd, h, offset, value, mask); +} + +static int pmf_parser_read_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg16(offset: %08x)\n", offset); + + PMF_PARSE_CALL(read_reg16, cmd, h, offset); +} + + +static int pmf_parser_write_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u8 value = (u16)pmf_next32(cmd); + u8 mask = (u16)pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg8(offset: %08x, value: %02x, mask: %02x)\n", + offset, value, mask); + + PMF_PARSE_CALL(write_reg8, cmd, h, offset, value, mask); +} + +static int pmf_parser_read_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg8(offset: %08x)\n", offset); + + PMF_PARSE_CALL(read_reg8, cmd, h, offset); +} + +static int pmf_parser_delay(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 duration = pmf_next32(cmd); + + LOG_PARSE("pmf: delay(duration: %d us)\n", duration); + + PMF_PARSE_CALL(delay, cmd, h, duration); +} + +static int pmf_parser_wait_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 value = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + + LOG_PARSE("pmf: wait_reg32(offset: %08x, comp_value: %08x,mask: %08x)\n", + offset, value, mask); + + PMF_PARSE_CALL(wait_reg32, cmd, h, offset, value, mask); +} + +static int pmf_parser_wait_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u16 value = (u16)pmf_next32(cmd); + u16 mask = (u16)pmf_next32(cmd); + + LOG_PARSE("pmf: wait_reg16(offset: %08x, comp_value: %04x,mask: %04x)\n", + offset, value, mask); + + PMF_PARSE_CALL(wait_reg16, cmd, h, offset, value, mask); +} + +static int pmf_parser_wait_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + 
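+#if 0
+	/* Illustrative, not part of the original: a wait_reg8 command is
+	 * encoded as four 32-bit words, { PMF_CMD_WAIT_REG8, offset,
+	 * compare_value, mask }; e.g. { 12, 0x38, 0x01, 0x01 } waits for
+	 * bit 0 to come up at register offset 0x38 (values hypothetical).
+	 */
+	static const u32 example_wait_reg8[] = { 12, 0x38, 0x01, 0x01 };
+#endif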
u32 offset = pmf_next32(cmd); + u8 value = (u8)pmf_next32(cmd); + u8 mask = (u8)pmf_next32(cmd); + + LOG_PARSE("pmf: wait_reg8(offset: %08x, comp_value: %02x,mask: %02x)\n", + offset, value, mask); + + PMF_PARSE_CALL(wait_reg8, cmd, h, offset, value, mask); +} + +static int pmf_parser_read_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 bytes = pmf_next32(cmd); + + LOG_PARSE("pmf: read_i2c(bytes: %ud)\n", bytes); + + PMF_PARSE_CALL(read_i2c, cmd, h, bytes); +} + +static int pmf_parser_write_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 bytes = pmf_next32(cmd); + const void *blob = pmf_next_blob(cmd, bytes); + + LOG_PARSE("pmf: write_i2c(bytes: %ud) ...\n", bytes); + LOG_BLOB("pmf: data: \n", blob, bytes); + + PMF_PARSE_CALL(write_i2c, cmd, h, bytes, blob); +} + + +static int pmf_parser_rmw_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 maskbytes = pmf_next32(cmd); + u32 valuesbytes = pmf_next32(cmd); + u32 totalbytes = pmf_next32(cmd); + const void *maskblob = pmf_next_blob(cmd, maskbytes); + const void *valuesblob = pmf_next_blob(cmd, valuesbytes); + + LOG_PARSE("pmf: rmw_i2c(maskbytes: %ud, valuebytes: %ud, " + "totalbytes: %d) ...\n", + maskbytes, valuesbytes, totalbytes); + LOG_BLOB("pmf: mask data: \n", maskblob, maskbytes); + LOG_BLOB("pmf: values data: \n", valuesblob, valuesbytes); + + PMF_PARSE_CALL(rmw_i2c, cmd, h, maskbytes, valuesbytes, totalbytes, + maskblob, valuesblob); +} + +static int pmf_parser_read_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 bytes = pmf_next32(cmd); + + LOG_PARSE("pmf: read_cfg(offset: %x, bytes: %ud)\n", offset, bytes); + + PMF_PARSE_CALL(read_cfg, cmd, h, offset, bytes); +} + + +static int pmf_parser_write_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 bytes = pmf_next32(cmd); + const void *blob = pmf_next_blob(cmd, bytes); + + LOG_PARSE("pmf: write_cfg(offset: %x, bytes: %ud)\n", offset, bytes); + LOG_BLOB("pmf: data: \n", blob, bytes); + + PMF_PARSE_CALL(write_cfg, cmd, h, offset, bytes, blob); +} + +static int pmf_parser_rmw_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 maskbytes = pmf_next32(cmd); + u32 valuesbytes = pmf_next32(cmd); + u32 totalbytes = pmf_next32(cmd); + const void *maskblob = pmf_next_blob(cmd, maskbytes); + const void *valuesblob = pmf_next_blob(cmd, valuesbytes); + + LOG_PARSE("pmf: rmw_cfg(maskbytes: %ud, valuebytes: %ud," + " totalbytes: %d) ...\n", + maskbytes, valuesbytes, totalbytes); + LOG_BLOB("pmf: mask data: \n", maskblob, maskbytes); + LOG_BLOB("pmf: values data: \n", valuesblob, valuesbytes); + + PMF_PARSE_CALL(rmw_cfg, cmd, h, offset, maskbytes, valuesbytes, + totalbytes, maskblob, valuesblob); +} + + +static int pmf_parser_read_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u8 subaddr = (u8)pmf_next32(cmd); + u32 bytes = pmf_next32(cmd); + + LOG_PARSE("pmf: read_i2c_sub(subaddr: %x, bytes: %ud)\n", + subaddr, bytes); + + PMF_PARSE_CALL(read_i2c_sub, cmd, h, subaddr, bytes); +} + +static int pmf_parser_write_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u8 subaddr = (u8)pmf_next32(cmd); + u32 bytes = pmf_next32(cmd); + const void *blob = pmf_next_blob(cmd, bytes); + + LOG_PARSE("pmf: write_i2c_sub(subaddr: %x, bytes: %ud) ...\n", + subaddr, bytes); + LOG_BLOB("pmf: data: \n", blob, bytes); + + PMF_PARSE_CALL(write_i2c_sub, cmd, h, subaddr, bytes, blob); +} + +static int pmf_parser_set_i2c_mode(struct pmf_cmd *cmd, struct 
pmf_handlers *h) +{ + u32 mode = pmf_next32(cmd); + + LOG_PARSE("pmf: set_i2c_mode(mode: %d)\n", mode); + + PMF_PARSE_CALL(set_i2c_mode, cmd, h, mode); +} + + +static int pmf_parser_rmw_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u8 subaddr = (u8)pmf_next32(cmd); + u32 maskbytes = pmf_next32(cmd); + u32 valuesbytes = pmf_next32(cmd); + u32 totalbytes = pmf_next32(cmd); + const void *maskblob = pmf_next_blob(cmd, maskbytes); + const void *valuesblob = pmf_next_blob(cmd, valuesbytes); + + LOG_PARSE("pmf: rmw_i2c_sub(subaddr: %x, maskbytes: %ud, valuebytes: %ud" + ", totalbytes: %d) ...\n", + subaddr, maskbytes, valuesbytes, totalbytes); + LOG_BLOB("pmf: mask data: \n", maskblob, maskbytes); + LOG_BLOB("pmf: values data: \n", valuesblob, valuesbytes); + + PMF_PARSE_CALL(rmw_i2c_sub, cmd, h, subaddr, maskbytes, valuesbytes, + totalbytes, maskblob, valuesblob); +} + +static int pmf_parser_read_reg32_msrx(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 xor = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg32_msrx(offset: %x, mask: %x, shift: %x," + " xor: %x\n", offset, mask, shift, xor); + + PMF_PARSE_CALL(read_reg32_msrx, cmd, h, offset, mask, shift, xor); +} + +static int pmf_parser_read_reg16_msrx(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 xor = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg16_msrx(offset: %x, mask: %x, shift: %x," + " xor: %x\n", offset, mask, shift, xor); + + PMF_PARSE_CALL(read_reg16_msrx, cmd, h, offset, mask, shift, xor); +} +static int pmf_parser_read_reg8_msrx(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 xor = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg8_msrx(offset: %x, mask: %x, shift: %x," + " xor: %x\n", offset, mask, shift, xor); + + PMF_PARSE_CALL(read_reg8_msrx, cmd, h, offset, mask, shift, xor); +} + +static int pmf_parser_write_reg32_slm(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg32_slm(offset: %x, shift: %x, mask: %x\n", + offset, shift, mask); + + PMF_PARSE_CALL(write_reg32_slm, cmd, h, offset, shift, mask); +} + +static int pmf_parser_write_reg16_slm(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg16_slm(offset: %x, shift: %x, mask: %x\n", + offset, shift, mask); + + PMF_PARSE_CALL(write_reg16_slm, cmd, h, offset, shift, mask); +} + +static int pmf_parser_write_reg8_slm(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg8_slm(offset: %x, shift: %x, mask: %x\n", + offset, shift, mask); + + PMF_PARSE_CALL(write_reg8_slm, cmd, h, offset, shift, mask); +} + +static int pmf_parser_mask_and_compare(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 bytes = pmf_next32(cmd); + const void *maskblob = pmf_next_blob(cmd, bytes); + const void *valuesblob = pmf_next_blob(cmd, bytes); + + LOG_PARSE("pmf: mask_and_compare(length: %ud ...\n", bytes); + LOG_BLOB("pmf: mask data: \n", maskblob, bytes); + LOG_BLOB("pmf: values data: \n", valuesblob, bytes); + + 
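+#if 0
+	/* Illustrative sketch, not part of the original: the handler called
+	 * below is expected to read 'bytes' bytes from the device and to
+	 * succeed iff (data[i] & mask[i]) == values[i] for every byte
+	 * ('data' and 'i' are hypothetical here). */
+	for (i = 0; i < bytes; i++)
+		if ((data[i] & ((const u8 *)maskblob)[i]) !=
+		    ((const u8 *)valuesblob)[i])
+			return -ENXIO;
+#endif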
PMF_PARSE_CALL(mask_and_compare, cmd, h, + bytes, maskblob, valuesblob); +} + + +typedef int (*pmf_cmd_parser_t)(struct pmf_cmd *cmd, struct pmf_handlers *h); + +static pmf_cmd_parser_t pmf_parsers[PMF_CMD_COUNT] = +{ + NULL, + pmf_parser_write_gpio, + pmf_parser_read_gpio, + pmf_parser_write_reg32, + pmf_parser_read_reg32, + pmf_parser_write_reg16, + pmf_parser_read_reg16, + pmf_parser_write_reg8, + pmf_parser_read_reg8, + pmf_parser_delay, + pmf_parser_wait_reg32, + pmf_parser_wait_reg16, + pmf_parser_wait_reg8, + pmf_parser_read_i2c, + pmf_parser_write_i2c, + pmf_parser_rmw_i2c, + NULL, /* Bogus command */ + NULL, /* Shift bytes right: NYI */ + NULL, /* Shift bytes left: NYI */ + pmf_parser_read_cfg, + pmf_parser_write_cfg, + pmf_parser_rmw_cfg, + pmf_parser_read_i2c_sub, + pmf_parser_write_i2c_sub, + pmf_parser_set_i2c_mode, + pmf_parser_rmw_i2c_sub, + pmf_parser_read_reg32_msrx, + pmf_parser_read_reg16_msrx, + pmf_parser_read_reg8_msrx, + pmf_parser_write_reg32_slm, + pmf_parser_write_reg16_slm, + pmf_parser_write_reg8_slm, + pmf_parser_mask_and_compare, +}; + +struct pmf_device { + struct list_head link; + struct device_node *node; + struct pmf_handlers *handlers; + struct list_head functions; + struct kref ref; +}; + +static LIST_HEAD(pmf_devices); +static DEFINE_SPINLOCK(pmf_lock); +static DEFINE_MUTEX(pmf_irq_mutex); + +static void pmf_release_device(struct kref *kref) +{ + struct pmf_device *dev = container_of(kref, struct pmf_device, ref); + kfree(dev); +} + +static inline void pmf_put_device(struct pmf_device *dev) +{ + kref_put(&dev->ref, pmf_release_device); +} + +static inline struct pmf_device *pmf_get_device(struct pmf_device *dev) +{ + kref_get(&dev->ref); + return dev; +} + +static inline struct pmf_device *pmf_find_device(struct device_node *np) +{ + struct pmf_device *dev; + + list_for_each_entry(dev, &pmf_devices, link) { + if (dev->node == np) + return pmf_get_device(dev); + } + return NULL; +} + +static int pmf_parse_one(struct pmf_function *func, + struct pmf_handlers *handlers, + void *instdata, struct pmf_args *args) +{ + struct pmf_cmd cmd; + u32 ccode; + int count, rc; + + cmd.cmdptr = func->data; + cmd.cmdend = func->data + func->length; + cmd.func = func; + cmd.instdata = instdata; + cmd.args = args; + cmd.error = 0; + + LOG_PARSE("pmf: func %s, %d bytes, %s...\n", + func->name, func->length, + handlers ? 
"executing" : "parsing"); + + /* One subcommand to parse for now */ + count = 1; + + while(count-- && cmd.cmdptr < cmd.cmdend) { + /* Get opcode */ + ccode = pmf_next32(&cmd); + /* Check if we are hitting a command list, fetch new count */ + if (ccode == 0) { + count = pmf_next32(&cmd) - 1; + ccode = pmf_next32(&cmd); + } + if (cmd.error) { + LOG_ERROR("pmf: parse error, not enough data\n"); + return -ENXIO; + } + if (ccode >= PMF_CMD_COUNT) { + LOG_ERROR("pmf: command code %d unknown !\n", ccode); + return -ENXIO; + } + if (pmf_parsers[ccode] == NULL) { + LOG_ERROR("pmf: no parser for command %d !\n", ccode); + return -ENXIO; + } + rc = pmf_parsers[ccode](&cmd, handlers); + if (rc != 0) { + LOG_ERROR("pmf: parser for command %d returned" + " error %d\n", ccode, rc); + return rc; + } + } + + /* We are doing an initial parse pass, we need to adjust the size */ + if (handlers == NULL) + func->length = cmd.cmdptr - func->data; + + return 0; +} + +static int pmf_add_function_prop(struct pmf_device *dev, void *driverdata, + const char *name, u32 *data, + unsigned int length) +{ + int count = 0; + struct pmf_function *func = NULL; + + DBG("pmf: Adding functions for platform-do-%s\n", name); + + while (length >= 12) { + /* Allocate a structure */ + func = kzalloc(sizeof(struct pmf_function), GFP_KERNEL); + if (func == NULL) + goto bail; + kref_init(&func->ref); + INIT_LIST_HEAD(&func->irq_clients); + func->node = dev->node; + func->driver_data = driverdata; + func->name = name; + func->phandle = data[0]; + func->flags = data[1]; + data += 2; + length -= 8; + func->data = data; + func->length = length; + func->dev = dev; + DBG("pmf: idx %d: flags=%08x, phandle=%08x " + " %d bytes remaining, parsing...\n", + count+1, func->flags, func->phandle, length); + if (pmf_parse_one(func, NULL, NULL, NULL)) { + kfree(func); + goto bail; + } + length -= func->length; + data = (u32 *)(((u8 *)data) + func->length); + list_add(&func->link, &dev->functions); + pmf_get_device(dev); + count++; + } + bail: + DBG("pmf: Added %d functions\n", count); + + return count; +} + +static int pmf_add_functions(struct pmf_device *dev, void *driverdata) +{ + struct property *pp; +#define PP_PREFIX "platform-do-" + const int plen = strlen(PP_PREFIX); + int count = 0; + + for (pp = dev->node->properties; pp != 0; pp = pp->next) { + const char *name; + if (strncmp(pp->name, PP_PREFIX, plen) != 0) + continue; + name = pp->name + plen; + if (strlen(name) && pp->length >= 12) + count += pmf_add_function_prop(dev, driverdata, name, + pp->value, pp->length); + } + return count; +} + + +int pmf_register_driver(struct device_node *np, + struct pmf_handlers *handlers, + void *driverdata) +{ + struct pmf_device *dev; + unsigned long flags; + int rc = 0; + + if (handlers == NULL) + return -EINVAL; + + DBG("pmf: registering driver for node %s\n", np->full_name); + + spin_lock_irqsave(&pmf_lock, flags); + dev = pmf_find_device(np); + spin_unlock_irqrestore(&pmf_lock, flags); + if (dev != NULL) { + DBG("pmf: already there !\n"); + pmf_put_device(dev); + return -EBUSY; + } + + dev = kzalloc(sizeof(struct pmf_device), GFP_KERNEL); + if (dev == NULL) { + DBG("pmf: no memory !\n"); + return -ENOMEM; + } + kref_init(&dev->ref); + dev->node = of_node_get(np); + dev->handlers = handlers; + INIT_LIST_HEAD(&dev->functions); + + rc = pmf_add_functions(dev, driverdata); + if (rc == 0) { + DBG("pmf: no functions, disposing.. 
\n");
+		of_node_put(np);
+		kfree(dev);
+		return -ENODEV;
+	}
+
+	spin_lock_irqsave(&pmf_lock, flags);
+	list_add(&dev->link, &pmf_devices);
+	spin_unlock_irqrestore(&pmf_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pmf_register_driver);
+
+struct pmf_function *pmf_get_function(struct pmf_function *func)
+{
+	if (!try_module_get(func->dev->handlers->owner))
+		return NULL;
+	kref_get(&func->ref);
+	return func;
+}
+EXPORT_SYMBOL_GPL(pmf_get_function);
+
+static void pmf_release_function(struct kref *kref)
+{
+	struct pmf_function *func =
+		container_of(kref, struct pmf_function, ref);
+	pmf_put_device(func->dev);
+	kfree(func);
+}
+
+static inline void __pmf_put_function(struct pmf_function *func)
+{
+	kref_put(&func->ref, pmf_release_function);
+}
+
+void pmf_put_function(struct pmf_function *func)
+{
+	if (func == NULL)
+		return;
+	module_put(func->dev->handlers->owner);
+	__pmf_put_function(func);
+}
+EXPORT_SYMBOL_GPL(pmf_put_function);
+
+void pmf_unregister_driver(struct device_node *np)
+{
+	struct pmf_device *dev;
+	unsigned long flags;
+
+	DBG("pmf: unregistering driver for node %s\n", np->full_name);
+
+	spin_lock_irqsave(&pmf_lock, flags);
+	dev = pmf_find_device(np);
+	if (dev == NULL) {
+		DBG("pmf: no such driver !\n");
+		spin_unlock_irqrestore(&pmf_lock, flags);
+		return;
+	}
+	list_del(&dev->link);
+
+	while(!list_empty(&dev->functions)) {
+		struct pmf_function *func =
+			list_entry(dev->functions.next, typeof(*func), link);
+		list_del(&func->link);
+		__pmf_put_function(func);
+	}
+
+	pmf_put_device(dev);
+	spin_unlock_irqrestore(&pmf_lock, flags);
+}
+EXPORT_SYMBOL_GPL(pmf_unregister_driver);
+
+struct pmf_function *__pmf_find_function(struct device_node *target,
+					 const char *name, u32 flags)
+{
+	struct device_node *actor = of_node_get(target);
+	struct pmf_device *dev;
+	struct pmf_function *func, *result = NULL;
+	char fname[64];
+	const u32 *prop;
+	u32 ph;
+
+	/*
+	 * Look for a "platform-*" function reference. If we can't find
+	 * one, then we fall back to a direct call attempt
+	 */
+	snprintf(fname, 63, "platform-%s", name);
+	prop = of_get_property(target, fname, NULL);
+	if (prop == NULL)
+		goto find_it;
+	ph = *prop;
+	if (ph == 0)
+		goto find_it;
+
+	/*
+	 * Ok, now try to find the actor.
If we can't find it, we fail, + * there is no point in falling back there + */ + of_node_put(actor); + actor = of_find_node_by_phandle(ph); + if (actor == NULL) + return NULL; + find_it: + dev = pmf_find_device(actor); + if (dev == NULL) { + result = NULL; + goto out; + } + + list_for_each_entry(func, &dev->functions, link) { + if (name && strcmp(name, func->name)) + continue; + if (func->phandle && target->phandle != func->phandle) + continue; + if ((func->flags & flags) == 0) + continue; + result = func; + break; + } + pmf_put_device(dev); +out: + of_node_put(actor); + return result; +} + + +int pmf_register_irq_client(struct device_node *target, + const char *name, + struct pmf_irq_client *client) +{ + struct pmf_function *func; + unsigned long flags; + + spin_lock_irqsave(&pmf_lock, flags); + func = __pmf_find_function(target, name, PMF_FLAGS_INT_GEN); + if (func) + func = pmf_get_function(func); + spin_unlock_irqrestore(&pmf_lock, flags); + if (func == NULL) + return -ENODEV; + + /* guard against manipulations of list */ + mutex_lock(&pmf_irq_mutex); + if (list_empty(&func->irq_clients)) + func->dev->handlers->irq_enable(func); + + /* guard against pmf_do_irq while changing list */ + spin_lock_irqsave(&pmf_lock, flags); + list_add(&client->link, &func->irq_clients); + spin_unlock_irqrestore(&pmf_lock, flags); + + client->func = func; + mutex_unlock(&pmf_irq_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(pmf_register_irq_client); + +void pmf_unregister_irq_client(struct pmf_irq_client *client) +{ + struct pmf_function *func = client->func; + unsigned long flags; + + BUG_ON(func == NULL); + + /* guard against manipulations of list */ + mutex_lock(&pmf_irq_mutex); + client->func = NULL; + + /* guard against pmf_do_irq while changing list */ + spin_lock_irqsave(&pmf_lock, flags); + list_del(&client->link); + spin_unlock_irqrestore(&pmf_lock, flags); + + if (list_empty(&func->irq_clients)) + func->dev->handlers->irq_disable(func); + mutex_unlock(&pmf_irq_mutex); + pmf_put_function(func); +} +EXPORT_SYMBOL_GPL(pmf_unregister_irq_client); + + +void pmf_do_irq(struct pmf_function *func) +{ + unsigned long flags; + struct pmf_irq_client *client; + + /* For now, using a spinlock over the whole function. 
Can be made + * to drop the lock using 2 lists if necessary + */ + spin_lock_irqsave(&pmf_lock, flags); + list_for_each_entry(client, &func->irq_clients, link) { + if (!try_module_get(client->owner)) + continue; + client->handler(client->data); + module_put(client->owner); + } + spin_unlock_irqrestore(&pmf_lock, flags); +} +EXPORT_SYMBOL_GPL(pmf_do_irq); + + +int pmf_call_one(struct pmf_function *func, struct pmf_args *args) +{ + struct pmf_device *dev = func->dev; + void *instdata = NULL; + int rc = 0; + + DBG(" ** pmf_call_one(%s/%s) **\n", dev->node->full_name, func->name); + + if (dev->handlers->begin) + instdata = dev->handlers->begin(func, args); + rc = pmf_parse_one(func, dev->handlers, instdata, args); + if (dev->handlers->end) + dev->handlers->end(func, instdata); + + return rc; +} +EXPORT_SYMBOL_GPL(pmf_call_one); + +int pmf_do_functions(struct device_node *np, const char *name, + u32 phandle, u32 fflags, struct pmf_args *args) +{ + struct pmf_device *dev; + struct pmf_function *func, *tmp; + unsigned long flags; + int rc = -ENODEV; + + spin_lock_irqsave(&pmf_lock, flags); + + dev = pmf_find_device(np); + if (dev == NULL) { + spin_unlock_irqrestore(&pmf_lock, flags); + return -ENODEV; + } + list_for_each_entry_safe(func, tmp, &dev->functions, link) { + if (name && strcmp(name, func->name)) + continue; + if (phandle && func->phandle && phandle != func->phandle) + continue; + if ((func->flags & fflags) == 0) + continue; + if (pmf_get_function(func) == NULL) + continue; + spin_unlock_irqrestore(&pmf_lock, flags); + rc = pmf_call_one(func, args); + pmf_put_function(func); + spin_lock_irqsave(&pmf_lock, flags); + } + pmf_put_device(dev); + spin_unlock_irqrestore(&pmf_lock, flags); + + return rc; +} +EXPORT_SYMBOL_GPL(pmf_do_functions); + + +struct pmf_function *pmf_find_function(struct device_node *target, + const char *name) +{ + struct pmf_function *func; + unsigned long flags; + + spin_lock_irqsave(&pmf_lock, flags); + func = __pmf_find_function(target, name, PMF_FLAGS_ON_DEMAND); + if (func) + func = pmf_get_function(func); + spin_unlock_irqrestore(&pmf_lock, flags); + return func; +} +EXPORT_SYMBOL_GPL(pmf_find_function); + +int pmf_call_function(struct device_node *target, const char *name, + struct pmf_args *args) +{ + struct pmf_function *func = pmf_find_function(target, name); + int rc; + + if (func == NULL) + return -ENODEV; + + rc = pmf_call_one(func, args); + pmf_put_function(func); + return rc; +} +EXPORT_SYMBOL_GPL(pmf_call_function); + diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c new file mode 100644 index 000000000..59cfc9d63 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pic.c @@ -0,0 +1,659 @@ +/* + * Support for the interrupt controllers found on Power Macintosh, + * currently Apple's "Grand Central" interrupt controller in all + * it's incarnations. OpenPIC support used on newer machines is + * in a separate file + * + * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) + * Copyright (C) 2005 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * IBM, Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/syscore_ops.h> +#include <linux/adb.h> +#include <linux/pmu.h> + +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/time.h> +#include <asm/pmac_feature.h> +#include <asm/mpic.h> +#include <asm/xmon.h> + +#include "pmac.h" + +#ifdef CONFIG_PPC32 +struct pmac_irq_hw { + unsigned int event; + unsigned int enable; + unsigned int ack; + unsigned int level; +}; + +/* Workaround flags for 32bit powermac machines */ +unsigned int of_irq_workarounds; +struct device_node *of_irq_dflt_pic; + +/* Default addresses */ +static volatile struct pmac_irq_hw __iomem *pmac_irq_hw[4]; + +static int max_irqs; +static int max_real_irqs; + +static DEFINE_RAW_SPINLOCK(pmac_pic_lock); + +/* The max irq number this driver deals with is 128; see max_irqs */ +static DECLARE_BITMAP(ppc_lost_interrupts, 128); +static DECLARE_BITMAP(ppc_cached_irq_mask, 128); +static int pmac_irq_cascade = -1; +static struct irq_domain *pmac_pic_host; + +static void __pmac_retrigger(unsigned int irq_nr) +{ + if (irq_nr >= max_real_irqs && pmac_irq_cascade > 0) { + __set_bit(irq_nr, ppc_lost_interrupts); + irq_nr = pmac_irq_cascade; + mb(); + } + if (!__test_and_set_bit(irq_nr, ppc_lost_interrupts)) { + atomic_inc(&ppc_n_lost_interrupts); + set_dec(1); + } +} + +static void pmac_mask_and_ack_irq(struct irq_data *d) +{ + unsigned int src = irqd_to_hwirq(d); + unsigned long bit = 1UL << (src & 0x1f); + int i = src >> 5; + unsigned long flags; + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + __clear_bit(src, ppc_cached_irq_mask); + if (__test_and_clear_bit(src, ppc_lost_interrupts)) + atomic_dec(&ppc_n_lost_interrupts); + out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); + out_le32(&pmac_irq_hw[i]->ack, bit); + do { + /* make sure ack gets to controller before we enable + interrupts */ + mb(); + } while((in_le32(&pmac_irq_hw[i]->enable) & bit) + != (ppc_cached_irq_mask[i] & bit)); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static void pmac_ack_irq(struct irq_data *d) +{ + unsigned int src = irqd_to_hwirq(d); + unsigned long bit = 1UL << (src & 0x1f); + int i = src >> 5; + unsigned long flags; + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + if (__test_and_clear_bit(src, ppc_lost_interrupts)) + atomic_dec(&ppc_n_lost_interrupts); + out_le32(&pmac_irq_hw[i]->ack, bit); + (void)in_le32(&pmac_irq_hw[i]->ack); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) +{ + unsigned long bit = 1UL << (irq_nr & 0x1f); + int i = irq_nr >> 5; + + if ((unsigned)irq_nr >= max_irqs) + return; + + /* enable unmasked interrupts */ + out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); + + do { + /* make sure mask gets to controller before we + return to user */ + mb(); + } while((in_le32(&pmac_irq_hw[i]->enable) & bit) + != (ppc_cached_irq_mask[i] & bit)); + + /* + * Unfortunately, setting the bit in the enable register + * when the device interrupt is already on *doesn't* set + * the bit in the flag register or request another interrupt. 
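+	 *
+	 * (Aside, not part of the original: this is why the code below
+	 * re-checks the 'level' register by hand and calls
+	 * __pmac_retrigger() to synthesize the event the hardware will
+	 * not raise. Spelled out:)
+	 */
+#if 0
+	if ((in_le32(&pmac_irq_hw[i]->level) & bit) &&
+	    (ppc_cached_irq_mask[i] & bit))
+		__pmac_retrigger(irq_nr);	/* same as the check below */
+#endif
+	/*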
+ */ + if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level)) + __pmac_retrigger(irq_nr); +} + +/* When an irq gets requested for the first client, if it's an + * edge interrupt, we clear any previous one on the controller + */ +static unsigned int pmac_startup_irq(struct irq_data *d) +{ + unsigned long flags; + unsigned int src = irqd_to_hwirq(d); + unsigned long bit = 1UL << (src & 0x1f); + int i = src >> 5; + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + if (!irqd_is_level_type(d)) + out_le32(&pmac_irq_hw[i]->ack, bit); + __set_bit(src, ppc_cached_irq_mask); + __pmac_set_irq_mask(src, 0); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); + + return 0; +} + +static void pmac_mask_irq(struct irq_data *d) +{ + unsigned long flags; + unsigned int src = irqd_to_hwirq(d); + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + __clear_bit(src, ppc_cached_irq_mask); + __pmac_set_irq_mask(src, 1); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static void pmac_unmask_irq(struct irq_data *d) +{ + unsigned long flags; + unsigned int src = irqd_to_hwirq(d); + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + __set_bit(src, ppc_cached_irq_mask); + __pmac_set_irq_mask(src, 0); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static int pmac_retrigger(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + __pmac_retrigger(irqd_to_hwirq(d)); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); + return 1; +} + +static struct irq_chip pmac_pic = { + .name = "PMAC-PIC", + .irq_startup = pmac_startup_irq, + .irq_mask = pmac_mask_irq, + .irq_ack = pmac_ack_irq, + .irq_mask_ack = pmac_mask_and_ack_irq, + .irq_unmask = pmac_unmask_irq, + .irq_retrigger = pmac_retrigger, +}; + +static irqreturn_t gatwick_action(int cpl, void *dev_id) +{ + unsigned long flags; + int irq, bits; + int rc = IRQ_NONE; + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) { + int i = irq >> 5; + bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; + bits |= in_le32(&pmac_irq_hw[i]->level); + bits &= ppc_cached_irq_mask[i]; + if (bits == 0) + continue; + irq += __ilog2(bits); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); + generic_handle_irq(irq); + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + rc = IRQ_HANDLED; + } + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); + return rc; +} + +static unsigned int pmac_pic_get_irq(void) +{ + int irq; + unsigned long bits = 0; + unsigned long flags; + +#ifdef CONFIG_PPC_PMAC32_PSURGE + /* IPI's are a hack on the powersurge -- Cort */ + if (smp_processor_id() != 0) { + return psurge_secondary_virq; + } +#endif /* CONFIG_PPC_PMAC32_PSURGE */ + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + for (irq = max_real_irqs; (irq -= 32) >= 0; ) { + int i = irq >> 5; + bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; + bits |= in_le32(&pmac_irq_hw[i]->level); + bits &= ppc_cached_irq_mask[i]; + if (bits == 0) + continue; + irq += __ilog2(bits); + break; + } + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); + if (unlikely(irq < 0)) + return NO_IRQ; + return irq_linear_revmap(pmac_pic_host, irq); +} + +#ifdef CONFIG_XMON +static struct irqaction xmon_action = { + .handler = xmon_irq, + .flags = 0, + .name = "NMI - XMON" +}; +#endif + +static struct irqaction gatwick_cascade_action = { + .handler = gatwick_action, + .name = "cascade", +}; + +static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node) +{ + /* We match 
all, we don't always have a node anyway */ + return 1; +} + +static int pmac_pic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + if (hw >= max_irqs) + return -EINVAL; + + /* Mark level interrupts, set delayed disable for edge ones and set + * handlers + */ + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &pmac_pic, handle_level_irq); + return 0; +} + +static const struct irq_domain_ops pmac_pic_host_ops = { + .match = pmac_pic_host_match, + .map = pmac_pic_host_map, + .xlate = irq_domain_xlate_onecell, +}; + +static void __init pmac_pic_probe_oldstyle(void) +{ + int i; + struct device_node *master = NULL; + struct device_node *slave = NULL; + u8 __iomem *addr; + struct resource r; + + /* Set our get_irq function */ + ppc_md.get_irq = pmac_pic_get_irq; + + /* + * Find the interrupt controller type & node + */ + + if ((master = of_find_node_by_name(NULL, "gc")) != NULL) { + max_irqs = max_real_irqs = 32; + } else if ((master = of_find_node_by_name(NULL, "ohare")) != NULL) { + max_irqs = max_real_irqs = 32; + /* We might have a second cascaded ohare */ + slave = of_find_node_by_name(NULL, "pci106b,7"); + if (slave) + max_irqs = 64; + } else if ((master = of_find_node_by_name(NULL, "mac-io")) != NULL) { + max_irqs = max_real_irqs = 64; + + /* We might have a second cascaded heathrow */ + + /* Compensate for of_node_put() in of_find_node_by_name() */ + of_node_get(master); + slave = of_find_node_by_name(master, "mac-io"); + + /* Check ordering of master & slave */ + if (of_device_is_compatible(master, "gatwick")) { + struct device_node *tmp; + BUG_ON(slave == NULL); + tmp = master; + master = slave; + slave = tmp; + } + + /* We found a slave */ + if (slave) + max_irqs = 128; + } + BUG_ON(master == NULL); + + /* + * Allocate an irq host + */ + pmac_pic_host = irq_domain_add_linear(master, max_irqs, + &pmac_pic_host_ops, NULL); + BUG_ON(pmac_pic_host == NULL); + irq_set_default_host(pmac_pic_host); + + /* Get addresses of first controller if we have a node for it */ + BUG_ON(of_address_to_resource(master, 0, &r)); + + /* Map interrupts of primary controller */ + addr = (u8 __iomem *) ioremap(r.start, 0x40); + i = 0; + pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *) + (addr + 0x20); + if (max_real_irqs > 32) + pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *) + (addr + 0x10); + of_node_put(master); + + printk(KERN_INFO "irq: Found primary Apple PIC %s for %d irqs\n", + master->full_name, max_real_irqs); + + /* Map interrupts of cascaded controller */ + if (slave && !of_address_to_resource(slave, 0, &r)) { + addr = (u8 __iomem *)ioremap(r.start, 0x40); + pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *) + (addr + 0x20); + if (max_irqs > 64) + pmac_irq_hw[i++] = + (volatile struct pmac_irq_hw __iomem *) + (addr + 0x10); + pmac_irq_cascade = irq_of_parse_and_map(slave, 0); + + printk(KERN_INFO "irq: Found slave Apple PIC %s for %d irqs" + " cascade: %d\n", slave->full_name, + max_irqs - max_real_irqs, pmac_irq_cascade); + } + of_node_put(slave); + + /* Disable all interrupts in all controllers */ + for (i = 0; i * 32 < max_irqs; ++i) + out_le32(&pmac_irq_hw[i]->enable, 0); + + /* Hookup cascade irq */ + if (slave && pmac_irq_cascade != NO_IRQ) + setup_irq(pmac_irq_cascade, &gatwick_cascade_action); + + printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs); +#ifdef CONFIG_XMON + setup_irq(irq_create_mapping(NULL, 20), &xmon_action); +#endif +} + +int of_irq_parse_oldworld(struct device_node *device, int 
index, + struct of_phandle_args *out_irq) +{ + const u32 *ints = NULL; + int intlen; + + /* + * Old machines just have a list of interrupt numbers + * and no interrupt-controller nodes. We also have dodgy + * cases where the APPL,interrupts property is completely + * missing behind pci-pci bridges and we have to get it + * from the parent (the bridge itself, as apple just wired + * everything together on these) + */ + while (device) { + ints = of_get_property(device, "AAPL,interrupts", &intlen); + if (ints != NULL) + break; + device = device->parent; + if (device && strcmp(device->type, "pci") != 0) + break; + } + if (ints == NULL) + return -EINVAL; + intlen /= sizeof(u32); + + if (index >= intlen) + return -EINVAL; + + out_irq->np = NULL; + out_irq->args[0] = ints[index]; + out_irq->args_count = 1; + + return 0; +} +#endif /* CONFIG_PPC32 */ + +static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic) +{ +#if defined(CONFIG_XMON) && defined(CONFIG_PPC32) + struct device_node* pswitch; + int nmi_irq; + + pswitch = of_find_node_by_name(NULL, "programmer-switch"); + if (pswitch) { + nmi_irq = irq_of_parse_and_map(pswitch, 0); + if (nmi_irq != NO_IRQ) { + mpic_irq_set_priority(nmi_irq, 9); + setup_irq(nmi_irq, &xmon_action); + } + of_node_put(pswitch); + } +#endif /* defined(CONFIG_XMON) && defined(CONFIG_PPC32) */ +} + +static struct mpic * __init pmac_setup_one_mpic(struct device_node *np, + int master) +{ + const char *name = master ? " MPIC 1 " : " MPIC 2 "; + struct mpic *mpic; + unsigned int flags = master ? 0 : MPIC_SECONDARY; + + pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0); + + if (of_get_property(np, "big-endian", NULL)) + flags |= MPIC_BIG_ENDIAN; + + /* Primary Big Endian means HT interrupts. This is quite dodgy + * but works until I find a better way + */ + if (master && (flags & MPIC_BIG_ENDIAN)) + flags |= MPIC_U3_HT_IRQS; + + mpic = mpic_alloc(np, 0, flags, 0, 0, name); + if (mpic == NULL) + return NULL; + + mpic_init(mpic); + + return mpic; + } + +static int __init pmac_pic_probe_mpic(void) +{ + struct mpic *mpic1, *mpic2; + struct device_node *np, *master = NULL, *slave = NULL; + + /* We can have up to 2 MPICs cascaded */ + for (np = NULL; (np = of_find_node_by_type(np, "open-pic")) + != NULL;) { + if (master == NULL && + of_get_property(np, "interrupts", NULL) == NULL) + master = of_node_get(np); + else if (slave == NULL) + slave = of_node_get(np); + if (master && slave) + break; + } + + /* Check for bogus setups */ + if (master == NULL && slave != NULL) { + master = slave; + slave = NULL; + } + + /* Not found, default to good old pmac pic */ + if (master == NULL) + return -ENODEV; + + /* Set master handler */ + ppc_md.get_irq = mpic_get_irq; + + /* Setup master */ + mpic1 = pmac_setup_one_mpic(master, 1); + BUG_ON(mpic1 == NULL); + + /* Install NMI if any */ + pmac_pic_setup_mpic_nmi(mpic1); + + of_node_put(master); + + /* Set up a cascaded controller, if present */ + if (slave) { + mpic2 = pmac_setup_one_mpic(slave, 0); + if (mpic2 == NULL) + printk(KERN_ERR "Failed to setup slave MPIC\n"); + of_node_put(slave); + } + + return 0; +} + + +void __init pmac_pic_init(void) +{ + /* We configure the OF parsing based on our oldworld vs. newworld + * platform type and whether we were booted by BootX. 
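+	 *
+	 * Rough map of the two workaround flags set below (their effect is
+	 * visible in of_irq_parse_oldworld() above): OF_IMAP_OLDWORLD_MAC
+	 * routes interrupt parsing to the "AAPL,interrupts" property
+	 * instead of the standard interrupt-map, and OF_IMAP_NO_PHANDLE
+	 * means BootX left us without usable phandles, so a default
+	 * interrupt controller has to be picked by hand below.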
+ */ +#ifdef CONFIG_PPC32 + if (!pmac_newworld) + of_irq_workarounds |= OF_IMAP_OLDWORLD_MAC; + if (of_get_property(of_chosen, "linux,bootx", NULL) != NULL) + of_irq_workarounds |= OF_IMAP_NO_PHANDLE; + + /* If we don't have phandles on a newworld, then try to locate a + * default interrupt controller (happens when booting with BootX). + * We do a first match here, hopefully, that only ever happens on + * machines with one controller. + */ + if (pmac_newworld && (of_irq_workarounds & OF_IMAP_NO_PHANDLE)) { + struct device_node *np; + + for_each_node_with_property(np, "interrupt-controller") { + /* Skip /chosen/interrupt-controller */ + if (strcmp(np->name, "chosen") == 0) + continue; + /* It seems like at least one person wants + * to use BootX on a machine with an AppleKiwi + * controller which happens to pretend to be an + * interrupt controller too. */ + if (strcmp(np->name, "AppleKiwi") == 0) + continue; + /* I think we found one ! */ + of_irq_dflt_pic = np; + break; + } + } +#endif /* CONFIG_PPC32 */ + + /* We first try to detect Apple's new Core99 chipset, since mac-io + * is quite different on those machines and contains an IBM MPIC2. + */ + if (pmac_pic_probe_mpic() == 0) + return; + +#ifdef CONFIG_PPC32 + pmac_pic_probe_oldstyle(); +#endif +} + +#if defined(CONFIG_PM) && defined(CONFIG_PPC32) +/* + * These procedures are used in implementing sleep on the powerbooks. + * sleep_save_intrs() saves the states of all interrupt enables + * and disables all interrupts except for the nominated one. + * sleep_restore_intrs() restores the states of all interrupt enables. + */ +unsigned long sleep_save_mask[2]; + +/* This used to be passed by the PMU driver but that link got + * broken with the new driver model. We use this tweak for now... + * We really want to do things differently though... 
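+ *
+ * For reference, the suspend path below goes: save both words of
+ * ppc_cached_irq_mask[] into sleep_save_mask[], mask every source, set
+ * only the via-pmu interrupt found here (so the PMU can still wake us),
+ * then push the masks to the controller with a read-back to flush.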
+ */ +static int pmacpic_find_viaint(void) +{ + int viaint = -1; + +#ifdef CONFIG_ADB_PMU + struct device_node *np; + + if (pmu_get_model() != PMU_OHARE_BASED) + goto not_found; + np = of_find_node_by_name(NULL, "via-pmu"); + if (np == NULL) + goto not_found; + viaint = irq_of_parse_and_map(np, 0); + +not_found: +#endif /* CONFIG_ADB_PMU */ + return viaint; +} + +static int pmacpic_suspend(void) +{ + int viaint = pmacpic_find_viaint(); + + sleep_save_mask[0] = ppc_cached_irq_mask[0]; + sleep_save_mask[1] = ppc_cached_irq_mask[1]; + ppc_cached_irq_mask[0] = 0; + ppc_cached_irq_mask[1] = 0; + if (viaint > 0) + set_bit(viaint, ppc_cached_irq_mask); + out_le32(&pmac_irq_hw[0]->enable, ppc_cached_irq_mask[0]); + if (max_real_irqs > 32) + out_le32(&pmac_irq_hw[1]->enable, ppc_cached_irq_mask[1]); + (void)in_le32(&pmac_irq_hw[0]->event); + /* make sure mask gets to controller before we return to caller */ + mb(); + (void)in_le32(&pmac_irq_hw[0]->enable); + + return 0; +} + +static void pmacpic_resume(void) +{ + int i; + + out_le32(&pmac_irq_hw[0]->enable, 0); + if (max_real_irqs > 32) + out_le32(&pmac_irq_hw[1]->enable, 0); + mb(); + for (i = 0; i < max_real_irqs; ++i) + if (test_bit(i, sleep_save_mask)) + pmac_unmask_irq(irq_get_irq_data(i)); +} + +static struct syscore_ops pmacpic_syscore_ops = { + .suspend = pmacpic_suspend, + .resume = pmacpic_resume, +}; + +static int __init init_pmacpic_syscore(void) +{ + if (pmac_irq_hw[0]) + register_syscore_ops(&pmacpic_syscore_ops); + return 0; +} + +machine_subsys_initcall(powermac, init_pmacpic_syscore); + +#endif /* CONFIG_PM && CONFIG_PPC32 */ diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h new file mode 100644 index 000000000..e7f8163d6 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -0,0 +1,43 @@ +#ifndef __PMAC_H__ +#define __PMAC_H__ + +#include <linux/pci.h> +#include <linux/irq.h> + +/* + * Declaration for the various functions exported by the + * pmac_* files. Mostly for use by pmac_setup + */ + +struct rtc_time; + +extern int pmac_newworld; + +extern long pmac_time_init(void); +extern unsigned long pmac_get_boot_time(void); +extern void pmac_get_rtc_time(struct rtc_time *); +extern int pmac_set_rtc_time(struct rtc_time *); +extern void pmac_read_rtc_time(void); +extern void pmac_calibrate_decr(void); +extern void pmac_pci_irq_fixup(struct pci_dev *); +extern void pmac_pci_init(void); + +extern void pmac_nvram_update(void); +extern unsigned char pmac_nvram_read_byte(int addr); +extern void pmac_nvram_write_byte(int addr, unsigned char val); +extern void pmac_pcibios_after_init(void); +extern int of_show_percpuinfo(struct seq_file *m, int i); + +extern void pmac_setup_pci_dma(void); +extern void pmac_check_ht_link(void); + +extern void pmac_setup_smp(void); +extern int psurge_secondary_virq; +extern void low_cpu_die(void) __attribute__((noreturn)); + +extern int pmac_nvram_init(void); +extern void pmac_pic_init(void); + +extern struct pci_controller_ops pmac_pci_controller_ops; + +#endif /* __PMAC_H__ */ diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c new file mode 100644 index 000000000..8dd78f4e1 --- /dev/null +++ b/arch/powerpc/platforms/powermac/setup.c @@ -0,0 +1,666 @@ +/* + * Powermac setup and early boot code plus other random bits. 
+ * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@samba.org) + * + * Derived from "arch/alpha/kernel/setup.c" + * Copyright (C) 1995 Linus Torvalds + * + * Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* + * bootup setup stuff.. + */ + +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/export.h> +#include <linux/user.h> +#include <linux/tty.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/ioport.h> +#include <linux/major.h> +#include <linux/initrd.h> +#include <linux/vt_kern.h> +#include <linux/console.h> +#include <linux/pci.h> +#include <linux/adb.h> +#include <linux/cuda.h> +#include <linux/pmu.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/bitops.h> +#include <linux/suspend.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> +#include <linux/memblock.h> + +#include <asm/reg.h> +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/pci-bridge.h> +#include <asm/ohare.h> +#include <asm/mediabay.h> +#include <asm/machdep.h> +#include <asm/dma.h> +#include <asm/cputable.h> +#include <asm/btext.h> +#include <asm/pmac_feature.h> +#include <asm/time.h> +#include <asm/mmu_context.h> +#include <asm/iommu.h> +#include <asm/smu.h> +#include <asm/pmc.h> +#include <asm/udbg.h> + +#include "pmac.h" + +#undef SHOW_GATWICK_IRQS + +int ppc_override_l2cr = 0; +int ppc_override_l2cr_value; +int has_l2cache = 0; + +int pmac_newworld; + +static int current_root_goodness = -1; + +extern struct machdep_calls pmac_md; + +#define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */ + +#ifdef CONFIG_PPC64 +int sccdbg; +#endif + +sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; +EXPORT_SYMBOL(sys_ctrler); + +#ifdef CONFIG_PMAC_SMU +unsigned long smu_cmdbuf_abs; +EXPORT_SYMBOL(smu_cmdbuf_abs); +#endif + +static void pmac_show_cpuinfo(struct seq_file *m) +{ + struct device_node *np; + const char *pp; + int plen; + int mbmodel; + unsigned int mbflags; + char* mbname; + + mbmodel = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, + PMAC_MB_INFO_MODEL, 0); + mbflags = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, + PMAC_MB_INFO_FLAGS, 0); + if (pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, PMAC_MB_INFO_NAME, + (long) &mbname) != 0) + mbname = "Unknown"; + + /* find motherboard type */ + seq_printf(m, "machine\t\t: "); + np = of_find_node_by_path("/"); + if (np != NULL) { + pp = of_get_property(np, "model", NULL); + if (pp != NULL) + seq_printf(m, "%s\n", pp); + else + seq_printf(m, "PowerMac\n"); + pp = of_get_property(np, "compatible", &plen); + if (pp != NULL) { + seq_printf(m, "motherboard\t:"); + while (plen > 0) { + int l = strlen(pp) + 1; + seq_printf(m, " %s", pp); + plen -= l; + pp += l; + } + seq_printf(m, "\n"); + } + of_node_put(np); + } else + seq_printf(m, "PowerMac\n"); + + /* print parsed model */ + seq_printf(m, "detected as\t: %d (%s)\n", mbmodel, mbname); + 
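+	/* The fields printed so far are, in order: the root node's "model"
+	 * property, its flattened "compatible" list, and the motherboard
+	 * ID parsed by the pmac feature code. */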
seq_printf(m, "pmac flags\t: %08x\n", mbflags); + + /* find l2 cache info */ + np = of_find_node_by_name(NULL, "l2-cache"); + if (np == NULL) + np = of_find_node_by_type(NULL, "cache"); + if (np != NULL) { + const unsigned int *ic = + of_get_property(np, "i-cache-size", NULL); + const unsigned int *dc = + of_get_property(np, "d-cache-size", NULL); + seq_printf(m, "L2 cache\t:"); + has_l2cache = 1; + if (of_get_property(np, "cache-unified", NULL) != 0 && dc) { + seq_printf(m, " %dK unified", *dc / 1024); + } else { + if (ic) + seq_printf(m, " %dK instruction", *ic / 1024); + if (dc) + seq_printf(m, "%s %dK data", + (ic? " +": ""), *dc / 1024); + } + pp = of_get_property(np, "ram-type", NULL); + if (pp) + seq_printf(m, " %s", pp); + seq_printf(m, "\n"); + of_node_put(np); + } + + /* Indicate newworld/oldworld */ + seq_printf(m, "pmac-generation\t: %s\n", + pmac_newworld ? "NewWorld" : "OldWorld"); +} + +#ifndef CONFIG_ADB_CUDA +int find_via_cuda(void) +{ + struct device_node *dn = of_find_node_by_name(NULL, "via-cuda"); + + if (!dn) + return 0; + of_node_put(dn); + printk("WARNING ! Your machine is CUDA-based but your kernel\n"); + printk(" wasn't compiled with CONFIG_ADB_CUDA option !\n"); + return 0; +} +#endif + +#ifndef CONFIG_ADB_PMU +int find_via_pmu(void) +{ + struct device_node *dn = of_find_node_by_name(NULL, "via-pmu"); + + if (!dn) + return 0; + of_node_put(dn); + printk("WARNING ! Your machine is PMU-based but your kernel\n"); + printk(" wasn't compiled with CONFIG_ADB_PMU option !\n"); + return 0; +} +#endif + +#ifndef CONFIG_PMAC_SMU +int smu_init(void) +{ + /* should check and warn if SMU is present */ + return 0; +} +#endif + +#ifdef CONFIG_PPC32 +static volatile u32 *sysctrl_regs; + +static void __init ohare_init(void) +{ + struct device_node *dn; + + /* this area has the CPU identification register + and some registers used by smp boards */ + sysctrl_regs = (volatile u32 *) ioremap(0xf8000000, 0x1000); + + /* + * Turn on the L2 cache. + * We assume that we have a PSX memory controller iff + * we have an ohare I/O controller. + */ + dn = of_find_node_by_name(NULL, "ohare"); + if (dn) { + of_node_put(dn); + if (((sysctrl_regs[2] >> 24) & 0xf) >= 3) { + if (sysctrl_regs[4] & 0x10) + sysctrl_regs[4] |= 0x04000020; + else + sysctrl_regs[4] |= 0x04000000; + if(has_l2cache) + printk(KERN_INFO "Level 2 cache enabled\n"); + } + } +} + +static void __init l2cr_init(void) +{ + /* Checks "l2cr-value" property in the registry */ + if (cpu_has_feature(CPU_FTR_L2CR)) { + struct device_node *np = of_find_node_by_name(NULL, "cpus"); + if (np == 0) + np = of_find_node_by_type(NULL, "cpu"); + if (np != 0) { + const unsigned int *l2cr = + of_get_property(np, "l2cr-value", NULL); + if (l2cr != 0) { + ppc_override_l2cr = 1; + ppc_override_l2cr_value = *l2cr; + _set_L2CR(0); + _set_L2CR(ppc_override_l2cr_value); + } + of_node_put(np); + } + } + + if (ppc_override_l2cr) + printk(KERN_INFO "L2CR overridden (0x%x), " + "backside cache is %s\n", + ppc_override_l2cr_value, + (ppc_override_l2cr_value & 0x80000000) + ? "enabled" : "disabled"); +} +#endif + +static void __init pmac_setup_arch(void) +{ + struct device_node *cpu, *ic; + const int *fp; + unsigned long pvr; + + pvr = PVR_VER(mfspr(SPRN_PVR)); + + /* Set loops_per_jiffy to a half-way reasonable value, + for use until calibrate_delay gets called. 
*/ + loops_per_jiffy = 50000000 / HZ; + cpu = of_find_node_by_type(NULL, "cpu"); + if (cpu != NULL) { + fp = of_get_property(cpu, "clock-frequency", NULL); + if (fp != NULL) { + if (pvr >= 0x30 && pvr < 0x80) + /* PPC970 etc. */ + loops_per_jiffy = *fp / (3 * HZ); + else if (pvr == 4 || pvr >= 8) + /* 604, G3, G4 etc. */ + loops_per_jiffy = *fp / HZ; + else + /* 601, 603, etc. */ + loops_per_jiffy = *fp / (2 * HZ); + } + of_node_put(cpu); + } + + /* See if newworld or oldworld */ + ic = of_find_node_with_property(NULL, "interrupt-controller"); + if (ic) { + pmac_newworld = 1; + of_node_put(ic); + } + + /* Lookup PCI hosts */ + pmac_pci_init(); + +#ifdef CONFIG_PPC32 + ohare_init(); + l2cr_init(); +#endif /* CONFIG_PPC32 */ + + find_via_cuda(); + find_via_pmu(); + smu_init(); + +#if defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE) || \ + defined(CONFIG_PPC64) + pmac_nvram_init(); +#endif + +#ifdef CONFIG_PPC32 +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) + ROOT_DEV = Root_RAM0; + else +#endif + ROOT_DEV = DEFAULT_ROOT_DEVICE; +#endif + +#ifdef CONFIG_ADB + if (strstr(boot_command_line, "adb_sync")) { + extern int __adb_probe_sync; + __adb_probe_sync = 1; + } +#endif /* CONFIG_ADB */ +} + +#ifdef CONFIG_SCSI +void note_scsi_host(struct device_node *node, void *host) +{ +} +EXPORT_SYMBOL(note_scsi_host); +#endif + +static int initializing = 1; + +static int pmac_late_init(void) +{ + initializing = 0; + return 0; +} +machine_late_initcall(powermac, pmac_late_init); + +/* + * This is __init_refok because we check for "initializing" before + * touching any of the __init sensitive things and "initializing" + * will be false after __init time. This can't be __init because it + * can be called whenever a disk is first accessed. + */ +void __init_refok note_bootable_part(dev_t dev, int part, int goodness) +{ + char *p; + + if (!initializing) + return; + if ((goodness <= current_root_goodness) && + ROOT_DEV != DEFAULT_ROOT_DEVICE) + return; + p = strstr(boot_command_line, "root="); + if (p != NULL && (p == boot_command_line || p[-1] == ' ')) + return; + + ROOT_DEV = dev + part; + current_root_goodness = goodness; +} + +#ifdef CONFIG_ADB_CUDA +static void cuda_restart(void) +{ + struct adb_request req; + + cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_RESET_SYSTEM); + for (;;) + cuda_poll(); +} + +static void cuda_shutdown(void) +{ + struct adb_request req; + + cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_POWERDOWN); + for (;;) + cuda_poll(); +} + +#else +#define cuda_restart() +#define cuda_shutdown() +#endif + +#ifndef CONFIG_ADB_PMU +#define pmu_restart() +#define pmu_shutdown() +#endif + +#ifndef CONFIG_PMAC_SMU +#define smu_restart() +#define smu_shutdown() +#endif + +static void pmac_restart(char *cmd) +{ + switch (sys_ctrler) { + case SYS_CTRLER_CUDA: + cuda_restart(); + break; + case SYS_CTRLER_PMU: + pmu_restart(); + break; + case SYS_CTRLER_SMU: + smu_restart(); + break; + default: ; + } +} + +static void pmac_power_off(void) +{ + switch (sys_ctrler) { + case SYS_CTRLER_CUDA: + cuda_shutdown(); + break; + case SYS_CTRLER_PMU: + pmu_shutdown(); + break; + case SYS_CTRLER_SMU: + smu_shutdown(); + break; + default: ; + } +} + +static void +pmac_halt(void) +{ + pmac_power_off(); +} + +/* + * Early initialization. 
+ */
+static void __init pmac_init_early(void)
+{
+	/* Enable early btext debug if requested */
+	if (strstr(boot_command_line, "btextdbg")) {
+		udbg_adb_init_early();
+		register_early_udbg_console();
+	}
+
+	/* Probe motherboard chipset */
+	pmac_feature_init();
+
+	/* Initialize debug stuff */
+	udbg_scc_init(!!strstr(boot_command_line, "sccdbg"));
+	udbg_adb_init(!!strstr(boot_command_line, "btextdbg"));
+
+#ifdef CONFIG_PPC64
+	iommu_init_early_dart(&pmac_pci_controller_ops);
+#endif
+
+	/* SMP Init has to be done early as we need to patch up
+	 * cpu_possible_mask before interrupt stacks are allocated
+	 * or kaboom...
+	 */
+#ifdef CONFIG_SMP
+	pmac_setup_smp();
+#endif
+}
+
+static int __init pmac_declare_of_platform_devices(void)
+{
+	struct device_node *np;
+
+	if (machine_is(chrp))
+		return -1;
+
+	np = of_find_node_by_name(NULL, "valkyrie");
+	if (np) {
+		of_platform_device_create(np, "valkyrie", NULL);
+		of_node_put(np);
+	}
+	np = of_find_node_by_name(NULL, "platinum");
+	if (np) {
+		of_platform_device_create(np, "platinum", NULL);
+		of_node_put(np);
+	}
+	np = of_find_node_by_type(NULL, "smu");
+	if (np) {
+		of_platform_device_create(np, "smu", NULL);
+		of_node_put(np);
+	}
+	np = of_find_node_by_type(NULL, "fcu");
+	if (np == NULL) {
+		/* Some machines have strangely broken device-tree */
+		np = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/fan@15e");
+	}
+	if (np) {
+		of_platform_device_create(np, "temperature", NULL);
+		of_node_put(np);
+	}
+
+	return 0;
+}
+machine_device_initcall(powermac, pmac_declare_of_platform_devices);
+
+#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE
+/*
+ * This is called very early, as part of console_init() (typically just after
+ * time_init()). This function is responsible for trying to find a good
+ * default console on serial ports. It tries to match the open firmware
+ * default output with one of the available serial console drivers.
+ */
+static int __init check_pmac_serial_console(void)
+{
+	struct device_node *prom_stdout = NULL;
+	int offset = 0;
+	const char *name;
+#ifdef CONFIG_SERIAL_PMACZILOG_TTYS
+	char *devname = "ttyS";
+#else
+	char *devname = "ttyPZ";
+#endif
+
+	pr_debug(" -> check_pmac_serial_console()\n");
+
+	/* The user has requested a console so this is already set up. */
+	if (strstr(boot_command_line, "console=")) {
+		pr_debug(" console was specified !\n");
+		return -EBUSY;
+	}
+
+	if (!of_chosen) {
+		pr_debug(" of_chosen is NULL !\n");
+		return -ENODEV;
+	}
+
+	/* We are getting a weird phandle from OF ... */
+	/* ... So use the full path instead */
+	name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (name == NULL) {
+		pr_debug(" no linux,stdout-path !\n");
+		return -ENODEV;
+	}
+	prom_stdout = of_find_node_by_path(name);
+	if (!prom_stdout) {
+		pr_debug(" can't find stdout package %s !\n", name);
+		return -ENODEV;
+	}
+	pr_debug("stdout is %s\n", prom_stdout->full_name);
+
+	name = of_get_property(prom_stdout, "name", NULL);
+	if (!name) {
+		pr_debug(" stdout package has no name !\n");
+		goto not_found;
+	}
+
+	if (strcmp(name, "ch-a") == 0)
+		offset = 0;
+	else if (strcmp(name, "ch-b") == 0)
+		offset = 1;
+	else
+		goto not_found;
+	of_node_put(prom_stdout);
+
+	pr_debug("Found serial console at %s%d\n", devname, offset);
+
+	return add_preferred_console(devname, offset, NULL);
+
+ not_found:
+	pr_debug("No preferred console found !\n");
+	of_node_put(prom_stdout);
+	return -ENODEV;
+}
+console_initcall(check_pmac_serial_console);
+
+#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init pmac_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "Power Macintosh") &&
+	    !of_flat_dt_is_compatible(root, "MacRISC"))
+		return 0;
+
+#ifdef CONFIG_PPC64
+	/*
+	 * On U3, the DART (iommu) must be allocated now since it
+	 * has an impact on htab_initialize (due to the large page it
+	 * occupies having to be broken up so the DART itself is not
+	 * part of the cacheable linear mapping)
+	 */
+	alloc_dart_table();
+
+	hpte_init_native();
+#endif
+
+#ifdef CONFIG_PPC32
+	/* isa_io_base gets set in pmac_pci_init */
+	ISA_DMA_THRESHOLD = ~0L;
+	DMA_MODE_READ = 1;
+	DMA_MODE_WRITE = 2;
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PMAC_SMU
+	/*
+	 * SMU based G5s need some memory below 2Gb, at least the current
+	 * driver needs that. We have to allocate it now. We allocate 4k
+	 * (1 small page) for now.
+	 */
+	smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL);
+#endif /* CONFIG_PMAC_SMU */
+
+	pm_power_off = pmac_power_off;
+
+	return 1;
+}
+
+define_machine(powermac) {
+	.name			= "PowerMac",
+	.probe			= pmac_probe,
+	.setup_arch		= pmac_setup_arch,
+	.init_early		= pmac_init_early,
+	.show_cpuinfo		= pmac_show_cpuinfo,
+	.init_IRQ		= pmac_pic_init,
+	.get_irq		= NULL,	/* changed later */
+	.pci_irq_fixup		= pmac_pci_irq_fixup,
+	.restart		= pmac_restart,
+	.halt			= pmac_halt,
+	.time_init		= pmac_time_init,
+	.get_boot_time		= pmac_get_boot_time,
+	.set_rtc_time		= pmac_set_rtc_time,
+	.get_rtc_time		= pmac_get_rtc_time,
+	.calibrate_decr		= pmac_calibrate_decr,
+	.feature_call		= pmac_do_feature_call,
+	.progress		= udbg_progress,
+#ifdef CONFIG_PPC64
+	.power_save		= power4_idle,
+	.enable_pmcs		= power4_enable_pmcs,
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC32
+	.pcibios_after_init	= pmac_pcibios_after_init,
+	.phys_mem_access_prot	= pci_phys_mem_access_prot,
+#endif
+};
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
new file mode 100644
index 000000000..1c2802fab
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -0,0 +1,397 @@
+/*
+ * This file contains sleep low-level functions for PowerBook G3.
+ * Copyright (C) 1999 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ * and Paul Mackerras (paulus@samba.org).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/mmu.h>
+
+#define MAGIC	0x4c617273	/* 'Lars' */
+
+/*
+ * Structure for storing CPU registers on the stack.
+ */
+#define SL_SP		0
+#define SL_PC		4
+#define SL_MSR		8
+#define SL_SDR1		0xc
+#define SL_SPRG0	0x10	/* 4 sprg's */
+#define SL_DBAT0	0x20
+#define SL_IBAT0	0x28
+#define SL_DBAT1	0x30
+#define SL_IBAT1	0x38
+#define SL_DBAT2	0x40
+#define SL_IBAT2	0x48
+#define SL_DBAT3	0x50
+#define SL_IBAT3	0x58
+#define SL_TB		0x60
+#define SL_R2		0x68
+#define SL_CR		0x6c
+#define SL_R12		0x70	/* r12 to r31 */
+#define SL_SIZE		(SL_R12 + 80)
+
+	.section .text
+	.align	5
+
+#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC) || \
+	(defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32))
+
+/* This gets called by via-pmu.c late during the sleep process.
+ * The PMU has already been sent the sleep command and will shut
+ * us down soon. We need to save all that is needed and set up the
+ * wakeup vector that will be called by the ROM on wakeup.
+ */
+_GLOBAL(low_sleep_handler)
+#ifndef CONFIG_6xx
+	blr
+#else
+	mflr	r0
+	stw	r0,4(r1)
+	stwu	r1,-SL_SIZE(r1)
+	mfcr	r0
+	stw	r0,SL_CR(r1)
+	stw	r2,SL_R2(r1)
+	stmw	r12,SL_R12(r1)
+
+	/* Save MSR & SDR1 */
+	mfmsr	r4
+	stw	r4,SL_MSR(r1)
+	mfsdr1	r4
+	stw	r4,SL_SDR1(r1)
+
+	/* Get a stable timebase and save it */
+1:	mftbu	r4
+	stw	r4,SL_TB(r1)
+	mftb	r5
+	stw	r5,SL_TB+4(r1)
+	mftbu	r3
+	cmpw	r3,r4
+	bne	1b
+
+	/* Save SPRGs */
+	mfsprg	r4,0
+	stw	r4,SL_SPRG0(r1)
+	mfsprg	r4,1
+	stw	r4,SL_SPRG0+4(r1)
+	mfsprg	r4,2
+	stw	r4,SL_SPRG0+8(r1)
+	mfsprg	r4,3
+	stw	r4,SL_SPRG0+12(r1)
+
+	/* Save BATs */
+	mfdbatu	r4,0
+	stw	r4,SL_DBAT0(r1)
+	mfdbatl	r4,0
+	stw	r4,SL_DBAT0+4(r1)
+	mfdbatu	r4,1
+	stw	r4,SL_DBAT1(r1)
+	mfdbatl	r4,1
+	stw	r4,SL_DBAT1+4(r1)
+	mfdbatu	r4,2
+	stw	r4,SL_DBAT2(r1)
+	mfdbatl	r4,2
+	stw	r4,SL_DBAT2+4(r1)
+	mfdbatu	r4,3
+	stw	r4,SL_DBAT3(r1)
+	mfdbatl	r4,3
+	stw	r4,SL_DBAT3+4(r1)
+	mfibatu	r4,0
+	stw	r4,SL_IBAT0(r1)
+	mfibatl	r4,0
+	stw	r4,SL_IBAT0+4(r1)
+	mfibatu	r4,1
+	stw	r4,SL_IBAT1(r1)
+	mfibatl	r4,1
+	stw	r4,SL_IBAT1+4(r1)
+	mfibatu	r4,2
+	stw	r4,SL_IBAT2(r1)
+	mfibatl	r4,2
+	stw	r4,SL_IBAT2+4(r1)
+	mfibatu	r4,3
+	stw	r4,SL_IBAT3(r1)
+	mfibatl	r4,3
+	stw	r4,SL_IBAT3+4(r1)
+
+	/* Back up various CPU config stuffs */
+	bl	__save_cpu_setup
+
+	/* The ROM can wake us up via 2 different vectors:
+	 * - On wallstreet & lombard, we must write a magic
+	 *   value 'Lars' at address 4 and a pointer to a
+	 *   memory location containing the PC to resume from
+	 *   at address 0.
+	 * - On Core99, we must store the wakeup vector at
+	 *   address 0x80 and eventually its parameters
+	 *   at address 0x84. I've had some trouble with those
+	 *   parameters however and I no longer use them.
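+	 *
+	 * Net result in low memory (all physical, written through the
+	 * KERNELBASE mapping below):
+	 *
+	 *	0x00	pointer to SL_PC(sp), the slot holding the resume PC
+	 *	0x04	MAGIC ('Lars'), for wallstreet & lombard
+	 *	0x80	core99_wake_up entry point
+	 *	0x84	the same SL_PC pointer, as the Core99 argument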
+	 */
+	lis	r5,grackle_wake_up@ha
+	addi	r5,r5,grackle_wake_up@l
+	tophys(r5,r5)
+	stw	r5,SL_PC(r1)
+	lis	r4,KERNELBASE@h
+	tophys(r5,r1)
+	addi	r5,r5,SL_PC
+	lis	r6,MAGIC@ha
+	addi	r6,r6,MAGIC@l
+	stw	r5,0(r4)
+	stw	r6,4(r4)
+	/* Setup stuffs at 0x80-0x84 for Core99 */
+	lis	r3,core99_wake_up@ha
+	addi	r3,r3,core99_wake_up@l
+	tophys(r3,r3)
+	stw	r3,0x80(r4)
+	stw	r5,0x84(r4)
+	/* Store a pointer to our backup storage into
+	 * a kernel global
+	 */
+	lis	r3,sleep_storage@ha
+	addi	r3,r3,sleep_storage@l
+	stw	r5,0(r3)
+
+	.globl	low_cpu_die
+low_cpu_die:
+	/* Flush & disable all caches */
+	bl	flush_disable_caches
+
+	/* Turn off data relocation. */
+	mfmsr	r3		/* Save MSR in r3 */
+	rlwinm	r3,r3,0,28,26	/* Turn off DR bit */
+	sync
+	mtmsr	r3
+	isync
+
+BEGIN_FTR_SECTION
+	/* Flush any pending L2 data prefetches to work around HW bug */
+	sync
+	lis	r3,0xfff0
+	lwz	r0,0(r3)	/* perform cache-inhibited load to ROM */
+	sync			/* (caches are disabled at this point) */
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+
+/*
+ * Set the HID0 and MSR for sleep.
+ */
+	mfspr	r2,SPRN_HID0
+	rlwinm	r2,r2,0,10,7	/* clear doze, nap */
+	oris	r2,r2,HID0_SLEEP@h
+	sync
+	isync
+	mtspr	SPRN_HID0,r2
+	sync
+
+/* This loop puts us back to sleep in case we have a spurious
+ * wakeup so that the host bridge properly stays asleep. The
+ * CPU will be turned off, either after a known time (about 1
+ * second) on wallstreet & lombard, or as soon as the CPU enters
+ * SLEEP mode on core99
+ */
+	mfmsr	r2
+	oris	r2,r2,MSR_POW@h
+1:	sync
+	mtmsr	r2
+	isync
+	b	1b
+
+/*
+ * Here is the resume code.
+ */
+
+
+/*
+ * Core99 machines resume here
+ * r4 has the physical address of SL_PC(sp) (unused)
+ */
+_GLOBAL(core99_wake_up)
+	/* Make sure HID0 no longer contains any sleep bit and that data cache
+	 * is disabled
+	 */
+	mfspr	r3,SPRN_HID0
+	rlwinm	r3,r3,0,11,7	/* clear SLEEP, NAP, DOZE bits */
+	rlwinm	r3,r3,0,18,15	/* clear DCE, ICE */
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+
+	/* sanitize MSR */
+	mfmsr	r3
+	ori	r3,r3,MSR_EE|MSR_IP
+	xori	r3,r3,MSR_EE|MSR_IP
+	sync
+	isync
+	mtmsr	r3
+	sync
+	isync
+
+	/* Recover sleep storage */
+	lis	r3,sleep_storage@ha
+	addi	r3,r3,sleep_storage@l
+	tophys(r3,r3)
+	lwz	r1,0(r3)
+
+	/* Pass thru to older resume code ... */
+/*
+ * Here is the resume code for older machines.
+ * r1 has the physical address of SL_PC(sp).
+ */
+
+grackle_wake_up:
+
+	/* Restore the kernel's segment registers before
+	 * we do any r1 memory access as we are not sure they
+	 * are in a sane state above the first 256Mb region
+	 */
+	li	r0,16		/* load up segment register values */
+	mtctr	r0		/* for context 0 */
+	lis	r3,0x2000	/* Ku = 1, VSID = 0 */
+	li	r4,0
+3:	mtsrin	r3,r4
+	addi	r3,r3,0x111	/* increment VSID */
+	addis	r4,r4,0x1000	/* address of next segment */
+	bdnz	3b
+	sync
+	isync
+
+	subi	r1,r1,SL_PC
+
+	/* Restore various CPU config stuffs */
+	bl	__restore_cpu_setup
+
+	/* Make sure all FPRs have been initialized */
+	bl	reloc_offset
+	bl	__init_fpu_registers
+
+	/* Invalidate & enable L1 cache, we don't care about
+	 * whatever the ROM may have tried to write to memory
+	 */
+	bl	__inval_enable_L1
+
+	/* Restore the BATs, and SDR1.  Then we can turn on the MMU.
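+	 * The loads below simply mirror the save order in
+	 * low_sleep_handler: SDR1 first, then SPRG0-3, then each DBAT and
+	 * IBAT as an (upper, lower) word pair from its SL_* slot.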
*/ + lwz r4,SL_SDR1(r1) + mtsdr1 r4 + lwz r4,SL_SPRG0(r1) + mtsprg 0,r4 + lwz r4,SL_SPRG0+4(r1) + mtsprg 1,r4 + lwz r4,SL_SPRG0+8(r1) + mtsprg 2,r4 + lwz r4,SL_SPRG0+12(r1) + mtsprg 3,r4 + + lwz r4,SL_DBAT0(r1) + mtdbatu 0,r4 + lwz r4,SL_DBAT0+4(r1) + mtdbatl 0,r4 + lwz r4,SL_DBAT1(r1) + mtdbatu 1,r4 + lwz r4,SL_DBAT1+4(r1) + mtdbatl 1,r4 + lwz r4,SL_DBAT2(r1) + mtdbatu 2,r4 + lwz r4,SL_DBAT2+4(r1) + mtdbatl 2,r4 + lwz r4,SL_DBAT3(r1) + mtdbatu 3,r4 + lwz r4,SL_DBAT3+4(r1) + mtdbatl 3,r4 + lwz r4,SL_IBAT0(r1) + mtibatu 0,r4 + lwz r4,SL_IBAT0+4(r1) + mtibatl 0,r4 + lwz r4,SL_IBAT1(r1) + mtibatu 1,r4 + lwz r4,SL_IBAT1+4(r1) + mtibatl 1,r4 + lwz r4,SL_IBAT2(r1) + mtibatu 2,r4 + lwz r4,SL_IBAT2+4(r1) + mtibatl 2,r4 + lwz r4,SL_IBAT3(r1) + mtibatu 3,r4 + lwz r4,SL_IBAT3+4(r1) + mtibatl 3,r4 + +BEGIN_MMU_FTR_SECTION + li r4,0 + mtspr SPRN_DBAT4U,r4 + mtspr SPRN_DBAT4L,r4 + mtspr SPRN_DBAT5U,r4 + mtspr SPRN_DBAT5L,r4 + mtspr SPRN_DBAT6U,r4 + mtspr SPRN_DBAT6L,r4 + mtspr SPRN_DBAT7U,r4 + mtspr SPRN_DBAT7L,r4 + mtspr SPRN_IBAT4U,r4 + mtspr SPRN_IBAT4L,r4 + mtspr SPRN_IBAT5U,r4 + mtspr SPRN_IBAT5L,r4 + mtspr SPRN_IBAT6U,r4 + mtspr SPRN_IBAT6L,r4 + mtspr SPRN_IBAT7U,r4 + mtspr SPRN_IBAT7L,r4 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + + /* Flush all TLBs */ + lis r4,0x1000 +1: addic. r4,r4,-0x1000 + tlbie r4 + blt 1b + sync + + /* restore the MSR and turn on the MMU */ + lwz r3,SL_MSR(r1) + bl turn_on_mmu + + /* get back the stack pointer */ + tovirt(r1,r1) + + /* Restore TB */ + li r3,0 + mttbl r3 + lwz r3,SL_TB(r1) + lwz r4,SL_TB+4(r1) + mttbu r3 + mttbl r4 + + /* Restore the callee-saved registers and return */ + lwz r0,SL_CR(r1) + mtcr r0 + lwz r2,SL_R2(r1) + lmw r12,SL_R12(r1) + addi r1,r1,SL_SIZE + lwz r0,4(r1) + mtlr r0 + blr + +turn_on_mmu: + mflr r4 + tovirt(r4,r4) + mtsrr0 r4 + mtsrr1 r3 + sync + isync + rfi + +#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ + + .section .data + .balign L1_CACHE_BYTES +sleep_storage: + .long 0 + .balign L1_CACHE_BYTES, 0 + +#endif /* CONFIG_6xx */ + .section .text diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c new file mode 100644 index 000000000..28a147ca3 --- /dev/null +++ b/arch/powerpc/platforms/powermac/smp.c @@ -0,0 +1,1033 @@ +/* + * SMP support for power macintosh. + * + * We support both the old "powersurge" SMP architecture + * and the current Core99 (G4 PowerMac) machines. + * + * Note that we don't support the very first rev. of + * Apple/DayStar 2 CPUs board, the one with the funky + * watchdog. Hopefully, none of these should be there except + * maybe internally to Apple. I should probably still add some + * code to detect this card though and disable SMP. --BenH. + * + * Support Macintosh G4 SMP by Troy Benjegerdes (hozer@drgw.net) + * and Ben Herrenschmidt <benh@kernel.crashing.org>. + * + * Support for DayStar quad CPU cards + * Copyright (C) XLR8, Inc. 1994-2000 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/hardirq.h> +#include <linux/cpu.h> +#include <linux/compiler.h> + +#include <asm/ptrace.h> +#include <linux/atomic.h> +#include <asm/code-patching.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> +#include <asm/time.h> +#include <asm/mpic.h> +#include <asm/cacheflush.h> +#include <asm/keylargo.h> +#include <asm/pmac_low_i2c.h> +#include <asm/pmac_pfunc.h> + +#include "pmac.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +extern void __secondary_start_pmac_0(void); +extern int pmac_pfunc_base_install(void); + +static void (*pmac_tb_freeze)(int freeze); +static u64 timebase; +static int tb_req; + +#ifdef CONFIG_PPC_PMAC32_PSURGE + +/* + * Powersurge (old powermac SMP) support. + */ + +/* Addresses for powersurge registers */ +#define HAMMERHEAD_BASE 0xf8000000 +#define HHEAD_CONFIG 0x90 +#define HHEAD_SEC_INTR 0xc0 + +/* register for interrupting the primary processor on the powersurge */ +/* N.B. this is actually the ethernet ROM! */ +#define PSURGE_PRI_INTR 0xf3019000 + +/* register for storing the start address for the secondary processor */ +/* N.B. this is the PCI config space address register for the 1st bridge */ +#define PSURGE_START 0xf2800000 + +/* Daystar/XLR8 4-CPU card */ +#define PSURGE_QUAD_REG_ADDR 0xf8800000 + +#define PSURGE_QUAD_IRQ_SET 0 +#define PSURGE_QUAD_IRQ_CLR 1 +#define PSURGE_QUAD_IRQ_PRIMARY 2 +#define PSURGE_QUAD_CKSTOP_CTL 3 +#define PSURGE_QUAD_PRIMARY_ARB 4 +#define PSURGE_QUAD_BOARD_ID 6 +#define PSURGE_QUAD_WHICH_CPU 7 +#define PSURGE_QUAD_CKSTOP_RDBK 8 +#define PSURGE_QUAD_RESET_CTL 11 + +#define PSURGE_QUAD_OUT(r, v) (out_8(quad_base + ((r) << 4) + 4, (v))) +#define PSURGE_QUAD_IN(r) (in_8(quad_base + ((r) << 4) + 4) & 0x0f) +#define PSURGE_QUAD_BIS(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) | (v))) +#define PSURGE_QUAD_BIC(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) & ~(v))) + +/* virtual addresses for the above */ +static volatile u8 __iomem *hhead_base; +static volatile u8 __iomem *quad_base; +static volatile u32 __iomem *psurge_pri_intr; +static volatile u8 __iomem *psurge_sec_intr; +static volatile u32 __iomem *psurge_start; + +/* values for psurge_type */ +#define PSURGE_NONE -1 +#define PSURGE_DUAL 0 +#define PSURGE_QUAD_OKEE 1 +#define PSURGE_QUAD_COTTON 2 +#define PSURGE_QUAD_ICEGRASS 3 + +/* what sort of powersurge board we have */ +static int psurge_type = PSURGE_NONE; + +/* irq for secondary cpus to report */ +static struct irq_domain *psurge_host; +int psurge_secondary_virq; + +/* + * Set and clear IPIs for powersurge. 
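+ *
+ * Mechanics, using the registers mapped above: CPU 0 is poked by
+ * reading PSURGE_PRI_INTR (really the ethernet ROM address), a dual's
+ * secondary by writing psurge_sec_intr, and quad cards through the
+ * PSURGE_QUAD_IRQ_SET/CLR registers with a (1 << cpu) bit.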
+ */
+static inline void psurge_set_ipi(int cpu)
+{
+	if (psurge_type == PSURGE_NONE)
+		return;
+	if (cpu == 0)
+		in_be32(psurge_pri_intr);
+	else if (psurge_type == PSURGE_DUAL)
+		out_8(psurge_sec_intr, 0);
+	else
+		PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_SET, 1 << cpu);
+}
+
+static inline void psurge_clr_ipi(int cpu)
+{
+	if (cpu > 0) {
+		switch(psurge_type) {
+		case PSURGE_DUAL:
+			out_8(psurge_sec_intr, ~0);
+			/* fall through */
+		case PSURGE_NONE:
+			break;
+		default:
+			PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, 1 << cpu);
+		}
+	}
+}
+
+/*
+ * On powersurge (old SMP powermac architecture) we don't have
+ * separate IPIs for separate messages like openpic does. Instead
+ * use the generic demux helpers
+ *  -- paulus.
+ */
+static irqreturn_t psurge_ipi_intr(int irq, void *d)
+{
+	psurge_clr_ipi(smp_processor_id());
+	smp_ipi_demux();
+
+	return IRQ_HANDLED;
+}
+
+static void smp_psurge_cause_ipi(int cpu, unsigned long data)
+{
+	psurge_set_ipi(cpu);
+}
+
+static int psurge_host_map(struct irq_domain *h, unsigned int virq,
+			   irq_hw_number_t hw)
+{
+	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_percpu_irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops psurge_host_ops = {
+	.map = psurge_host_map,
+};
+
+static int psurge_secondary_ipi_init(void)
+{
+	int rc = -ENOMEM;
+
+	psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL);
+
+	if (psurge_host)
+		psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
+
+	if (psurge_secondary_virq)
+		rc = request_irq(psurge_secondary_virq, psurge_ipi_intr,
+				 IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL);
+
+	if (rc)
+		pr_err("Failed to setup secondary cpu IPI\n");
+
+	return rc;
+}
+
+/*
+ * Detect quad card presence. We read the board ID register, force
+ * the data bus to change to something else, and read it again.
+ * If it's stable, then the register probably exists (ugh !)
+ */
+static int __init psurge_quad_probe(void)
+{
+	int type;
+	unsigned int i;
+
+	type = PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID);
+	if (type < PSURGE_QUAD_OKEE || type > PSURGE_QUAD_ICEGRASS
+	    || type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID))
+		return PSURGE_DUAL;
+
+	/* looks OK, try a slightly more rigorous test */
+	/* bogus is not necessarily cacheline-aligned,
+	   though I don't suppose that really matters.
+	   -- paulus */
+	for (i = 0; i < 100; i++) {
+		volatile u32 bogus[8];
+		bogus[(0+i)%8] = 0x00000000;
+		bogus[(1+i)%8] = 0x55555555;
+		bogus[(2+i)%8] = 0xFFFFFFFF;
+		bogus[(3+i)%8] = 0xAAAAAAAA;
+		bogus[(4+i)%8] = 0x33333333;
+		bogus[(5+i)%8] = 0xCCCCCCCC;
+		bogus[(6+i)%8] = 0xCCCCCCCC;
+		bogus[(7+i)%8] = 0x33333333;
+		wmb();
+		asm volatile("dcbf 0,%0" : : "r" (bogus) : "memory");
+		mb();
+		if (type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID))
+			return PSURGE_DUAL;
+	}
+	return type;
+}
+
+static void __init psurge_quad_init(void)
+{
+	int procbits;
+
+	if (ppc_md.progress) ppc_md.progress("psurge_quad_init", 0x351);
+	procbits = ~PSURGE_QUAD_IN(PSURGE_QUAD_WHICH_CPU);
+	if (psurge_type == PSURGE_QUAD_ICEGRASS)
+		PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits);
+	else
+		PSURGE_QUAD_BIC(PSURGE_QUAD_CKSTOP_CTL, procbits);
+	mdelay(33);
+	out_8(psurge_sec_intr, ~0);
+	PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, procbits);
+	PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits);
+	if (psurge_type != PSURGE_QUAD_ICEGRASS)
+		PSURGE_QUAD_BIS(PSURGE_QUAD_CKSTOP_CTL, procbits);
+	PSURGE_QUAD_BIC(PSURGE_QUAD_PRIMARY_ARB, procbits);
+	mdelay(33);
+	PSURGE_QUAD_BIC(PSURGE_QUAD_RESET_CTL, procbits);
+	mdelay(33);
+	PSURGE_QUAD_BIS(PSURGE_QUAD_PRIMARY_ARB, procbits);
+	mdelay(33);
+}
+
+static void __init smp_psurge_probe(void)
+{
+	int i, ncpus;
+	struct device_node *dn;
+
+	/* We don't do SMP on the PPC601 -- paulus */
+	if (PVR_VER(mfspr(SPRN_PVR)) == 1)
+		return;
+
+	/*
+	 * The powersurge cpu board can be used in the generation
+	 * of powermacs that have a socket for an upgradeable cpu card,
+	 * including the 7500, 8500, 9500, 9600.
+	 * The device tree doesn't tell you if you have 2 cpus because
+	 * OF doesn't know anything about the 2nd processor.
+	 * Instead we look for magic bits in magic registers,
+	 * in the hammerhead memory controller in the case of the
+	 * dual-cpu powersurge board.  -- paulus.
+	 */
+	dn = of_find_node_by_name(NULL, "hammerhead");
+	if (dn == NULL)
+		return;
+	of_node_put(dn);
+
+	hhead_base = ioremap(HAMMERHEAD_BASE, 0x800);
+	quad_base = ioremap(PSURGE_QUAD_REG_ADDR, 1024);
+	psurge_sec_intr = hhead_base + HHEAD_SEC_INTR;
+
+	psurge_type = psurge_quad_probe();
+	if (psurge_type != PSURGE_DUAL) {
+		psurge_quad_init();
+		/* All released cards using this HW design have 4 CPUs */
+		ncpus = 4;
+		/* Not sure how timebase sync works on those, let's use SW */
+		smp_ops->give_timebase = smp_generic_give_timebase;
+		smp_ops->take_timebase = smp_generic_take_timebase;
+	} else {
+		iounmap(quad_base);
+		if ((in_8(hhead_base + HHEAD_CONFIG) & 0x02) == 0) {
+			/* not a dual-cpu card */
+			iounmap(hhead_base);
+			psurge_type = PSURGE_NONE;
+			return;
+		}
+		ncpus = 2;
+	}
+
+	if (psurge_secondary_ipi_init())
+		return;
+
+	psurge_start = ioremap(PSURGE_START, 4);
+	psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
+
+	/* This is necessary because OF doesn't know about the
+	 * secondary cpu(s), and thus there aren't nodes in the
+	 * device tree for them, and smp_setup_cpu_maps hasn't
+	 * set their bits in cpu_present_mask.
+	 */
+	if (ncpus > NR_CPUS)
+		ncpus = NR_CPUS;
+	for (i = 1; i < ncpus; ++i)
+		set_cpu_present(i, true);
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
+}
+
+static int __init smp_psurge_kick_cpu(int nr)
+{
+	unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8;
+	unsigned long a, flags;
+	int i, j;
+
+	/* Defining this here is evil ... but I prefer hiding that
+	 * crap to avoid giving people ideas that they can do the
+	 * same.
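+	 *
+	 * The kick protocol that follows: flush the low 8MB above
+	 * KERNELBASE from the data cache, write the secondary's entry
+	 * point into PSURGE_START, raise its IPI, then busy-wait here
+	 * (the timebase is frozen, so udelay() is off limits) until the
+	 * CPU shows up in cpu_callin_map and both timebases can be
+	 * restarted together.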
+	 */
+	extern volatile unsigned int cpu_callin_map[NR_CPUS];
+
+	/* may need to flush here if secondary bats aren't setup */
+	for (a = KERNELBASE; a < KERNELBASE + 0x800000; a += 32)
+		asm volatile("dcbf 0,%0" : : "r" (a) : "memory");
+	asm volatile("sync");
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu", 0x353);
+
+	/* This is going to freeze the timebase, so we disable interrupts */
+	local_irq_save(flags);
+
+	out_be32(psurge_start, start);
+	mb();
+
+	psurge_set_ipi(nr);
+
+	/*
+	 * We can't use udelay here because the timebase is now frozen.
+	 */
+	for (i = 0; i < 2000; ++i)
+		asm volatile("nop" : : : "memory");
+	psurge_clr_ipi(nr);
+
+	/*
+	 * Also, because the timebase is frozen, we must not return to the
+	 * caller which will try to do udelay's etc... Instead, we wait -here-
+	 * for the CPU to callin.
+	 */
+	for (i = 0; i < 100000 && !cpu_callin_map[nr]; ++i) {
+		for (j = 1; j < 10000; j++)
+			asm volatile("nop" : : : "memory");
+		asm volatile("sync" : : : "memory");
+	}
+	if (!cpu_callin_map[nr])
+		goto stuck;
+
+	/* And we do the TB sync here too for standard dual CPU cards */
+	if (psurge_type == PSURGE_DUAL) {
+		while(!tb_req)
+			barrier();
+		tb_req = 0;
+		mb();
+		timebase = get_tb();
+		mb();
+		while (timebase)
+			barrier();
+		mb();
+	}
+ stuck:
+	/* now interrupt the secondary, restarting both TBs */
+	if (psurge_type == PSURGE_DUAL)
+		psurge_set_ipi(1);
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354);
+
+	return 0;
+}
+
+static struct irqaction psurge_irqaction = {
+	.handler = psurge_ipi_intr,
+	.flags = IRQF_PERCPU | IRQF_NO_THREAD,
+	.name = "primary IPI",
+};
+
+static void __init smp_psurge_setup_cpu(int cpu_nr)
+{
+	if (cpu_nr != 0 || !psurge_start)
+		return;
+
+	/* reset the entry point so if we get another intr we won't
+	 * try to start up again */
+	out_be32(psurge_start, 0x100);
+	if (setup_irq(irq_create_mapping(NULL, 30), &psurge_irqaction))
+		printk(KERN_ERR "Couldn't get primary IPI interrupt\n");
+}
+
+void __init smp_psurge_take_timebase(void)
+{
+	if (psurge_type != PSURGE_DUAL)
+		return;
+
+	tb_req = 1;
+	mb();
+	while (!timebase)
+		barrier();
+	mb();
+	set_tb(timebase >> 32, timebase & 0xffffffff);
+	timebase = 0;
+	mb();
+	set_dec(tb_ticks_per_jiffy/2);
+}
+
+void __init smp_psurge_give_timebase(void)
+{
+	/* Nothing to do here */
+}
+
+/* PowerSurge-style Macs */
+struct smp_ops_t psurge_smp_ops = {
+	.message_pass	= NULL,	/* Use smp_muxed_ipi_message_pass */
+	.cause_ipi	= smp_psurge_cause_ipi,
+	.probe		= smp_psurge_probe,
+	.kick_cpu	= smp_psurge_kick_cpu,
+	.setup_cpu	= smp_psurge_setup_cpu,
+	.give_timebase	= smp_psurge_give_timebase,
+	.take_timebase	= smp_psurge_take_timebase,
+};
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
+
+/*
+ * Core 99 and later support
+ */
+
+
+static void smp_core99_give_timebase(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	while(!tb_req)
+		barrier();
+	tb_req = 0;
+	(*pmac_tb_freeze)(1);
+	mb();
+	timebase = get_tb();
+	mb();
+	while (timebase)
+		barrier();
+	mb();
+	(*pmac_tb_freeze)(0);
+	mb();
+
+	local_irq_restore(flags);
+}
+
+
+static void smp_core99_take_timebase(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	tb_req = 1;
+	mb();
+	while (!timebase)
+		barrier();
+	mb();
+	set_tb(timebase >> 32, timebase & 0xffffffff);
+	timebase = 0;
+	mb();
+
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * G5s enable/disable the timebase via an i2c-connected clock chip.
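+ *
+ * Both flavours below follow the same pattern; for example the Cypress
+ * part at i2c address 0xd0, control register 0x81:
+ *
+ *	combined-mode read of the control register
+ *	data = (data & 0xf3) | (freeze ? 0x00 : 0x0c)
+ *	stdsub-mode write of the result back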
+ */
+static struct pmac_i2c_bus *pmac_tb_clock_chip_host;
+static u8 pmac_tb_pulsar_addr;
+
+static void smp_core99_cypress_tb_freeze(int freeze)
+{
+	u8 data;
+	int rc;
+
+	/* Strangely, the device-tree says address is 0xd2, but darwin
+	 * accesses 0xd0 ...
+	 */
+	pmac_i2c_setmode(pmac_tb_clock_chip_host,
+			 pmac_i2c_mode_combined);
+	rc = pmac_i2c_xfer(pmac_tb_clock_chip_host,
+			   0xd0 | pmac_i2c_read,
+			   1, 0x81, &data, 1);
+	if (rc != 0)
+		goto bail;
+
+	data = (data & 0xf3) | (freeze ? 0x00 : 0x0c);
+
+	pmac_i2c_setmode(pmac_tb_clock_chip_host, pmac_i2c_mode_stdsub);
+	rc = pmac_i2c_xfer(pmac_tb_clock_chip_host,
+			   0xd0 | pmac_i2c_write,
+			   1, 0x81, &data, 1);
+
+ bail:
+	if (rc != 0) {
+		printk(KERN_ERR "Cypress Timebase %s rc: %d\n",
+		       freeze ? "freeze" : "unfreeze", rc);
+		panic("Timebase freeze failed !\n");
+	}
+}
+
+
+static void smp_core99_pulsar_tb_freeze(int freeze)
+{
+	u8 data;
+	int rc;
+
+	pmac_i2c_setmode(pmac_tb_clock_chip_host,
+			 pmac_i2c_mode_combined);
+	rc = pmac_i2c_xfer(pmac_tb_clock_chip_host,
+			   pmac_tb_pulsar_addr | pmac_i2c_read,
+			   1, 0x2e, &data, 1);
+	if (rc != 0)
+		goto bail;
+
+	data = (data & 0x88) | (freeze ? 0x11 : 0x22);
+
+	pmac_i2c_setmode(pmac_tb_clock_chip_host, pmac_i2c_mode_stdsub);
+	rc = pmac_i2c_xfer(pmac_tb_clock_chip_host,
+			   pmac_tb_pulsar_addr | pmac_i2c_write,
+			   1, 0x2e, &data, 1);
+ bail:
+	if (rc != 0) {
+		printk(KERN_ERR "Pulsar Timebase %s rc: %d\n",
+		       freeze ? "freeze" : "unfreeze", rc);
+		panic("Timebase freeze failed !\n");
+	}
+}
+
+static void __init smp_core99_setup_i2c_hwsync(int ncpus)
+{
+	struct device_node *cc = NULL;
+	struct device_node *p;
+	const char *name = NULL;
+	const u32 *reg;
+	int ok;
+
+	/* Look for the clock chip */
+	for_each_node_by_name(cc, "i2c-hwclock") {
+		p = of_get_parent(cc);
+		ok = p && of_device_is_compatible(p, "uni-n-i2c");
+		of_node_put(p);
+		if (!ok)
+			continue;
+
+		pmac_tb_clock_chip_host = pmac_i2c_find_bus(cc);
+		if (pmac_tb_clock_chip_host == NULL)
+			continue;
+		reg = of_get_property(cc, "reg", NULL);
+		if (reg == NULL)
+			continue;
+		switch (*reg) {
+		case 0xd2:
+			if (of_device_is_compatible(cc,"pulsar-legacy-slewing")) {
+				pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
+				pmac_tb_pulsar_addr = 0xd2;
+				name = "Pulsar";
+			} else if (of_device_is_compatible(cc, "cy28508")) {
+				pmac_tb_freeze = smp_core99_cypress_tb_freeze;
+				name = "Cypress";
+			}
+			break;
+		case 0xd4:
+			pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
+			pmac_tb_pulsar_addr = 0xd4;
+			name = "Pulsar";
+			break;
+		}
+		if (pmac_tb_freeze != NULL)
+			break;
+	}
+	if (pmac_tb_freeze != NULL) {
+		/* Open i2c bus for synchronous access */
+		if (pmac_i2c_open(pmac_tb_clock_chip_host, 1)) {
+			printk(KERN_ERR "Failed to open i2c bus for clock"
+			       " sync, falling back to software sync !\n");
+			goto no_i2c_sync;
+		}
+		printk(KERN_INFO "Processor timebase sync using %s i2c clock\n",
+		       name);
+		return;
+	}
+ no_i2c_sync:
+	pmac_tb_freeze = NULL;
+	pmac_tb_clock_chip_host = NULL;
+}
+
+
+
+/*
+ * Newer G5s use a platform function
+ */
+
+static void smp_core99_pfunc_tb_freeze(int freeze)
+{
+	struct device_node *cpus;
+	struct pmf_args args;
+
+	cpus = of_find_node_by_path("/cpus");
+	BUG_ON(cpus == NULL);
+	args.count = 1;
+	args.u[0].v = !freeze;
+	pmf_call_function(cpus, "cpu-timebase", &args);
+	of_node_put(cpus);
+}
+
+#else /* CONFIG_PPC64 */
+
+/*
+ * SMP G4s use a GPIO to enable/disable the timebase.
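+ *
+ * The sequence used below, in sketch form (the trailing read is
+ * apparently there to flush the write through the feature code):
+ *
+ *	pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio,
+ *			  freeze ? 4 : 0);
+ *	pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0);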
+ */ + +static unsigned int core99_tb_gpio; /* Timebase freeze GPIO */ + +static void smp_core99_gpio_tb_freeze(int freeze) +{ + if (freeze) + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4); + else + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); + pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); +} + + +#endif /* !CONFIG_PPC64 */ + +/* L2 and L3 cache settings to pass from CPU0 to CPU1 on G4 cpus */ +volatile static long int core99_l2_cache; +volatile static long int core99_l3_cache; + +static void core99_init_caches(int cpu) +{ +#ifndef CONFIG_PPC64 + if (!cpu_has_feature(CPU_FTR_L2CR)) + return; + + if (cpu == 0) { + core99_l2_cache = _get_L2CR(); + printk("CPU0: L2CR is %lx\n", core99_l2_cache); + } else { + printk("CPU%d: L2CR was %lx\n", cpu, _get_L2CR()); + _set_L2CR(0); + _set_L2CR(core99_l2_cache); + printk("CPU%d: L2CR set to %lx\n", cpu, core99_l2_cache); + } + + if (!cpu_has_feature(CPU_FTR_L3CR)) + return; + + if (cpu == 0){ + core99_l3_cache = _get_L3CR(); + printk("CPU0: L3CR is %lx\n", core99_l3_cache); + } else { + printk("CPU%d: L3CR was %lx\n", cpu, _get_L3CR()); + _set_L3CR(0); + _set_L3CR(core99_l3_cache); + printk("CPU%d: L3CR set to %lx\n", cpu, core99_l3_cache); + } +#endif /* !CONFIG_PPC64 */ +} + +static void __init smp_core99_setup(int ncpus) +{ +#ifdef CONFIG_PPC64 + + /* i2c based HW sync on some G5s */ + if (of_machine_is_compatible("PowerMac7,2") || + of_machine_is_compatible("PowerMac7,3") || + of_machine_is_compatible("RackMac3,1")) + smp_core99_setup_i2c_hwsync(ncpus); + + /* pfunc based HW sync on recent G5s */ + if (pmac_tb_freeze == NULL) { + struct device_node *cpus = + of_find_node_by_path("/cpus"); + if (cpus && + of_get_property(cpus, "platform-cpu-timebase", NULL)) { + pmac_tb_freeze = smp_core99_pfunc_tb_freeze; + printk(KERN_INFO "Processor timebase sync using" + " platform function\n"); + } + } + +#else /* CONFIG_PPC64 */ + + /* GPIO based HW sync on ppc32 Core99 */ + if (pmac_tb_freeze == NULL && !of_machine_is_compatible("MacRISC4")) { + struct device_node *cpu; + const u32 *tbprop = NULL; + + core99_tb_gpio = KL_GPIO_TB_ENABLE; /* default value */ + cpu = of_find_node_by_type(NULL, "cpu"); + if (cpu != NULL) { + tbprop = of_get_property(cpu, "timebase-enable", NULL); + if (tbprop) + core99_tb_gpio = *tbprop; + of_node_put(cpu); + } + pmac_tb_freeze = smp_core99_gpio_tb_freeze; + printk(KERN_INFO "Processor timebase sync using" + " GPIO 0x%02x\n", core99_tb_gpio); + } + +#endif /* CONFIG_PPC64 */ + + /* No timebase sync, fallback to software */ + if (pmac_tb_freeze == NULL) { + smp_ops->give_timebase = smp_generic_give_timebase; + smp_ops->take_timebase = smp_generic_take_timebase; + printk(KERN_INFO "Processor timebase sync using software\n"); + } + +#ifndef CONFIG_PPC64 + { + int i; + + /* XXX should get this from reg properties */ + for (i = 1; i < ncpus; ++i) + set_hard_smp_processor_id(i, i); + } +#endif + + /* 32 bits SMP can't NAP */ + if (!of_machine_is_compatible("MacRISC4")) + powersave_nap = 0; +} + +static void __init smp_core99_probe(void) +{ + struct device_node *cpus; + int ncpus = 0; + + if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345); + + /* Count CPUs in the device-tree */ + for (cpus = NULL; (cpus = of_find_node_by_type(cpus, "cpu")) != NULL;) + ++ncpus; + + printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus); + + /* Nothing more to do if less than 2 of them */ + if (ncpus <= 1) + return; + + /* We need to perform some early initialisations before we 
can start + * setting up SMP as we are running before initcalls + */ + pmac_pfunc_base_install(); + pmac_i2c_init(); + + /* Setup various bits like timebase sync method, ability to nap, ... */ + smp_core99_setup(ncpus); + + /* Install IPIs */ + mpic_request_ipis(); + + /* Collect l2cr and l3cr values from CPU 0 */ + core99_init_caches(0); +} + +static int smp_core99_kick_cpu(int nr) +{ + unsigned int save_vector; + unsigned long target, flags; + unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100); + + if (nr < 0 || nr > 3) + return -ENOENT; + + if (ppc_md.progress) + ppc_md.progress("smp_core99_kick_cpu", 0x346); + + local_irq_save(flags); + + /* Save reset vector */ + save_vector = *vector; + + /* Setup fake reset vector that does + * b __secondary_start_pmac_0 + nr*8 + */ + target = (unsigned long) __secondary_start_pmac_0 + nr * 8; + patch_branch(vector, target, BRANCH_SET_LINK); + + /* Put some life in our friend */ + pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); + + /* FIXME: We wait a bit for the CPU to take the exception, I should + * instead wait for the entry code to set something for me. Well, + * ideally, all that crap will be done in prom.c and the CPU left + * in a RAM-based wait loop like CHRP. + */ + mdelay(1); + + /* Restore our exception vector */ + *vector = save_vector; + flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); + + local_irq_restore(flags); + if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); + + return 0; +} + +static void smp_core99_setup_cpu(int cpu_nr) +{ + /* Setup L2/L3 */ + if (cpu_nr != 0) + core99_init_caches(cpu_nr); + + /* Setup openpic */ + mpic_setup_this_cpu(); +} + +#ifdef CONFIG_PPC64 +#ifdef CONFIG_HOTPLUG_CPU +static int smp_core99_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int rc; + + switch(action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + /* Open i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host) { + rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1); + if (rc) { + pr_err("Failed to open i2c bus for time sync\n"); + return notifier_from_errno(rc); + } + } + break; + case CPU_ONLINE: + case CPU_UP_CANCELED: + /* Close i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host) + pmac_i2c_close(pmac_tb_clock_chip_host); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block smp_core99_cpu_nb = { + .notifier_call = smp_core99_cpu_notify, +}; +#endif /* CONFIG_HOTPLUG_CPU */ + +static void __init smp_core99_bringup_done(void) +{ + extern void g5_phy_disable_cpu1(void); + + /* Close i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host) + pmac_i2c_close(pmac_tb_clock_chip_host); + + /* If we didn't start the second CPU, we must take + * it off the bus. 
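+	 * Clearing the present bit also keeps CPU hotplug from trying
+	 * to bring the missing CPU back up later.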
+ */ + if (of_machine_is_compatible("MacRISC4") && + num_online_cpus() < 2) { + set_cpu_present(1, false); + g5_phy_disable_cpu1(); + } +#ifdef CONFIG_HOTPLUG_CPU + register_cpu_notifier(&smp_core99_cpu_nb); +#endif + + if (ppc_md.progress) + ppc_md.progress("smp_core99_bringup_done", 0x349); +} +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_HOTPLUG_CPU + +static int smp_core99_cpu_disable(void) +{ + int rc = generic_cpu_disable(); + if (rc) + return rc; + + mpic_cpu_set_priority(0xf); + + return 0; +} + +#ifdef CONFIG_PPC32 + +static void pmac_cpu_die(void) +{ + int cpu = smp_processor_id(); + + local_irq_disable(); + idle_task_exit(); + pr_debug("CPU%d offline\n", cpu); + generic_set_cpu_dead(cpu); + smp_wmb(); + mb(); + low_cpu_die(); +} + +#else /* CONFIG_PPC32 */ + +static void pmac_cpu_die(void) +{ + int cpu = smp_processor_id(); + + local_irq_disable(); + idle_task_exit(); + + /* + * turn off as much as possible, we'll be + * kicked out as this will only be invoked + * on core99 platforms for now ... + */ + + printk(KERN_INFO "CPU#%d offline\n", cpu); + generic_set_cpu_dead(cpu); + smp_wmb(); + + /* + * Re-enable interrupts. The NAP code needs to enable them + * anyways, do it now so we deal with the case where one already + * happened while soft-disabled. + * We shouldn't get any external interrupts, only decrementer, and the + * decrementer handler is safe for use on offline CPUs + */ + local_irq_enable(); + + while (1) { + /* let's not take timer interrupts too often ... */ + set_dec(0x7fffffff); + + /* Enter NAP mode */ + power4_idle(); + } +} + +#endif /* else CONFIG_PPC32 */ +#endif /* CONFIG_HOTPLUG_CPU */ + +/* Core99 Macs (dual G4s and G5s) */ +struct smp_ops_t core99_smp_ops = { + .message_pass = smp_mpic_message_pass, + .probe = smp_core99_probe, +#ifdef CONFIG_PPC64 + .bringup_done = smp_core99_bringup_done, +#endif + .kick_cpu = smp_core99_kick_cpu, + .setup_cpu = smp_core99_setup_cpu, + .give_timebase = smp_core99_give_timebase, + .take_timebase = smp_core99_take_timebase, +#if defined(CONFIG_HOTPLUG_CPU) + .cpu_disable = smp_core99_cpu_disable, + .cpu_die = generic_cpu_die, +#endif +}; + +void __init pmac_setup_smp(void) +{ + struct device_node *np; + + /* Check for Core99 */ + np = of_find_node_by_name(NULL, "uni-n"); + if (!np) + np = of_find_node_by_name(NULL, "u3"); + if (!np) + np = of_find_node_by_name(NULL, "u4"); + if (np) { + of_node_put(np); + smp_ops = &core99_smp_ops; + } +#ifdef CONFIG_PPC_PMAC32_PSURGE + else { + /* We have to set bits in cpu_possible_mask here since the + * secondary CPU(s) aren't in the device tree. Various + * things won't be initialized for CPUs not in the possible + * map, so we really need to fix it up here. + */ + int cpu; + + for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu) + set_cpu_possible(cpu, true); + smp_ops = &psurge_smp_ops; + } +#endif /* CONFIG_PPC_PMAC32_PSURGE */ + +#ifdef CONFIG_HOTPLUG_CPU + ppc_md.cpu_die = pmac_cpu_die; +#endif +} + + diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c new file mode 100644 index 000000000..8680bb697 --- /dev/null +++ b/arch/powerpc/platforms/powermac/time.c @@ -0,0 +1,334 @@ +/* + * Support for periodic interrupts (100 per second) and for getting + * the current time from the RTC on Power Macintoshes. + * + * We use the decrementer register for our periodic interrupts. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996 Paul Mackerras. + * Copyright (C) 2003-2005 Benjamin Herrenschmidt. 
+ * + */ +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/time.h> +#include <linux/adb.h> +#include <linux/cuda.h> +#include <linux/pmu.h> +#include <linux/interrupt.h> +#include <linux/hardirq.h> +#include <linux/rtc.h> + +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/nvram.h> +#include <asm/smu.h> + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +/* Apparently the RTC stores seconds since 1 Jan 1904 */ +#define RTC_OFFSET 2082844800 + +/* + * Calibrate the decrementer frequency with the VIA timer 1. + */ +#define VIA_TIMER_FREQ_6 4700000 /* time 1 frequency * 6 */ + +/* VIA registers */ +#define RS 0x200 /* skip between registers */ +#define T1CL (4*RS) /* Timer 1 ctr/latch (low 8 bits) */ +#define T1CH (5*RS) /* Timer 1 counter (high 8 bits) */ +#define T1LL (6*RS) /* Timer 1 latch (low 8 bits) */ +#define T1LH (7*RS) /* Timer 1 latch (high 8 bits) */ +#define ACR (11*RS) /* Auxiliary control register */ +#define IFR (13*RS) /* Interrupt flag register */ + +/* Bits in ACR */ +#define T1MODE 0xc0 /* Timer 1 mode */ +#define T1MODE_CONT 0x40 /* continuous interrupts */ + +/* Bits in IFR and IER */ +#define T1_INT 0x40 /* Timer 1 interrupt */ + +long __init pmac_time_init(void) +{ + s32 delta = 0; +#ifdef CONFIG_NVRAM + int dst; + + delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16; + delta |= ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xa)) << 8; + delta |= pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xb); + if (delta & 0x00800000UL) + delta |= 0xFF000000UL; + dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0); + printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60, + dst ? 
"on" : "off"); +#endif + return delta; +} + +#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) +static void to_rtc_time(unsigned long now, struct rtc_time *tm) +{ + to_tm(now, tm); + tm->tm_year -= 1900; + tm->tm_mon -= 1; +} +#endif + +#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) || \ + defined(CONFIG_PMAC_SMU) +static unsigned long from_rtc_time(struct rtc_time *tm) +{ + return mktime(tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec); +} +#endif + +#ifdef CONFIG_ADB_CUDA +static unsigned long cuda_get_time(void) +{ + struct adb_request req; + unsigned int now; + + if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0) + return 0; + while (!req.complete) + cuda_poll(); + if (req.reply_len != 7) + printk(KERN_ERR "cuda_get_time: got %d byte reply\n", + req.reply_len); + now = (req.reply[3] << 24) + (req.reply[4] << 16) + + (req.reply[5] << 8) + req.reply[6]; + return ((unsigned long)now) - RTC_OFFSET; +} + +#define cuda_get_rtc_time(tm) to_rtc_time(cuda_get_time(), (tm)) + +static int cuda_set_rtc_time(struct rtc_time *tm) +{ + unsigned int nowtime; + struct adb_request req; + + nowtime = from_rtc_time(tm) + RTC_OFFSET; + if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME, + nowtime >> 24, nowtime >> 16, nowtime >> 8, + nowtime) < 0) + return -ENXIO; + while (!req.complete) + cuda_poll(); + if ((req.reply_len != 3) && (req.reply_len != 7)) + printk(KERN_ERR "cuda_set_rtc_time: got %d byte reply\n", + req.reply_len); + return 0; +} + +#else +#define cuda_get_time() 0 +#define cuda_get_rtc_time(tm) +#define cuda_set_rtc_time(tm) 0 +#endif + +#ifdef CONFIG_ADB_PMU +static unsigned long pmu_get_time(void) +{ + struct adb_request req; + unsigned int now; + + if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) + return 0; + pmu_wait_complete(&req); + if (req.reply_len != 4) + printk(KERN_ERR "pmu_get_time: got %d byte reply from PMU\n", + req.reply_len); + now = (req.reply[0] << 24) + (req.reply[1] << 16) + + (req.reply[2] << 8) + req.reply[3]; + return ((unsigned long)now) - RTC_OFFSET; +} + +#define pmu_get_rtc_time(tm) to_rtc_time(pmu_get_time(), (tm)) + +static int pmu_set_rtc_time(struct rtc_time *tm) +{ + unsigned int nowtime; + struct adb_request req; + + nowtime = from_rtc_time(tm) + RTC_OFFSET; + if (pmu_request(&req, NULL, 5, PMU_SET_RTC, nowtime >> 24, + nowtime >> 16, nowtime >> 8, nowtime) < 0) + return -ENXIO; + pmu_wait_complete(&req); + if (req.reply_len != 0) + printk(KERN_ERR "pmu_set_rtc_time: %d byte reply from PMU\n", + req.reply_len); + return 0; +} + +#else +#define pmu_get_time() 0 +#define pmu_get_rtc_time(tm) +#define pmu_set_rtc_time(tm) 0 +#endif + +#ifdef CONFIG_PMAC_SMU +static unsigned long smu_get_time(void) +{ + struct rtc_time tm; + + if (smu_get_rtc_time(&tm, 1)) + return 0; + return from_rtc_time(&tm); +} + +#else +#define smu_get_time() 0 +#define smu_get_rtc_time(tm, spin) +#define smu_set_rtc_time(tm, spin) 0 +#endif + +/* Can't be __init, it's called when suspending and resuming */ +unsigned long pmac_get_boot_time(void) +{ + /* Get the time from the RTC, used only at boot time */ + switch (sys_ctrler) { + case SYS_CTRLER_CUDA: + return cuda_get_time(); + case SYS_CTRLER_PMU: + return pmu_get_time(); + case SYS_CTRLER_SMU: + return smu_get_time(); + default: + return 0; + } +} + +void pmac_get_rtc_time(struct rtc_time *tm) +{ + /* Get the time from the RTC, used only at boot time */ + switch (sys_ctrler) { + case SYS_CTRLER_CUDA: + cuda_get_rtc_time(tm); + break; + case SYS_CTRLER_PMU: + 
pmu_get_rtc_time(tm); + break; + case SYS_CTRLER_SMU: + smu_get_rtc_time(tm, 1); + break; + default: + ; + } +} + +int pmac_set_rtc_time(struct rtc_time *tm) +{ + switch (sys_ctrler) { + case SYS_CTRLER_CUDA: + return cuda_set_rtc_time(tm); + case SYS_CTRLER_PMU: + return pmu_set_rtc_time(tm); + case SYS_CTRLER_SMU: + return smu_set_rtc_time(tm, 1); + default: + return -ENODEV; + } +} + +#ifdef CONFIG_PPC32 +/* + * Calibrate the decrementer register using VIA timer 1. + * This is used both on powermacs and CHRP machines. + */ +int __init via_calibrate_decr(void) +{ + struct device_node *vias; + volatile unsigned char __iomem *via; + int count = VIA_TIMER_FREQ_6 / 100; + unsigned int dstart, dend; + struct resource rsrc; + + vias = of_find_node_by_name(NULL, "via-cuda"); + if (vias == NULL) + vias = of_find_node_by_name(NULL, "via-pmu"); + if (vias == NULL) + vias = of_find_node_by_name(NULL, "via"); + if (vias == NULL || of_address_to_resource(vias, 0, &rsrc)) { + of_node_put(vias); + return 0; + } + of_node_put(vias); + via = ioremap(rsrc.start, resource_size(&rsrc)); + if (via == NULL) { + printk(KERN_ERR "Failed to map VIA for timer calibration !\n"); + return 0; + } + + /* set timer 1 for continuous interrupts */ + out_8(&via[ACR], (via[ACR] & ~T1MODE) | T1MODE_CONT); + /* set the counter to a small value */ + out_8(&via[T1CH], 2); + /* set the latch to `count' */ + out_8(&via[T1LL], count); + out_8(&via[T1LH], count >> 8); + /* wait until it hits 0 */ + while ((in_8(&via[IFR]) & T1_INT) == 0) + ; + dstart = get_dec(); + /* clear the interrupt & wait until it hits 0 again */ + in_8(&via[T1CL]); + while ((in_8(&via[IFR]) & T1_INT) == 0) + ; + dend = get_dec(); + + ppc_tb_freq = (dstart - dend) * 100 / 6; + + iounmap(via); + + return 1; +} +#endif + +/* + * Query the OF and get the decr frequency. + */ +void __init pmac_calibrate_decr(void) +{ + generic_calibrate_decr(); + +#ifdef CONFIG_PPC32 + /* We assume MacRISC2 machines have correct device-tree + * calibration. That's better since the VIA itself seems + * to be slightly off. --BenH + */ + if (!of_machine_is_compatible("MacRISC2") && + !of_machine_is_compatible("MacRISC3") && + !of_machine_is_compatible("MacRISC4")) + if (via_calibrate_decr()) + return; + + /* Special case: QuickSilver G4s seem to have a badly calibrated + * timebase-frequency in OF, VIA is much better on these. We should + * probably implement calibration based on the KL timer on these + * machines anyway... -BenH + */ + if (of_machine_is_compatible("PowerMac3,5")) + if (via_calibrate_decr()) + return; +#endif +} diff --git a/arch/powerpc/platforms/powermac/udbg_adb.c b/arch/powerpc/platforms/powermac/udbg_adb.c new file mode 100644 index 000000000..366bd221e --- /dev/null +++ b/arch/powerpc/platforms/powermac/udbg_adb.c @@ -0,0 +1,219 @@ +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/bitops.h> +#include <linux/ptrace.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/cuda.h> +#include <asm/machdep.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/xmon.h> +#include <asm/prom.h> +#include <asm/bootx.h> +#include <asm/errno.h> +#include <asm/pmac_feature.h> +#include <asm/processor.h> +#include <asm/delay.h> +#include <asm/btext.h> +#include <asm/time.h> +#include <asm/udbg.h> + +/* + * This implementation is "special", it can "patch" the current + * udbg implementation and work on top of it. 
It must thus be + * initialized last + */ + +static void (*udbg_adb_old_putc)(char c); +static int (*udbg_adb_old_getc)(void); +static int (*udbg_adb_old_getc_poll)(void); + +static enum { + input_adb_none, + input_adb_pmu, + input_adb_cuda, +} input_type = input_adb_none; + +int xmon_wants_key, xmon_adb_keycode; + +static inline void udbg_adb_poll(void) +{ +#ifdef CONFIG_ADB_PMU + if (input_type == input_adb_pmu) + pmu_poll_adb(); +#endif /* CONFIG_ADB_PMU */ +#ifdef CONFIG_ADB_CUDA + if (input_type == input_adb_cuda) + cuda_poll(); +#endif /* CONFIG_ADB_CUDA */ +} + +#ifdef CONFIG_BOOTX_TEXT + +static int udbg_adb_use_btext; +static int xmon_adb_shiftstate; + +static unsigned char xmon_keytab[128] = + "asdfhgzxcv\000bqwer" /* 0x00 - 0x0f */ + "yt123465=97-80]o" /* 0x10 - 0x1f */ + "u[ip\rlj'k;\\,/nm." /* 0x20 - 0x2f */ + "\t `\177\0\033\0\0\0\0\0\0\0\0\0\0" /* 0x30 - 0x3f */ + "\0.\0*\0+\0\0\0\0\0/\r\0-\0" /* 0x40 - 0x4f */ + "\0\0000123456789\0\0\0"; /* 0x50 - 0x5f */ + +static unsigned char xmon_shift_keytab[128] = + "ASDFHGZXCV\000BQWER" /* 0x00 - 0x0f */ + "YT!@#$^%+(&_*)}O" /* 0x10 - 0x1f */ + "U{IP\rLJ\"K:|<?NM>" /* 0x20 - 0x2f */ + "\t ~\177\0\033\0\0\0\0\0\0\0\0\0\0" /* 0x30 - 0x3f */ + "\0.\0*\0+\0\0\0\0\0/\r\0-\0" /* 0x40 - 0x4f */ + "\0\0000123456789\0\0\0"; /* 0x50 - 0x5f */ + +static int udbg_adb_local_getc(void) +{ + int k, t, on; + + xmon_wants_key = 1; + for (;;) { + xmon_adb_keycode = -1; + t = 0; + on = 0; + k = -1; + do { + if (--t < 0) { + on = 1 - on; + btext_drawchar(on? 0xdb: 0x20); + btext_drawchar('\b'); + t = 200000; + } + udbg_adb_poll(); + if (udbg_adb_old_getc_poll) + k = udbg_adb_old_getc_poll(); + } while (k == -1 && xmon_adb_keycode == -1); + if (on) + btext_drawstring(" \b"); + if (k != -1) + return k; + k = xmon_adb_keycode; + + /* test for shift keys */ + if ((k & 0x7f) == 0x38 || (k & 0x7f) == 0x7b) { + xmon_adb_shiftstate = (k & 0x80) == 0; + continue; + } + if (k >= 0x80) + continue; /* ignore up transitions */ + k = (xmon_adb_shiftstate? xmon_shift_keytab: xmon_keytab)[k]; + if (k != 0) + break; + } + xmon_wants_key = 0; + return k; +} +#endif /* CONFIG_BOOTX_TEXT */ + +static int udbg_adb_getc(void) +{ +#ifdef CONFIG_BOOTX_TEXT + if (udbg_adb_use_btext && input_type != input_adb_none) + return udbg_adb_local_getc(); +#endif + if (udbg_adb_old_getc) + return udbg_adb_old_getc(); + return -1; +} + +/* getc_poll() is not really used, unless you have the xmon-over modem + * hack that doesn't quite concern us here, thus we just poll the low level + * ADB driver to prevent it from timing out and call back the original poll + * routine. 
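The keycode handling in udbg_adb_local_getc() above boils down to a small state machine: the shift keys (raw codes 0x38 and 0x7b) toggle a flag, releases (bit 7 set) are ignored, and everything else indexes one of the two translation tables. A condensed, self-contained sketch of that translation step (the tables are truncated stand-ins for xmon_keytab/xmon_shift_keytab):

#include <stdio.h>

static int shift_state;

/* Translate one raw ADB keycode to ASCII, or 0 if it should be
 * swallowed, mirroring the shift/key-up logic above. */
static char adb_translate(int k)
{
	static const char tab[16]       = "asdfhgzxcv\0bqwer";
	static const char shift_tab[16] = "ASDFHGZXCV\0BQWER";

	if ((k & 0x7f) == 0x38 || (k & 0x7f) == 0x7b) { /* shift keys */
		shift_state = (k & 0x80) == 0;          /* set on press */
		return 0;
	}
	if (k >= 0x80)  /* key release: ignore up transitions */
		return 0;
	if (k >= 16)    /* outside our truncated table */
		return 0;
	return (shift_state ? shift_tab : tab)[k];
}

int main(void)
{
	int keys[] = { 0x00, 0x38, 0x00, 0xb8, 0x00 }; /* a, shift, A, unshift, a */

	for (unsigned i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) {
		char c = adb_translate(keys[i]);
		if (c)
			printf("%c", c);
	}
	printf("\n"); /* prints "aAa" */
	return 0;
}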
+ */ +static int udbg_adb_getc_poll(void) +{ + udbg_adb_poll(); + + if (udbg_adb_old_getc_poll) + return udbg_adb_old_getc_poll(); + return -1; +} + +static void udbg_adb_putc(char c) +{ +#ifdef CONFIG_BOOTX_TEXT + if (udbg_adb_use_btext) + btext_drawchar(c); +#endif + if (udbg_adb_old_putc) + return udbg_adb_old_putc(c); +} + +void __init udbg_adb_init_early(void) +{ +#ifdef CONFIG_BOOTX_TEXT + if (btext_find_display(1) == 0) { + udbg_adb_use_btext = 1; + udbg_putc = udbg_adb_putc; + } +#endif +} + +int __init udbg_adb_init(int force_btext) +{ + struct device_node *np; + + /* Capture existing callbacks */ + udbg_adb_old_putc = udbg_putc; + udbg_adb_old_getc = udbg_getc; + udbg_adb_old_getc_poll = udbg_getc_poll; + + /* Check if our early init was already called */ + if (udbg_adb_old_putc == udbg_adb_putc) + udbg_adb_old_putc = NULL; +#ifdef CONFIG_BOOTX_TEXT + if (udbg_adb_old_putc == btext_drawchar) + udbg_adb_old_putc = NULL; +#endif + + /* Set ours as output */ + udbg_putc = udbg_adb_putc; + udbg_getc = udbg_adb_getc; + udbg_getc_poll = udbg_adb_getc_poll; + +#ifdef CONFIG_BOOTX_TEXT + /* Check if we should use btext output */ + if (btext_find_display(force_btext) == 0) + udbg_adb_use_btext = 1; +#endif + + /* See if there is a keyboard in the device tree with a parent + * of type "adb". If not, we return a failure, but we keep the + * bext output set for now + */ + for_each_node_by_name(np, "keyboard") { + struct device_node *parent = of_get_parent(np); + int found = (parent && strcmp(parent->type, "adb") == 0); + of_node_put(parent); + if (found) + break; + } + if (np == NULL) + return -ENODEV; + of_node_put(np); + +#ifdef CONFIG_ADB_PMU + if (find_via_pmu()) + input_type = input_adb_pmu; +#endif +#ifdef CONFIG_ADB_CUDA + if (find_via_cuda()) + input_type = input_adb_cuda; +#endif + + /* Same as above: nothing found, keep btext set for output */ + if (input_type == input_adb_none) + return -ENODEV; + + return 0; +} diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c new file mode 100644 index 000000000..d83135a98 --- /dev/null +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -0,0 +1,184 @@ +/* + * udbg for zilog scc ports as found on Apple PowerMacs + * + * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
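udbg_adb_init() above layers itself over whatever udbg backend is already installed by saving the old function pointers and delegating to them, which is why it must run last. A stripped-down sketch of that chaining pattern (dbg_putc stands in for the kernel's udbg_putc hook; all names here are illustrative):

#include <stdio.h>

static void (*dbg_putc)(char c);   /* the "current backend" hook */
static void (*old_putc)(char c);   /* saved previous hook */

static void serial_putc(char c)    /* pretend low-level backend */
{
	putchar(c);
}

static void layered_putc(char c)
{
	/* ...a real layer could also draw 'c' on a btext console... */
	if (old_putc)
		old_putc(c);       /* delegate to whoever came first */
}

int main(void)
{
	dbg_putc = serial_putc;    /* original registration */

	old_putc = dbg_putc;       /* capture, then override */
	dbg_putc = layered_putc;

	const char *s = "hello\n";
	while (*s)
		dbg_putc(*s++);    /* output flows through both layers */
	return 0;
}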
+ */ +#include <linux/types.h> +#include <asm/udbg.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pmac_feature.h> + +extern u8 real_readb(volatile u8 __iomem *addr); +extern void real_writeb(u8 data, volatile u8 __iomem *addr); + +#define SCC_TXRDY 4 +#define SCC_RXRDY 1 + +static volatile u8 __iomem *sccc; +static volatile u8 __iomem *sccd; + +static void udbg_scc_putc(char c) +{ + if (sccc) { + while ((in_8(sccc) & SCC_TXRDY) == 0) + ; + out_8(sccd, c); + if (c == '\n') + udbg_scc_putc('\r'); + } +} + +static int udbg_scc_getc_poll(void) +{ + if (sccc) { + if ((in_8(sccc) & SCC_RXRDY) != 0) + return in_8(sccd); + else + return -1; + } + return -1; +} + +static int udbg_scc_getc(void) +{ + if (sccc) { + while ((in_8(sccc) & SCC_RXRDY) == 0) + ; + return in_8(sccd); + } + return -1; +} + +static unsigned char scc_inittab[] = { + 13, 0, /* set baud rate divisor */ + 12, 0, + 14, 1, /* baud rate gen enable, src=rtxc */ + 11, 0x50, /* clocks = br gen */ + 5, 0xea, /* tx 8 bits, assert DTR & RTS */ + 4, 0x46, /* x16 clock, 1 stop */ + 3, 0xc1, /* rx enable, 8 bits */ +}; + +void udbg_scc_init(int force_scc) +{ + const u32 *reg; + unsigned long addr; + struct device_node *stdout = NULL, *escc = NULL, *macio = NULL; + struct device_node *ch, *ch_def = NULL, *ch_a = NULL; + const char *path; + int i, x; + + escc = of_find_node_by_name(NULL, "escc"); + if (escc == NULL) + goto bail; + macio = of_get_parent(escc); + if (macio == NULL) + goto bail; + path = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (path != NULL) + stdout = of_find_node_by_path(path); + for (ch = NULL; (ch = of_get_next_child(escc, ch)) != NULL;) { + if (ch == stdout) + ch_def = of_node_get(ch); + if (strcmp(ch->name, "ch-a") == 0) + ch_a = of_node_get(ch); + } + if (ch_def == NULL && !force_scc) + goto bail; + + ch = ch_def ? 
ch_def : ch_a; + + /* Get address within mac-io ASIC */ + reg = of_get_property(escc, "reg", NULL); + if (reg == NULL) + goto bail; + addr = reg[0]; + + /* Get address of mac-io PCI itself */ + reg = of_get_property(macio, "assigned-addresses", NULL); + if (reg == NULL) + goto bail; + addr += reg[2]; + + /* Lock the serial port */ + pmac_call_feature(PMAC_FTR_SCC_ENABLE, ch, + PMAC_SCC_ASYNC | PMAC_SCC_FLAG_XMON, 1); + + if (ch == ch_a) + addr += 0x20; + sccc = ioremap(addr & PAGE_MASK, PAGE_SIZE) ; + sccc += addr & ~PAGE_MASK; + sccd = sccc + 0x10; + + mb(); + + for (i = 20000; i != 0; --i) + x = in_8(sccc); + out_8(sccc, 0x09); /* reset A or B side */ + out_8(sccc, 0xc0); + + /* If SCC was the OF output port, read the BRG value, else + * Setup for 38400 or 57600 8N1 depending on the machine + */ + if (ch_def != NULL) { + out_8(sccc, 13); + scc_inittab[1] = in_8(sccc); + out_8(sccc, 12); + scc_inittab[3] = in_8(sccc); + } else if (of_machine_is_compatible("RackMac1,1") + || of_machine_is_compatible("RackMac1,2") + || of_machine_is_compatible("MacRISC4")) { + /* Xserves and G5s default to 57600 */ + scc_inittab[1] = 0; + scc_inittab[3] = 0; + } else { + /* Others default to 38400 */ + scc_inittab[1] = 0; + scc_inittab[3] = 1; + } + + for (i = 0; i < sizeof(scc_inittab); ++i) + out_8(sccc, scc_inittab[i]); + + + udbg_putc = udbg_scc_putc; + udbg_getc = udbg_scc_getc; + udbg_getc_poll = udbg_scc_getc_poll; + + udbg_puts("Hello World !\n"); + + bail: + of_node_put(macio); + of_node_put(escc); + of_node_put(stdout); + of_node_put(ch_def); + of_node_put(ch_a); +} + +#ifdef CONFIG_PPC64 +static void udbg_real_scc_putc(char c) +{ + while ((real_readb(sccc) & SCC_TXRDY) == 0) + ; + real_writeb(c, sccd); + if (c == '\n') + udbg_real_scc_putc('\r'); +} + +void __init udbg_init_pmac_realmode(void) +{ + sccc = (volatile u8 __iomem *)0x80013020ul; + sccd = (volatile u8 __iomem *)0x80013030ul; + + udbg_putc = udbg_real_scc_putc; + udbg_getc = NULL; + udbg_getc_poll = NULL; +} +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig new file mode 100644 index 000000000..4b044d8cb --- /dev/null +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -0,0 +1,21 @@ +config PPC_POWERNV + depends on PPC64 && PPC_BOOK3S + bool "IBM PowerNV (Non-Virtualized) platform support" + select PPC_NATIVE + select PPC_XICS + select PPC_ICP_NATIVE + select PPC_P7_NAP + select PPC_PCI_CHOICE if EMBEDDED + select EPAPR_BOOT + select PPC_INDIRECT_PIO + select PPC_UDBG_16550 + select PPC_SCOM + select ARCH_RANDOM + select CPU_FREQ + select CPU_FREQ_GOV_PERFORMANCE + select CPU_FREQ_GOV_POWERSAVE + select CPU_FREQ_GOV_USERSPACE + select CPU_FREQ_GOV_ONDEMAND + select CPU_FREQ_GOV_CONSERVATIVE + select PPC_DOORBELL + default y diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile new file mode 100644 index 000000000..33e44f372 --- /dev/null +++ b/arch/powerpc/platforms/powernv/Makefile @@ -0,0 +1,11 @@ +obj-y += setup.o opal-wrappers.o opal.o opal-async.o +obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o +obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o +obj-y += opal-msglog.o opal-hmi.o opal-power.o + +obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o +obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o +obj-$(CONFIG_EEH) += eeh-powernv.o +obj-$(CONFIG_PPC_SCOM) += opal-xscom.o +obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o +obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o diff --git 
a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c new file mode 100644 index 000000000..ce738ab3d --- /dev/null +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -0,0 +1,1542 @@ +/*
+ * This file implements the platform-dependent EEH operations on the
+ * powernv platform. The powernv platform was created in order to fully
+ * support the hypervisor.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/atomic.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/firmware.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/ppc-pci.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+static bool pnv_eeh_nb_init = false;
+
+/**
+ * pnv_eeh_init - EEH platform dependent initialization
+ *
+ * EEH platform dependent initialization on powernv
+ */
+static int pnv_eeh_init(void)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+
+ /* We require OPALv3 */
+ if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
+ pr_warn("%s: OPALv3 is required!\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ /* Set probe mode */
+ eeh_add_flag(EEH_PROBE_MODE_DEV);
+
+ /*
+ * P7IOC blocks PCI config access to a frozen PE, but PHB3
+ * doesn't do that. So we have to selectively enable I/O
+ * prior to collecting the error log.
+ */
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ if (phb->model == PNV_PHB_MODEL_P7IOC)
+ eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+
+ /*
+ * PE#0 should be regarded as valid by the EEH core
+ * if it's not the reserved one. Currently, we
+ * have the reserved PE#0 and PE#127 for PHB3
+ * and P7IOC respectively. So we should regard
+ * PE#0 as valid for P7IOC. 
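pnv_eeh_event() a little further below fires only on the rising edge of OPAL_EVENT_PCI_ERROR: the bit must appear in both the changed-events mask and the current-events mask. A minimal sketch of that test (the bit position is invented for illustration; the real value comes from the OPAL headers):

#include <stdint.h>
#include <stdio.h>

#define EVT_PCI_ERROR (1ull << 3)   /* illustrative bit position */

/* Act only when the interesting bit changed AND is now set. */
static int should_handle(uint64_t changed, uint64_t now)
{
	return (changed & EVT_PCI_ERROR) && (now & EVT_PCI_ERROR) ? 1 : 0;
}

int main(void)
{
	printf("%d\n", should_handle(EVT_PCI_ERROR, EVT_PCI_ERROR)); /* 1: just raised */
	printf("%d\n", should_handle(EVT_PCI_ERROR, 0));             /* 0: just cleared */
	printf("%d\n", should_handle(0, EVT_PCI_ERROR));             /* 0: still pending, unchanged */
	return 0;
}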
+ */ + if (phb->ioda.reserved_pe != 0) + eeh_add_flag(EEH_VALID_PE_ZERO); + + break; + } + + return 0; +} + +static int pnv_eeh_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + uint64_t changed_evts = (uint64_t)change; + + /* + * We simply send special EEH event if EEH has + * been enabled, or clear pending events in + * case that we enable EEH soon + */ + if (!(changed_evts & OPAL_EVENT_PCI_ERROR) || + !(events & OPAL_EVENT_PCI_ERROR)) + return 0; + + if (eeh_enabled()) + eeh_send_failure_event(NULL); + else + opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); + + return 0; +} + +static struct notifier_block pnv_eeh_nb = { + .notifier_call = pnv_eeh_event, + .next = NULL, + .priority = 0 +}; + +#ifdef CONFIG_DEBUG_FS +static ssize_t pnv_eeh_ei_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_controller *hose = filp->private_data; + struct eeh_dev *edev; + struct eeh_pe *pe; + int pe_no, type, func; + unsigned long addr, mask; + char buf[50]; + int ret; + + if (!eeh_ops || !eeh_ops->err_inject) + return -ENXIO; + + /* Copy over argument buffer */ + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* Retrieve parameters */ + ret = sscanf(buf, "%x:%x:%x:%lx:%lx", + &pe_no, &type, &func, &addr, &mask); + if (ret != 5) + return -EINVAL; + + /* Retrieve PE */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + edev->phb = hose; + edev->pe_config_addr = pe_no; + pe = eeh_pe_get(edev); + kfree(edev); + if (!pe) + return -ENODEV; + + /* Do error injection */ + ret = eeh_ops->err_inject(pe, type, func, addr, mask); + return ret < 0 ? ret : count; +} + +static const struct file_operations pnv_eeh_ei_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = pnv_eeh_ei_write, +}; + +static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + out_be64(phb->regs + offset, val); + return 0; +} + +static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + *val = in_be64(phb->regs + offset); + return 0; +} + +static int pnv_eeh_outb_dbgfs_set(void *data, u64 val) +{ + return pnv_eeh_dbgfs_set(data, 0xD10, val); +} + +static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val) +{ + return pnv_eeh_dbgfs_get(data, 0xD10, val); +} + +static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val) +{ + return pnv_eeh_dbgfs_set(data, 0xD90, val); +} + +static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val) +{ + return pnv_eeh_dbgfs_get(data, 0xD90, val); +} + +static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val) +{ + return pnv_eeh_dbgfs_set(data, 0xE10, val); +} + +static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val) +{ + return pnv_eeh_dbgfs_get(data, 0xE10, val); +} + +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get, + pnv_eeh_outb_dbgfs_set, "0x%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get, + pnv_eeh_inbA_dbgfs_set, "0x%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get, + pnv_eeh_inbB_dbgfs_set, "0x%llx\n"); +#endif /* CONFIG_DEBUG_FS */ + +/** + * pnv_eeh_post_init - EEH platform dependent post initialization + * + * EEH platform dependent post initialization on powernv. When + * the function is called, the EEH PEs and devices should have + * been built. 
If the I/O cache has been initialized, EEH is
+ * ready to provide service.
+ */
+static int pnv_eeh_post_init(void)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ int ret = 0;
+
+ /* Register OPAL event notifier */
+ if (!pnv_eeh_nb_init) {
+ ret = opal_notifier_register(&pnv_eeh_nb);
+ if (ret) {
+ pr_warn("%s: Can't register OPAL event notifier (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ pnv_eeh_nb_init = true;
+ }
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ /*
+ * If EEH is enabled, we're going to rely on that.
+ * Otherwise, we fall back to the conventional mechanism
+ * of clearing frozen PEs during PCI config access.
+ */
+ if (eeh_enabled())
+ phb->flags |= PNV_PHB_FLAG_EEH;
+ else
+ phb->flags &= ~PNV_PHB_FLAG_EEH;
+
+ /* Create debugfs entries */
+#ifdef CONFIG_DEBUG_FS
+ if (phb->has_dbgfs || !phb->dbgfs)
+ continue;
+
+ phb->has_dbgfs = 1;
+ debugfs_create_file("err_injct", 0200,
+ phb->dbgfs, hose,
+ &pnv_eeh_ei_fops);
+
+ debugfs_create_file("err_injct_outbound", 0600,
+ phb->dbgfs, hose,
+ &pnv_eeh_outb_dbgfs_ops);
+ debugfs_create_file("err_injct_inboundA", 0600,
+ phb->dbgfs, hose,
+ &pnv_eeh_inbA_dbgfs_ops);
+ debugfs_create_file("err_injct_inboundB", 0600,
+ phb->dbgfs, hose,
+ &pnv_eeh_inbB_dbgfs_ops);
+#endif /* CONFIG_DEBUG_FS */
+ }
+
+ return ret;
+}
+
+static int pnv_eeh_cap_start(struct pci_dn *pdn)
+{
+ u32 status;
+
+ if (!pdn)
+ return 0;
+
+ pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
+ if (!(status & PCI_STATUS_CAP_LIST))
+ return 0;
+
+ return PCI_CAPABILITY_LIST;
+}
+
+static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
+{
+ int pos = pnv_eeh_cap_start(pdn);
+ int cnt = 48; /* Maximal number of capabilities */
+ u32 id;
+
+ if (!pos)
+ return 0;
+
+ while (cnt--) {
+ pnv_pci_cfg_read(pdn, pos, 1, &pos);
+ if (pos < 0x40)
+ break;
+
+ pos &= ~3;
+ pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+ if (id == 0xff)
+ break;
+
+ /* Found */
+ if (id == cap)
+ return pos;
+
+ /* Next one */
+ pos += PCI_CAP_LIST_NEXT;
+ }
+
+ return 0;
+}
+
+static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 header;
+ int pos = 256, ttl = (4096 - 256) / 8;
+
+ if (!edev || !edev->pcie_cap)
+ return 0;
+ if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+ return 0;
+ else if (!header)
+ return 0;
+
+ while (ttl-- > 0) {
+ if (PCI_EXT_CAP_ID(header) == cap && pos)
+ return pos;
+
+ pos = PCI_EXT_CAP_NEXT(header);
+ if (pos < 256)
+ break;
+
+ if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * pnv_eeh_probe - Do probe on PCI device
+ * @pdn: PCI device node
+ * @data: unused
+ *
+ * When the EEH module is installed during system boot, all PCI devices
+ * are checked one by one to see if they support EEH. This function
+ * is introduced for that purpose. By default, EEH has been enabled
+ * on all PCI devices, so we only need to do the necessary
+ * initialization on the corresponding eeh device and create the PE
+ * accordingly.
+ *
+ * Note that it's unsafe to retrieve the EEH device through
+ * the corresponding PCI device. During PCI device hotplug, which
+ * was possibly triggered by the EEH core, the binding between the EEH
+ * device and the PCI device isn't built yet. 
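pnv_eeh_find_cap() above is the standard PCI capability-list walk, bounded to 48 hops so a corrupt list cannot loop forever. A self-contained sketch of the same walk over a fake config space (the config-read helper and the register layout are mocked up for illustration):

#include <stdio.h>
#include <stdint.h>

#define PCI_STATUS            0x06
#define PCI_STATUS_CAP_LIST   0x10
#define PCI_CAPABILITY_LIST   0x34

static uint8_t cfg[256];                 /* mock config space */

static uint8_t cfg_read8(int where) { return cfg[where]; }

/* Follow the capability linked list, capped at 48 hops. */
static int find_cap(int cap)
{
	int cnt = 48;
	int pos;

	if (!(cfg_read8(PCI_STATUS) & PCI_STATUS_CAP_LIST))
		return 0;
	pos = cfg_read8(PCI_CAPABILITY_LIST);
	while (cnt-- && pos >= 0x40) {
		pos &= ~3;
		uint8_t id = cfg_read8(pos);     /* cap ID byte */
		if (id == 0xff)
			break;
		if (id == cap)
			return pos;
		pos = cfg_read8(pos + 1);        /* "next" pointer byte */
	}
	return 0;
}

int main(void)
{
	/* Two-entry list: PM (0x01) at 0x40 -> PCIe (0x10) at 0x60 */
	cfg[PCI_STATUS] = PCI_STATUS_CAP_LIST;
	cfg[PCI_CAPABILITY_LIST] = 0x40;
	cfg[0x40] = 0x01; cfg[0x41] = 0x60;
	cfg[0x60] = 0x10; cfg[0x61] = 0x00;

	printf("PCIe cap at 0x%02x\n", find_cap(0x10)); /* prints 0x60 */
	return 0;
}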
+ */ +static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) +{ + struct pci_controller *hose = pdn->phb; + struct pnv_phb *phb = hose->private_data; + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + uint32_t pcie_flags; + int ret; + + /* + * When probing the root bridge, which doesn't have any + * subordinate PCI devices. We don't have OF node for + * the root bridge. So it's not reasonable to continue + * the probing. + */ + if (!edev || edev->pe) + return NULL; + + /* Skip for PCI-ISA bridge */ + if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) + return NULL; + + /* Initialize eeh device */ + edev->class_code = pdn->class_code; + edev->mode &= 0xFFFFFF00; + edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); + edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); + if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + edev->mode |= EEH_DEV_BRIDGE; + if (edev->pcie_cap) { + pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS, + 2, &pcie_flags); + pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; + if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } + } + + edev->config_addr = (pdn->busno << 8) | (pdn->devfn); + edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr]; + + /* Create PE */ + ret = eeh_add_to_parent_pe(edev); + if (ret) { + pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n", + __func__, hose->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret); + return NULL; + } + + /* + * If the PE contains any one of following adapters, the + * PCI config space can't be accessed when dumping EEH log. + * Otherwise, we will run into fenced PHB caused by shortage + * of outbound credits in the adapter. The PCI config access + * should be blocked until PE reset. MMIO access is dropped + * by hardware certainly. In order to drop PCI config requests, + * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which + * will be checked in the backend for PE state retrival. If + * the PE becomes frozen for the first time and the flag has + * been set for the PE, we will set EEH_PE_CFG_BLOCKED for + * that PE to block its config space. + * + * Broadcom Austin 4-ports NICs (14e4:1657) + * Broadcom Shiner 2-ports 10G NICs (14e4:168e) + */ + if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x1657) || + (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x168e)) + edev->pe->state |= EEH_PE_CFG_RESTRICTED; + + /* + * Cache the PE primary bus, which can't be fetched when + * full hotplug is in progress. In that case, all child + * PCI devices of the PE are expected to be removed prior + * to PE reset. + */ + if (!edev->pe->bus) + edev->pe->bus = pci_find_bus(hose->global_number, + pdn->busno); + + /* + * Enable EEH explicitly so that we will do EEH check + * while accessing I/O stuff + */ + eeh_add_flag(EEH_ENABLED); + + /* Save memory bars */ + eeh_save_bars(edev); + + return NULL; +} + +/** + * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable + * @pe: EEH PE + * @option: operation to be issued + * + * The function is used to control the EEH functionality globally. 
+ * Currently, following options are support according to PAPR: + * Enable EEH, Disable EEH, Enable MMIO and Enable DMA + */ +static int pnv_eeh_set_option(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + bool freeze_pe = false; + int opt, ret = 0; + s64 rc; + + /* Sanity check on option */ + switch (option) { + case EEH_OPT_DISABLE: + return -EPERM; + case EEH_OPT_ENABLE: + return 0; + case EEH_OPT_THAW_MMIO: + opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; + break; + case EEH_OPT_THAW_DMA: + opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; + break; + case EEH_OPT_FREEZE_PE: + freeze_pe = true; + opt = OPAL_EEH_ACTION_SET_FREEZE_ALL; + break; + default: + pr_warn("%s: Invalid option %d\n", __func__, option); + return -EINVAL; + } + + /* If PHB supports compound PE, to handle it */ + if (freeze_pe) { + if (phb->freeze_pe) { + phb->freeze_pe(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe->addr, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing " + "PHB#%x-PE#%x\n", + __func__, rc, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } + } else { + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, pe->addr, opt); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe->addr, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld enable %d " + "for PHB#%x-PE#%x\n", + __func__, rc, option, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } + } + + return ret; +} + +/** + * pnv_eeh_get_pe_addr - Retrieve PE address + * @pe: EEH PE + * + * Retrieve the PE address according to the given tranditional + * PCI BDF (Bus/Device/Function) address. + */ +static int pnv_eeh_get_pe_addr(struct eeh_pe *pe) +{ + return pe->addr; +} + +static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + s64 rc; + + rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, + PNV_PCI_DIAG_BUF_SIZE); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failure %lld getting PHB#%x diag-data\n", + __func__, rc, pe->phb->global_number); +} + +static int pnv_eeh_get_phb_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate; + __be16 pcierr; + s64 rc; + int result = 0; + + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x state\n", + __func__, rc, phb->hose->global_number); + return EEH_STATE_NOT_SUPPORT; + } + + /* + * Check PHB state. If the PHB is frozen for the + * first time, to dump the PHB diag-data. + */ + if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + } else if (!(pe->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + pnv_eeh_get_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + } + + return result; +} + +static int pnv_eeh_get_pe_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate; + __be16 pcierr; + s64 rc; + int result; + + /* + * We don't clobber hardware frozen state until PE + * reset is completed. In order to keep EEH core + * moving forward, we have to return operational + * state during PE reset. 
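The heart of pnv_eeh_get_pe_state() below is a mapping from the OPAL freeze state to the EEH core's composite state bits. A condensed table of that mapping, with invented flag values standing in for the kernel's EEH_STATE_* constants:

#include <stdio.h>

/* Stand-in flag values; the kernel's EEH_STATE_* constants differ. */
#define ST_MMIO_ACTIVE  0x01
#define ST_DMA_ACTIVE   0x02
#define ST_MMIO_ENABLED 0x04
#define ST_DMA_ENABLED  0x08
#define ST_RESET_ACTIVE 0x10
#define ST_UNAVAILABLE  0x20
#define ST_NOT_SUPPORT  0x40

enum fstate {   /* mirrors the OPAL_EEH_STOPPED_* cases below */
	NOT_FROZEN, MMIO_FREEZE, DMA_FREEZE, MMIO_DMA_FREEZE,
	RESET, TEMP_UNAVAIL, PERM_UNAVAIL,
};

static int map_state(enum fstate f)
{
	switch (f) {
	case NOT_FROZEN:      return ST_MMIO_ACTIVE | ST_DMA_ACTIVE |
	                             ST_MMIO_ENABLED | ST_DMA_ENABLED;
	case MMIO_FREEZE:     return ST_DMA_ACTIVE | ST_DMA_ENABLED;
	case DMA_FREEZE:      return ST_MMIO_ACTIVE | ST_MMIO_ENABLED;
	case MMIO_DMA_FREEZE: return 0;
	case RESET:           return ST_RESET_ACTIVE;
	case TEMP_UNAVAIL:    return ST_UNAVAILABLE;
	default:              return ST_NOT_SUPPORT;
	}
}

int main(void)
{
	printf("MMIO freeze -> 0x%02x (DMA side still live)\n",
	       map_state(MMIO_FREEZE));
	return 0;
}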
+ */ + if (pe->state & EEH_PE_RESET) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + return result; + } + + /* + * Fetch PE state from hardware. If the PHB + * supports compound PE, let it handle that. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", + __func__, rc, phb->hose->global_number, + pe->addr); + return EEH_STATE_NOT_SUPPORT; + } + } + + /* Figure out state */ + switch (fstate) { + case OPAL_EEH_STOPPED_NOT_FROZEN: + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + break; + case OPAL_EEH_STOPPED_MMIO_FREEZE: + result = (EEH_STATE_DMA_ACTIVE | + EEH_STATE_DMA_ENABLED); + break; + case OPAL_EEH_STOPPED_DMA_FREEZE: + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_MMIO_ENABLED); + break; + case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: + result = 0; + break; + case OPAL_EEH_STOPPED_RESET: + result = EEH_STATE_RESET_ACTIVE; + break; + case OPAL_EEH_STOPPED_TEMP_UNAVAIL: + result = EEH_STATE_UNAVAILABLE; + break; + case OPAL_EEH_STOPPED_PERM_UNAVAIL: + result = EEH_STATE_NOT_SUPPORT; + break; + default: + result = EEH_STATE_NOT_SUPPORT; + pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", + __func__, phb->hose->global_number, + pe->addr, fstate); + } + + /* + * If PHB supports compound PE, to freeze all + * slave PEs for consistency. + * + * If the PE is switching to frozen state for the + * first time, to dump the PHB diag-data. + */ + if (!(result & EEH_STATE_NOT_SUPPORT) && + !(result & EEH_STATE_UNAVAILABLE) && + !(result & EEH_STATE_MMIO_ACTIVE) && + !(result & EEH_STATE_DMA_ACTIVE) && + !(pe->state & EEH_PE_ISOLATED)) { + if (phb->freeze_pe) + phb->freeze_pe(phb, pe->addr); + + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + pnv_eeh_get_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + } + + return result; +} + +/** + * pnv_eeh_get_state - Retrieve PE state + * @pe: EEH PE + * @delay: delay while PE state is temporarily unavailable + * + * Retrieve the state of the specified PE. For IODA-compitable + * platform, it should be retrieved from IODA table. Therefore, + * we prefer passing down to hardware implementation to handle + * it. 
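pnv_eeh_get_state() just below reports a suggested delay when the PE is temporarily unavailable, and pnv_eeh_wait_state() further down burns a total budget against it. A sketch of that countdown loop against a mocked get_state (all names and values here are illustrative):

#include <stdio.h>

#define UNAVAILABLE (-1)

static int polls;

/* Mock: unavailable for the first two polls, then "all active". */
static int get_state(int *delay_ms)
{
	*delay_ms = 1000;   /* default hint, as in pnv_eeh_get_state() */
	return ++polls < 3 ? UNAVAILABLE : 0x0f;
}

static int wait_state(int budget_ms)
{
	int mwait;

	while (1) {
		int ret = get_state(&mwait);
		if (ret != UNAVAILABLE)
			return ret;
		budget_ms -= mwait;
		if (budget_ms <= 0) {
			fprintf(stderr, "timeout waiting for PE state\n");
			return UNAVAILABLE;
		}
		/* a real implementation would msleep(mwait) here */
	}
}

int main(void)
{
	printf("state = 0x%x after %d polls\n", wait_state(5000), polls);
	return 0;
}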
+ */ +static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay) +{ + int ret; + + if (pe->type & EEH_PE_PHB) + ret = pnv_eeh_get_phb_state(pe); + else + ret = pnv_eeh_get_pe_state(pe); + + if (!delay) + return ret; + + /* + * If the PE state is temporarily unavailable, + * to inform the EEH core delay for default + * period (1 second) + */ + *delay = 0; + if (ret & EEH_STATE_UNAVAILABLE) + *delay = 1000; + + return ret; +} + +static s64 pnv_eeh_phb_poll(struct pnv_phb *phb) +{ + s64 rc = OPAL_HARDWARE; + + while (1) { + rc = opal_pci_poll(phb->opal_id); + if (rc <= 0) + break; + + if (system_state < SYSTEM_RUNNING) + udelay(1000 * rc); + else + msleep(rc); + } + + return rc; +} + +int pnv_eeh_phb_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* Issue PHB complete reset request */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_COMPLETE, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_COMPLETE, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* + * Poll state of the PHB until the request is done + * successfully. The PHB reset is usually PHB complete + * reset followed by hot reset on root bus. So we also + * need the PCI bus settlement delay. + */ + rc = pnv_eeh_phb_poll(phb); + if (option == EEH_RESET_DEACTIVATE) { + if (system_state < SYSTEM_RUNNING) + udelay(1000 * EEH_PE_RST_SETTLE_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + } +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int pnv_eeh_root_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* + * During the reset deassert time, we needn't care + * the reset scope because the firmware does nothing + * for fundamental or hot reset during deassert phase. + */ + if (option == EEH_RESET_FUNDAMENTAL) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_FUNDAMENTAL, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_HOT, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_HOT, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* Poll state of the PHB until the request is done */ + rc = pnv_eeh_phb_poll(phb); + if (option == EEH_RESET_DEACTIVATE) + msleep(EEH_PE_RST_SETTLE_TIME); +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) +{ + struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + int aer = edev ? 
edev->aer_cap : 0; + u32 ctrl; + + pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", + __func__, pci_domain_nr(dev->bus), + dev->bus->number, option); + + switch (option) { + case EEH_RESET_FUNDAMENTAL: + case EEH_RESET_HOT: + /* Don't report linkDown event */ + if (aer) { + eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, &ctrl); + ctrl |= PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, ctrl); + } + + eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + + msleep(EEH_PE_RST_SETTLE_TIME); + + /* Continue reporting linkDown event */ + if (aer) { + eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, &ctrl); + ctrl &= ~PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, ctrl); + } + + break; + } + + return 0; +} + +void pnv_pci_reset_secondary_bus(struct pci_dev *dev) +{ + struct pci_controller *hose; + + if (pci_is_root_bus(dev->bus)) { + hose = pci_bus_to_host(dev->bus); + pnv_eeh_root_reset(hose, EEH_RESET_HOT); + pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); + } else { + pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); + pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); + } +} + +/** + * pnv_eeh_reset - Reset the specified PE + * @pe: EEH PE + * @option: reset option + * + * Do reset on the indicated PE. For PCI bus sensitive PE, + * we need to reset the parent p2p bridge. The PHB has to + * be reinitialized if the p2p bridge is root bridge. For + * PCI device sensitive PE, we will try to reset the device + * through FLR. For now, we don't have OPAL APIs to do HARD + * reset yet, so all reset would be SOFT (HOT) reset. + */ +static int pnv_eeh_reset(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct pci_bus *bus; + int ret; + + /* + * For PHB reset, we always have complete reset. For those PEs whose + * primary bus derived from root complex (root bus) or root port + * (usually bus#1), we apply hot or fundamental reset on the root port. + * For other PEs, we always have hot reset on the PE primary bus. + * + * Here, we have different design to pHyp, which always clear the + * frozen state during PE reset. However, the good idea here from + * benh is to keep frozen state before we get PE reset done completely + * (until BAR restore). With the frozen state, HW drops illegal IO + * or MMIO access, which can incur recrusive frozen PE during PE + * reset. The side effect is that EEH core has to clear the frozen + * state explicitly after BAR restore. + */ + if (pe->type & EEH_PE_PHB) { + ret = pnv_eeh_phb_reset(hose, option); + } else { + struct pnv_phb *phb; + s64 rc; + + /* + * The frozen PE might be caused by PAPR error injection + * registers, which are expected to be cleared after hitting + * frozen PE as stated in the hardware spec. Unfortunately, + * that's not true on P7IOC. So we have to clear it manually + * to avoid recursive EEH errors during recovery. 
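pnv_eeh_bridge_reset() above performs the classic secondary-bus reset dance: optionally mask surprise-link-down reporting in AER, pulse PCI_BRIDGE_CTL_BUS_RESET with a hold time, then let the bus settle. A compressed sketch with mocked config accessors (the hold and settle constants are illustrative stand-ins for EEH_PE_RST_HOLD_TIME and EEH_PE_RST_SETTLE_TIME):

#include <stdio.h>
#include <stdint.h>

#define PCI_BRIDGE_CONTROL       0x3e
#define PCI_BRIDGE_CTL_BUS_RESET 0x40
#define RST_HOLD_MS   250   /* illustrative */
#define RST_SETTLE_MS 300   /* illustrative */

static uint16_t bridge_ctl;  /* mock config register */

static uint16_t cfg_read16(int where)          { (void)where; return bridge_ctl; }
static void cfg_write16(int where, uint16_t v) { (void)where; bridge_ctl = v; }
static void sleep_ms(int ms)                   { printf("  (sleep %d ms)\n", ms); }

static void secondary_bus_reset(void)
{
	uint16_t ctl = cfg_read16(PCI_BRIDGE_CONTROL);

	cfg_write16(PCI_BRIDGE_CONTROL, ctl | PCI_BRIDGE_CTL_BUS_RESET);
	sleep_ms(RST_HOLD_MS);      /* keep reset asserted */

	cfg_write16(PCI_BRIDGE_CONTROL, ctl & ~PCI_BRIDGE_CTL_BUS_RESET);
	sleep_ms(RST_SETTLE_MS);    /* let downstream devices settle */
}

int main(void)
{
	secondary_bus_reset();
	printf("bridge control after reset: 0x%04x\n", (unsigned)bridge_ctl);
	return 0;
}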
+ */ + phb = hose->private_data; + if (phb->model == PNV_PHB_MODEL_P7IOC && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_ERROR, + OPAL_ASSERT_RESET); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clearing " + "error injection registers\n", + __func__, rc); + return -EIO; + } + } + + bus = eeh_pe_bus_get(pe); + if (pci_is_root_bus(bus) || + pci_is_root_bus(bus->parent)) + ret = pnv_eeh_root_reset(hose, option); + else + ret = pnv_eeh_bridge_reset(bus->self, option); + } + + return ret; +} + +/** + * pnv_eeh_wait_state - Wait for PE state + * @pe: EEH PE + * @max_wait: maximal period in microsecond + * + * Wait for the state of associated PE. It might take some time + * to retrieve the PE's state. + */ +static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait) +{ + int ret; + int mwait; + + while (1) { + ret = pnv_eeh_get_state(pe, &mwait); + + /* + * If the PE's state is temporarily unavailable, + * we have to wait for the specified time. Otherwise, + * the PE's state will be returned immediately. + */ + if (ret != EEH_STATE_UNAVAILABLE) + return ret; + + max_wait -= mwait; + if (max_wait <= 0) { + pr_warn("%s: Timeout getting PE#%x's state (%d)\n", + __func__, pe->addr, max_wait); + return EEH_STATE_NOT_SUPPORT; + } + + msleep(mwait); + } + + return EEH_STATE_NOT_SUPPORT; +} + +/** + * pnv_eeh_get_log - Retrieve error log + * @pe: EEH PE + * @severity: temporary or permanent error log + * @drv_log: driver log to be combined with retrieved error log + * @len: length of driver log + * + * Retrieve the temporary or permanent error from the PE. + */ +static int pnv_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + + return 0; +} + +/** + * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE + * @pe: EEH PE + * + * The function will be called to reconfigure the bridges included + * in the specified PE so that the mulfunctional PE would be recovered + * again. + */ +static int pnv_eeh_configure_bridge(struct eeh_pe *pe) +{ + return 0; +} + +/** + * pnv_pe_err_inject - Inject specified error to the indicated PE + * @pe: the indicated PE + * @type: error type + * @func: specific error type + * @addr: address + * @mask: address mask + * + * The routine is called to inject specified error, which is + * determined by @type and @func, to the indicated PE for + * testing purpose. + */ +static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + s64 rc; + + /* Sanity check on error type */ + if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && + type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { + pr_warn("%s: Invalid error type %d\n", + __func__, type); + return -ERANGE; + } + + if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || + func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { + pr_warn("%s: Invalid error function %d\n", + __func__, func); + return -ERANGE; + } + + /* Firmware supports error injection ? 
*/ + if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { + pr_warn("%s: Firmware doesn't support error injection\n", + __func__); + return -ENXIO; + } + + /* Do error injection */ + rc = opal_pci_err_inject(phb->opal_id, pe->addr, + type, func, addr, mask); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld injecting error " + "%d-%d to PHB#%x-PE#%x\n", + __func__, rc, type, func, + hose->global_number, pe->addr); + return -EIO; + } + + return 0; +} + +static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + + if (!edev || !edev->pe) + return false; + + if (edev->pe->state & EEH_PE_CFG_BLOCKED) + return true; + + return false; +} + +static int pnv_eeh_read_config(struct pci_dn *pdn, + int where, int size, u32 *val) +{ + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (pnv_eeh_cfg_blocked(pdn)) { + *val = 0xFFFFFFFF; + return PCIBIOS_SET_FAILED; + } + + return pnv_pci_cfg_read(pdn, where, size, val); +} + +static int pnv_eeh_write_config(struct pci_dn *pdn, + int where, int size, u32 val) +{ + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (pnv_eeh_cfg_blocked(pdn)) + return PCIBIOS_SET_FAILED; + + return pnv_pci_cfg_write(pdn, where, size, val); +} + +static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data) +{ + /* GEM */ + if (data->gemXfir || data->gemRfir || + data->gemRirqfir || data->gemMask || data->gemRwof) + pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->gemXfir), + be64_to_cpu(data->gemRfir), + be64_to_cpu(data->gemRirqfir), + be64_to_cpu(data->gemMask), + be64_to_cpu(data->gemRwof)); + + /* LEM */ + if (data->lemFir || data->lemErrMask || + data->lemAction0 || data->lemAction1 || data->lemWof) + pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrMask), + be64_to_cpu(data->lemAction0), + be64_to_cpu(data->lemAction1), + be64_to_cpu(data->lemWof)); +} + +static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag; + long rc; + + rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", + __func__, phb->hub_id, rc); + return; + } + + switch (data->type) { + case OPAL_P7IOC_DIAG_TYPE_RGC: + pr_info("P7IOC diag-data for RGC\n\n"); + pnv_eeh_dump_hub_diag_common(data); + if (data->rgc.rgcStatus || data->rgc.rgcLdcp) + pr_info(" RGC: %016llx %016llx\n", + be64_to_cpu(data->rgc.rgcStatus), + be64_to_cpu(data->rgc.rgcLdcp)); + break; + case OPAL_P7IOC_DIAG_TYPE_BI: + pr_info("P7IOC diag-data for BI %s\n\n", + data->bi.biDownbound ? 
"Downbound" : "Upbound"); + pnv_eeh_dump_hub_diag_common(data); + if (data->bi.biLdcp0 || data->bi.biLdcp1 || + data->bi.biLdcp2 || data->bi.biFenceStatus) + pr_info(" BI: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->bi.biLdcp0), + be64_to_cpu(data->bi.biLdcp1), + be64_to_cpu(data->bi.biLdcp2), + be64_to_cpu(data->bi.biFenceStatus)); + break; + case OPAL_P7IOC_DIAG_TYPE_CI: + pr_info("P7IOC diag-data for CI Port %d\n\n", + data->ci.ciPort); + pnv_eeh_dump_hub_diag_common(data); + if (data->ci.ciPortStatus || data->ci.ciPortLdcp) + pr_info(" CI: %016llx %016llx\n", + be64_to_cpu(data->ci.ciPortStatus), + be64_to_cpu(data->ci.ciPortLdcp)); + break; + case OPAL_P7IOC_DIAG_TYPE_MISC: + pr_info("P7IOC diag-data for MISC\n\n"); + pnv_eeh_dump_hub_diag_common(data); + break; + case OPAL_P7IOC_DIAG_TYPE_I2C: + pr_info("P7IOC diag-data for I2C\n\n"); + pnv_eeh_dump_hub_diag_common(data); + break; + default: + pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", + __func__, phb->hub_id, data->type); + } +} + +static int pnv_eeh_get_pe(struct pci_controller *hose, + u16 pe_no, struct eeh_pe **pe) +{ + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pnv_pe; + struct eeh_pe *dev_pe; + struct eeh_dev edev; + + /* + * If PHB supports compound PE, to fetch + * the master PE because slave PE is invisible + * to EEH core. + */ + pnv_pe = &phb->ioda.pe_array[pe_no]; + if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { + pnv_pe = pnv_pe->master; + WARN_ON(!pnv_pe || + !(pnv_pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pnv_pe->pe_number; + } + + /* Find the PE according to PE# */ + memset(&edev, 0, sizeof(struct eeh_dev)); + edev.phb = hose; + edev.pe_config_addr = pe_no; + dev_pe = eeh_pe_get(&edev); + if (!dev_pe) + return -EEXIST; + + /* Freeze the (compound) PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, pe_no); + + /* + * At this point, we're sure the (compound) PE should + * have been frozen. However, we still need poke until + * hitting the frozen PE on top level. + */ + dev_pe = dev_pe->parent; + while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { + int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE); + + ret = eeh_ops->get_state(dev_pe, NULL); + if (ret <= 0 || (ret & active_flags) == active_flags) { + dev_pe = dev_pe->parent; + continue; + } + + /* Frozen parent PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, dev_pe->addr); + + /* Next one */ + dev_pe = dev_pe->parent; + } + + return 0; +} + +/** + * pnv_eeh_next_error - Retrieve next EEH error to handle + * @pe: Affected PE + * + * The function is expected to be called by EEH core while it gets + * special EEH event (without binding PE). The function calls to + * OPAL APIs for next error to handle. The informational error is + * handled internally by platform. However, the dead IOC, dead PHB, + * fenced PHB and frozen PE should be handled by EEH core eventually. + */ +static int pnv_eeh_next_error(struct eeh_pe **pe) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + struct eeh_pe *phb_pe, *parent_pe; + __be64 frozen_pe_no; + __be16 err_type, severity; + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + long rc; + int state, ret = EEH_NEXT_ERR_NONE; + + /* + * While running here, it's safe to purge the event queue. + * And we should keep the cached OPAL notifier event sychronized + * between the kernel and firmware. 
+ */ + eeh_remove_event(NULL, false); + opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); + + list_for_each_entry(hose, &hose_list, list_node) { + /* + * If the subordinate PCI buses of the PHB has been + * removed or is exactly under error recovery, we + * needn't take care of it any more. + */ + phb = hose->private_data; + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED)) + continue; + + rc = opal_pci_next_error(phb->opal_id, + &frozen_pe_no, &err_type, &severity); + if (rc != OPAL_SUCCESS) { + pr_devel("%s: Invalid return value on " + "PHB#%x (0x%lx) from opal_pci_next_error", + __func__, hose->global_number, rc); + continue; + } + + /* If the PHB doesn't have error, stop processing */ + if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || + be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { + pr_devel("%s: No error found on PHB#%x\n", + __func__, hose->global_number); + continue; + } + + /* + * Processing the error. We're expecting the error with + * highest priority reported upon multiple errors on the + * specific PHB. + */ + pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", + __func__, be16_to_cpu(err_type), + be16_to_cpu(severity), be64_to_cpu(frozen_pe_no), + hose->global_number); + switch (be16_to_cpu(err_type)) { + case OPAL_EEH_IOC_ERROR: + if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { + pr_err("EEH: dead IOC detected\n"); + ret = EEH_NEXT_ERR_DEAD_IOC; + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { + pr_info("EEH: IOC informative error " + "detected\n"); + pnv_eeh_get_and_dump_hub_diag(hose); + ret = EEH_NEXT_ERR_NONE; + } + + break; + case OPAL_EEH_PHB_ERROR: + if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { + *pe = phb_pe; + pr_err("EEH: dead PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_DEAD_PHB; + } else if (be16_to_cpu(severity) == + OPAL_EEH_SEV_PHB_FENCED) { + *pe = phb_pe; + pr_err("EEH: Fenced PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_FENCED_PHB; + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { + pr_info("EEH: PHB#%x informative error " + "detected, location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + pnv_eeh_get_phb_diag(phb_pe); + pnv_pci_dump_phb_diag_data(hose, phb_pe->data); + ret = EEH_NEXT_ERR_NONE; + } + + break; + case OPAL_EEH_PE_ERROR: + /* + * If we can't find the corresponding PE, we + * just try to unfreeze. + */ + if (pnv_eeh_get_pe(hose, + be64_to_cpu(frozen_pe_no), pe)) { + /* Try best to clear it */ + pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", + hose->global_number, frozen_pe_no); + pr_info("EEH: PHB location: %s\n", + eeh_pe_loc_get(phb_pe)); + opal_pci_eeh_freeze_clear(phb->opal_id, + frozen_pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + ret = EEH_NEXT_ERR_NONE; + } else if ((*pe)->state & EEH_PE_ISOLATED || + eeh_pe_passed(*pe)) { + ret = EEH_NEXT_ERR_NONE; + } else { + pr_err("EEH: Frozen PE#%x " + "on PHB#%x detected\n", + (*pe)->addr, + (*pe)->phb->global_number); + pr_err("EEH: PE location: %s, " + "PHB location: %s\n", + eeh_pe_loc_get(*pe), + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_FROZEN_PE; + } + + break; + default: + pr_warn("%s: Unexpected error type %d\n", + __func__, be16_to_cpu(err_type)); + } + + /* + * EEH core will try recover from fenced PHB or + * frozen PE. In the time for frozen PE, EEH core + * enable IO path for that before collecting logs, + * but it ruins the site. 
So we have to dump the + * log in advance here. + */ + if ((ret == EEH_NEXT_ERR_FROZEN_PE || + ret == EEH_NEXT_ERR_FENCED_PHB) && + !((*pe)->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + pnv_eeh_get_phb_diag(*pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data((*pe)->phb, + (*pe)->data); + } + + /* + * We probably have the frozen parent PE out there and + * we need have to handle frozen parent PE firstly. + */ + if (ret == EEH_NEXT_ERR_FROZEN_PE) { + parent_pe = (*pe)->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + state = eeh_ops->get_state(parent_pe, NULL); + if (state > 0 && + (state & active_flags) != active_flags) + *pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + + /* We possibly migrate to another PE */ + eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + } + + /* + * If we have no errors on the specific PHB or only + * informative error there, we continue poking it. + * Otherwise, we need actions to be taken by upper + * layer. + */ + if (ret > EEH_NEXT_ERR_INF) + break; + } + + return ret; +} + +static int pnv_eeh_restore_config(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + struct pnv_phb *phb; + s64 ret; + + if (!edev) + return -EEXIST; + + phb = edev->phb->private_data; + ret = opal_pci_reinit(phb->opal_id, + OPAL_REINIT_PCI_DEV, edev->config_addr); + if (ret) { + pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", + __func__, edev->config_addr, ret); + return -EIO; + } + + return 0; +} + +static struct eeh_ops pnv_eeh_ops = { + .name = "powernv", + .init = pnv_eeh_init, + .post_init = pnv_eeh_post_init, + .probe = pnv_eeh_probe, + .set_option = pnv_eeh_set_option, + .get_pe_addr = pnv_eeh_get_pe_addr, + .get_state = pnv_eeh_get_state, + .reset = pnv_eeh_reset, + .wait_state = pnv_eeh_wait_state, + .get_log = pnv_eeh_get_log, + .configure_bridge = pnv_eeh_configure_bridge, + .err_inject = pnv_eeh_err_inject, + .read_config = pnv_eeh_read_config, + .write_config = pnv_eeh_write_config, + .next_error = pnv_eeh_next_error, + .restore_config = pnv_eeh_restore_config +}; + +/** + * eeh_powernv_init - Register platform dependent EEH operations + * + * EEH initialization on powernv platform. This function should be + * called before any EEH related functions. + */ +static int __init eeh_powernv_init(void) +{ + int ret = -EINVAL; + + eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE); + ret = eeh_ops_register(&pnv_eeh_ops); + if (!ret) + pr_info("EEH: PowerNV platform initialized\n"); + else + pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret); + + return ret; +} +machine_early_initcall(powernv, eeh_powernv_init); diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c new file mode 100644 index 000000000..693b6cdac --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -0,0 +1,208 @@ +/* + * PowerNV OPAL asynchronous completion interfaces + * + * Copyright 2013 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/semaphore.h> +#include <linux/spinlock.h> +#include <linux/wait.h> +#include <linux/gfp.h> +#include <linux/of.h> +#include <asm/machdep.h> +#include <asm/opal.h> + +#define N_ASYNC_COMPLETIONS 64 + +static DECLARE_BITMAP(opal_async_complete_map, N_ASYNC_COMPLETIONS) = {~0UL}; +static DECLARE_BITMAP(opal_async_token_map, N_ASYNC_COMPLETIONS); +static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait); +static DEFINE_SPINLOCK(opal_async_comp_lock); +static struct semaphore opal_async_sem; +static struct opal_msg *opal_async_responses; +static unsigned int opal_max_async_tokens; + +int __opal_async_get_token(void) +{ + unsigned long flags; + int token; + + spin_lock_irqsave(&opal_async_comp_lock, flags); + token = find_first_bit(opal_async_complete_map, opal_max_async_tokens); + if (token >= opal_max_async_tokens) { + token = -EBUSY; + goto out; + } + + if (__test_and_set_bit(token, opal_async_token_map)) { + token = -EBUSY; + goto out; + } + + __clear_bit(token, opal_async_complete_map); + +out: + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + return token; +} + +int opal_async_get_token_interruptible(void) +{ + int token; + + /* Wait until a token is available */ + if (down_interruptible(&opal_async_sem)) + return -ERESTARTSYS; + + token = __opal_async_get_token(); + if (token < 0) + up(&opal_async_sem); + + return token; +} +EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible); + +int __opal_async_release_token(int token) +{ + unsigned long flags; + + if (token < 0 || token >= opal_max_async_tokens) { + pr_err("%s: Passed token is out of range, token %d\n", + __func__, token); + return -EINVAL; + } + + spin_lock_irqsave(&opal_async_comp_lock, flags); + __set_bit(token, opal_async_complete_map); + __clear_bit(token, opal_async_token_map); + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + + return 0; +} + +int opal_async_release_token(int token) +{ + int ret; + + ret = __opal_async_release_token(token); + if (ret) + return ret; + + up(&opal_async_sem); + + return 0; +} +EXPORT_SYMBOL_GPL(opal_async_release_token); + +int opal_async_wait_response(uint64_t token, struct opal_msg *msg) +{ + if (token >= opal_max_async_tokens) { + pr_err("%s: Invalid token passed\n", __func__); + return -EINVAL; + } + + if (!msg) { + pr_err("%s: Invalid message pointer passed\n", __func__); + return -EINVAL; + } + + wait_event(opal_async_wait, test_bit(token, opal_async_complete_map)); + memcpy(msg, &opal_async_responses[token], sizeof(*msg)); + + return 0; +} +EXPORT_SYMBOL_GPL(opal_async_wait_response); + +static int opal_async_comp_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + struct opal_msg *comp_msg = msg; + unsigned long flags; + uint64_t token; + + if (msg_type != OPAL_MSG_ASYNC_COMP) + return 0; + + token = be64_to_cpu(comp_msg->params[0]); + memcpy(&opal_async_responses[token], comp_msg, sizeof(*comp_msg)); + spin_lock_irqsave(&opal_async_comp_lock, flags); + __set_bit(token, opal_async_complete_map); + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + + wake_up(&opal_async_wait); + + return 0; +} + +static struct notifier_block opal_async_comp_nb = { + .notifier_call = opal_async_comp_event, + .next = NULL, + .priority = 0, +}; + +static int __init opal_async_comp_init(void) +{ + struct device_node *opal_node; + const __be32 *async; + int err; + + opal_node = of_find_node_by_path("/ibm,opal"); + if (!opal_node) { + 
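+		/* No /ibm,opal node: OPAL services (and async tokens) are absent */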
pr_err("%s: Opal node not found\n", __func__); + err = -ENOENT; + goto out; + } + + async = of_get_property(opal_node, "opal-msg-async-num", NULL); + if (!async) { + pr_err("%s: %s has no opal-msg-async-num\n", + __func__, opal_node->full_name); + err = -ENOENT; + goto out_opal_node; + } + + opal_max_async_tokens = be32_to_cpup(async); + if (opal_max_async_tokens > N_ASYNC_COMPLETIONS) + opal_max_async_tokens = N_ASYNC_COMPLETIONS; + + err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP, + &opal_async_comp_nb); + if (err) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, err); + goto out_opal_node; + } + + opal_async_responses = kzalloc( + sizeof(*opal_async_responses) * opal_max_async_tokens, + GFP_KERNEL); + if (!opal_async_responses) { + pr_err("%s: Out of memory, failed to do asynchronous " + "completion init\n", __func__); + err = -ENOMEM; + goto out_opal_node; + } + + /* Initialize to 1 less than the maximum tokens available, as we may + * require to pop one during emergency through synchronous call to + * __opal_async_get_token() + */ + sema_init(&opal_async_sem, opal_max_async_tokens - 1); + +out_opal_node: + of_node_put(opal_node); +out: + return err; +} +machine_subsys_initcall(powernv, opal_async_comp_init); diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c new file mode 100644 index 000000000..5aa9c1ce4 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -0,0 +1,457 @@ +/* + * PowerNV OPAL Dump Interface + * + * Copyright 2013,2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kobject.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/pagemap.h> +#include <linux/delay.h> + +#include <asm/opal.h> + +#define DUMP_TYPE_FSP 0x01 + +struct dump_obj { + struct kobject kobj; + struct bin_attribute dump_attr; + uint32_t id; /* becomes object name */ + uint32_t type; + uint32_t size; + char *buffer; +}; +#define to_dump_obj(x) container_of(x, struct dump_obj, kobj) + +struct dump_attribute { + struct attribute attr; + ssize_t (*show)(struct dump_obj *dump, struct dump_attribute *attr, + char *buf); + ssize_t (*store)(struct dump_obj *dump, struct dump_attribute *attr, + const char *buf, size_t count); +}; +#define to_dump_attr(x) container_of(x, struct dump_attribute, attr) + +static ssize_t dump_id_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x%x\n", dump_obj->id); +} + +static const char* dump_type_to_string(uint32_t type) +{ + switch (type) { + case 0x01: return "SP Dump"; + case 0x02: return "System/Platform Dump"; + case 0x03: return "SMA Dump"; + default: return "unknown"; + } +} + +static ssize_t dump_type_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + + return sprintf(buf, "0x%x %s\n", dump_obj->type, + dump_type_to_string(dump_obj->type)); +} + +static ssize_t dump_ack_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + return sprintf(buf, "ack - acknowledge dump\n"); +} + +/* + * Send acknowledgement to OPAL + */ +static int64_t dump_send_ack(uint32_t dump_id) +{ + int rc; + + rc = opal_dump_ack(dump_id); + if (rc) + pr_warn("%s: Failed to send ack to Dump ID 0x%x (%d)\n", + __func__, dump_id, rc); + return rc; +} + +static ssize_t dump_ack_store(struct dump_obj *dump_obj, + struct dump_attribute *attr, + const char *buf, + size_t count) +{ + dump_send_ack(dump_obj->id); + sysfs_remove_file_self(&dump_obj->kobj, &attr->attr); + kobject_put(&dump_obj->kobj); + return count; +} + +/* Attributes of a dump + * The binary attribute of the dump itself is dynamic + * due to the dynamic size of the dump + */ +static struct dump_attribute id_attribute = + __ATTR(id, S_IRUGO, dump_id_show, NULL); +static struct dump_attribute type_attribute = + __ATTR(type, S_IRUGO, dump_type_show, NULL); +static struct dump_attribute ack_attribute = + __ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store); + +static ssize_t init_dump_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + return sprintf(buf, "1 - initiate Service Processor(FSP) dump\n"); +} + +static int64_t dump_fips_init(uint8_t type) +{ + int rc; + + rc = opal_dump_init(type); + if (rc) + pr_warn("%s: Failed to initiate FSP dump (%d)\n", + __func__, rc); + return rc; +} + +static ssize_t init_dump_store(struct dump_obj *dump_obj, + struct dump_attribute *attr, + const char *buf, + size_t count) +{ + int rc; + + rc = dump_fips_init(DUMP_TYPE_FSP); + if (rc == OPAL_SUCCESS) + pr_info("%s: Initiated FSP dump\n", __func__); + + return count; +} + +static struct dump_attribute initiate_attribute = + __ATTR(initiate_dump, 0600, init_dump_show, init_dump_store); + +static struct attribute *initiate_attrs[] = { + &initiate_attribute.attr, + NULL, +}; + +static struct attribute_group initiate_attr_group = { + .attrs = initiate_attrs, +}; + +static struct kset *dump_kset; + +static ssize_t dump_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct dump_attribute *attribute; + struct 
dump_obj *dump; + + attribute = to_dump_attr(attr); + dump = to_dump_obj(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(dump, attribute, buf); +} + +static ssize_t dump_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct dump_attribute *attribute; + struct dump_obj *dump; + + attribute = to_dump_attr(attr); + dump = to_dump_obj(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(dump, attribute, buf, len); +} + +static const struct sysfs_ops dump_sysfs_ops = { + .show = dump_attr_show, + .store = dump_attr_store, +}; + +static void dump_release(struct kobject *kobj) +{ + struct dump_obj *dump; + + dump = to_dump_obj(kobj); + vfree(dump->buffer); + kfree(dump); +} + +static struct attribute *dump_default_attrs[] = { + &id_attribute.attr, + &type_attribute.attr, + &ack_attribute.attr, + NULL, +}; + +static struct kobj_type dump_ktype = { + .sysfs_ops = &dump_sysfs_ops, + .release = &dump_release, + .default_attrs = dump_default_attrs, +}; + +static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type) +{ + __be32 id, size, type; + int rc; + + type = cpu_to_be32(0xffffffff); + + rc = opal_dump_info2(&id, &size, &type); + if (rc == OPAL_PARAMETER) + rc = opal_dump_info(&id, &size); + + *dump_id = be32_to_cpu(id); + *dump_size = be32_to_cpu(size); + *dump_type = be32_to_cpu(type); + + if (rc) + pr_warn("%s: Failed to get dump info (%d)\n", + __func__, rc); + return rc; +} + +static int64_t dump_read_data(struct dump_obj *dump) +{ + struct opal_sg_list *list; + uint64_t addr; + int64_t rc; + + /* Allocate memory */ + dump->buffer = vzalloc(PAGE_ALIGN(dump->size)); + if (!dump->buffer) { + pr_err("%s : Failed to allocate memory\n", __func__); + rc = -ENOMEM; + goto out; + } + + /* Generate SG list */ + list = opal_vmalloc_to_sg_list(dump->buffer, dump->size); + if (!list) { + rc = -ENOMEM; + goto out; + } + + /* First entry address */ + addr = __pa(list); + + /* Fetch data */ + rc = OPAL_BUSY_EVENT; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_dump_read(dump->id, addr); + if (rc == OPAL_BUSY_EVENT) { + opal_poll_events(NULL); + msleep(20); + } + } + + if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) + pr_warn("%s: Extract dump failed for ID 0x%x\n", + __func__, dump->id); + + /* Free SG list */ + opal_free_sg_list(list); + +out: + return rc; +} + +static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) +{ + ssize_t rc; + + struct dump_obj *dump = to_dump_obj(kobj); + + if (!dump->buffer) { + rc = dump_read_data(dump); + + if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) { + vfree(dump->buffer); + dump->buffer = NULL; + + return -EIO; + } + if (rc == OPAL_PARTIAL) { + /* On a partial read, we just return EIO + * and rely on userspace to ask us to try + * again. + */ + pr_info("%s: Platform dump partially read. ID = 0x%x\n", + __func__, dump->id); + return -EIO; + } + } + + memcpy(buffer, dump->buffer + pos, count); + + /* You may think we could free the dump buffer now and retrieve + * it again later if needed, but due to current firmware limitation, + * that's not the case. So, once read into userspace once, + * we keep the dump around until it's acknowledged by userspace. 
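+	 * (The buffer is only released in dump_release(), i.e. once the
+	 * 'acknowledge' write drops the final kobject reference.)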
+ */ + + return count; +} + +static struct dump_obj *create_dump_obj(uint32_t id, size_t size, + uint32_t type) +{ + struct dump_obj *dump; + int rc; + + dump = kzalloc(sizeof(*dump), GFP_KERNEL); + if (!dump) + return NULL; + + dump->kobj.kset = dump_kset; + + kobject_init(&dump->kobj, &dump_ktype); + + sysfs_bin_attr_init(&dump->dump_attr); + + dump->dump_attr.attr.name = "dump"; + dump->dump_attr.attr.mode = 0400; + dump->dump_attr.size = size; + dump->dump_attr.read = dump_attr_read; + + dump->id = id; + dump->size = size; + dump->type = type; + + rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id); + if (rc) { + kobject_put(&dump->kobj); + return NULL; + } + + rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr); + if (rc) { + kobject_put(&dump->kobj); + return NULL; + } + + pr_info("%s: New platform dump. ID = 0x%x Size %u\n", + __func__, dump->id, dump->size); + + kobject_uevent(&dump->kobj, KOBJ_ADD); + + return dump; +} + +static int process_dump(void) +{ + int rc; + uint32_t dump_id, dump_size, dump_type; + struct dump_obj *dump; + char name[22]; + + rc = dump_read_info(&dump_id, &dump_size, &dump_type); + if (rc != OPAL_SUCCESS) + return rc; + + sprintf(name, "0x%x-0x%x", dump_type, dump_id); + + /* we may get notified twice, let's handle + * that gracefully and not create two conflicting + * entries. + */ + if (kset_find_obj(dump_kset, name)) + return 0; + + dump = create_dump_obj(dump_id, dump_size, dump_type); + if (!dump) + return -1; + + return 0; +} + +static void dump_work_fn(struct work_struct *work) +{ + process_dump(); +} + +static DECLARE_WORK(dump_work, dump_work_fn); + +static void schedule_process_dump(void) +{ + schedule_work(&dump_work); +} + +/* + * New dump available notification + * + * Once we get notification, we add sysfs entries for it. + * We only fetch the dump on demand, and create sysfs asynchronously. + */ +static int dump_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + if (events & OPAL_EVENT_DUMP_AVAIL) + schedule_process_dump(); + + return 0; +} + +static struct notifier_block dump_nb = { + .notifier_call = dump_event, + .next = NULL, + .priority = 0 +}; + +void __init opal_platform_dump_init(void) +{ + int rc; + + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_DUMP_READ)) + return; + + dump_kset = kset_create_and_add("dump", NULL, opal_kobj); + if (!dump_kset) { + pr_warn("%s: Failed to create dump kset\n", __func__); + return; + } + + rc = sysfs_create_group(&dump_kset->kobj, &initiate_attr_group); + if (rc) { + pr_warn("%s: Failed to create initiate dump attr group\n", + __func__); + kobject_put(&dump_kset->kobj); + return; + } + + rc = opal_notifier_register(&dump_nb); + if (rc) { + pr_warn("%s: Can't register OPAL event notifier (%d)\n", + __func__, rc); + return; + } + + if (opal_check_token(OPAL_DUMP_RESEND)) + opal_dump_resend_notification(); +} diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c new file mode 100644 index 000000000..38ce757e5 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -0,0 +1,320 @@ +/* + * Error log support on PowerNV. + * + * Copyright 2013,2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/sysfs.h> +#include <linux/fs.h> +#include <linux/vmalloc.h> +#include <linux/fcntl.h> +#include <linux/kobject.h> +#include <asm/uaccess.h> +#include <asm/opal.h> + +struct elog_obj { + struct kobject kobj; + struct bin_attribute raw_attr; + uint64_t id; + uint64_t type; + size_t size; + char *buffer; +}; +#define to_elog_obj(x) container_of(x, struct elog_obj, kobj) + +struct elog_attribute { + struct attribute attr; + ssize_t (*show)(struct elog_obj *elog, struct elog_attribute *attr, + char *buf); + ssize_t (*store)(struct elog_obj *elog, struct elog_attribute *attr, + const char *buf, size_t count); +}; +#define to_elog_attr(x) container_of(x, struct elog_attribute, attr) + +static ssize_t elog_id_show(struct elog_obj *elog_obj, + struct elog_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x%llx\n", elog_obj->id); +} + +static const char *elog_type_to_string(uint64_t type) +{ + switch (type) { + case 0: return "PEL"; + default: return "unknown"; + } +} + +static ssize_t elog_type_show(struct elog_obj *elog_obj, + struct elog_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x%llx %s\n", + elog_obj->type, + elog_type_to_string(elog_obj->type)); +} + +static ssize_t elog_ack_show(struct elog_obj *elog_obj, + struct elog_attribute *attr, + char *buf) +{ + return sprintf(buf, "ack - acknowledge log message\n"); +} + +static ssize_t elog_ack_store(struct elog_obj *elog_obj, + struct elog_attribute *attr, + const char *buf, + size_t count) +{ + opal_send_ack_elog(elog_obj->id); + sysfs_remove_file_self(&elog_obj->kobj, &attr->attr); + kobject_put(&elog_obj->kobj); + return count; +} + +static struct elog_attribute id_attribute = + __ATTR(id, S_IRUGO, elog_id_show, NULL); +static struct elog_attribute type_attribute = + __ATTR(type, S_IRUGO, elog_type_show, NULL); +static struct elog_attribute ack_attribute = + __ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store); + +static struct kset *elog_kset; + +static ssize_t elog_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct elog_attribute *attribute; + struct elog_obj *elog; + + attribute = to_elog_attr(attr); + elog = to_elog_obj(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(elog, attribute, buf); +} + +static ssize_t elog_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct elog_attribute *attribute; + struct elog_obj *elog; + + attribute = to_elog_attr(attr); + elog = to_elog_obj(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(elog, attribute, buf, len); +} + +static const struct sysfs_ops elog_sysfs_ops = { + .show = elog_attr_show, + .store = elog_attr_store, +}; + +static void elog_release(struct kobject *kobj) +{ + struct elog_obj *elog; + + elog = to_elog_obj(kobj); + kfree(elog->buffer); + kfree(elog); +} + +static struct attribute *elog_default_attrs[] = { + &id_attribute.attr, + &type_attribute.attr, + &ack_attribute.attr, + NULL, +}; + +static struct kobj_type elog_ktype = { + .sysfs_ops = &elog_sysfs_ops, + .release = &elog_release, + .default_attrs = elog_default_attrs, +}; + +/* Maximum size of a single log on FSP is 16KB */ +#define OPAL_MAX_ERRLOG_SIZE 16384 + +static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) +{ + int opal_rc; + + struct elog_obj *elog = 
to_elog_obj(kobj); + + /* We may have had an error reading before, so let's retry */ + if (!elog->buffer) { + elog->buffer = kzalloc(elog->size, GFP_KERNEL); + if (!elog->buffer) + return -EIO; + + opal_rc = opal_read_elog(__pa(elog->buffer), + elog->size, elog->id); + if (opal_rc != OPAL_SUCCESS) { + pr_err("ELOG: log read failed for log-id=%llx\n", + elog->id); + kfree(elog->buffer); + elog->buffer = NULL; + return -EIO; + } + } + + memcpy(buffer, elog->buffer + pos, count); + + return count; +} + +static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) +{ + struct elog_obj *elog; + int rc; + + elog = kzalloc(sizeof(*elog), GFP_KERNEL); + if (!elog) + return NULL; + + elog->kobj.kset = elog_kset; + + kobject_init(&elog->kobj, &elog_ktype); + + sysfs_bin_attr_init(&elog->raw_attr); + + elog->raw_attr.attr.name = "raw"; + elog->raw_attr.attr.mode = 0400; + elog->raw_attr.size = size; + elog->raw_attr.read = raw_attr_read; + + elog->id = id; + elog->size = size; + elog->type = type; + + elog->buffer = kzalloc(elog->size, GFP_KERNEL); + + if (elog->buffer) { + rc = opal_read_elog(__pa(elog->buffer), + elog->size, elog->id); + if (rc != OPAL_SUCCESS) { + pr_err("ELOG: log read failed for log-id=%llx\n", + elog->id); + kfree(elog->buffer); + elog->buffer = NULL; + } + } + + rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); + if (rc) { + kobject_put(&elog->kobj); + return NULL; + } + + rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); + if (rc) { + kobject_put(&elog->kobj); + return NULL; + } + + kobject_uevent(&elog->kobj, KOBJ_ADD); + + return elog; +} + +static void elog_work_fn(struct work_struct *work) +{ + __be64 size; + __be64 id; + __be64 type; + uint64_t elog_size; + uint64_t log_id; + uint64_t elog_type; + int rc; + char name[2+16+1]; + + rc = opal_get_elog_size(&id, &size, &type); + if (rc != OPAL_SUCCESS) { + pr_err("ELOG: OPAL log info read failed\n"); + return; + } + + elog_size = be64_to_cpu(size); + log_id = be64_to_cpu(id); + elog_type = be64_to_cpu(type); + + WARN_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); + + if (elog_size >= OPAL_MAX_ERRLOG_SIZE) + elog_size = OPAL_MAX_ERRLOG_SIZE; + + sprintf(name, "0x%llx", log_id); + + /* we may get notified twice, let's handle + * that gracefully and not create two conflicting + * entries. + */ + if (kset_find_obj(elog_kset, name)) + return; + + create_elog_obj(log_id, elog_size, elog_type); +} + +static DECLARE_WORK(elog_work, elog_work_fn); + +static int elog_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + /* check for error log event */ + if (events & OPAL_EVENT_ERROR_LOG_AVAIL) + schedule_work(&elog_work); + return 0; +} + +static struct notifier_block elog_nb = { + .notifier_call = elog_event, + .next = NULL, + .priority = 0 +}; + +int __init opal_elog_init(void) +{ + int rc = 0; + + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_ELOG_READ)) + return -1; + + elog_kset = kset_create_and_add("elog", NULL, opal_kobj); + if (!elog_kset) { + pr_warn("%s: failed to create elog kset\n", __func__); + return -1; + } + + rc = opal_notifier_register(&elog_nb); + if (rc) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, rc); + return rc; + } + + /* We are now ready to pull error logs from opal. 
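+	 * (If the firmware supports OPAL_ELOG_RESEND, it is asked to
+	 * re-raise notifications for logs generated before this handler
+	 * was registered.)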
*/ + if (opal_check_token(OPAL_ELOG_RESEND)) + opal_resend_pending_logs(); + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c new file mode 100644 index 000000000..4ec621928 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -0,0 +1,592 @@ +/* + * PowerNV OPAL Firmware Update Interface + * + * Copyright 2013 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define DEBUG + +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/init.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/pagemap.h> +#include <linux/delay.h> + +#include <asm/opal.h> + +/* FLASH status codes */ +#define FLASH_NO_OP -1099 /* No operation initiated by user */ +#define FLASH_NO_AUTH -9002 /* Not a service authority partition */ + +/* Validate image status values */ +#define VALIDATE_IMG_READY -1001 /* Image ready for validation */ +#define VALIDATE_IMG_INCOMPLETE -1002 /* User copied < VALIDATE_BUF_SIZE */ + +/* Manage image status values */ +#define MANAGE_ACTIVE_ERR -9001 /* Cannot overwrite active img */ + +/* Flash image status values */ +#define FLASH_IMG_READY 0 /* Img ready for flash on reboot */ +#define FLASH_INVALID_IMG -1003 /* Flash image shorter than expected */ +#define FLASH_IMG_NULL_DATA -1004 /* Bad data in sg list entry */ +#define FLASH_IMG_BAD_LEN -1005 /* Bad length in sg list entry */ + +/* Manage operation tokens */ +#define FLASH_REJECT_TMP_SIDE 0 /* Reject temporary fw image */ +#define FLASH_COMMIT_TMP_SIDE 1 /* Commit temporary fw image */ + +/* Update tokens */ +#define FLASH_UPDATE_CANCEL 0 /* Cancel update request */ +#define FLASH_UPDATE_INIT 1 /* Initiate update */ + +/* Validate image update result tokens */ +#define VALIDATE_TMP_UPDATE 0 /* T side will be updated */ +#define VALIDATE_FLASH_AUTH 1 /* Partition does not have authority */ +#define VALIDATE_INVALID_IMG 2 /* Candidate image is not valid */ +#define VALIDATE_CUR_UNKNOWN 3 /* Current fixpack level is unknown */ +/* + * Current T side will be committed to P side before being replace with new + * image, and the new image is downlevel from current image + */ +#define VALIDATE_TMP_COMMIT_DL 4 +/* + * Current T side will be committed to P side before being replaced with new + * image + */ +#define VALIDATE_TMP_COMMIT 5 +/* + * T side will be updated with a downlevel image + */ +#define VALIDATE_TMP_UPDATE_DL 6 +/* + * The candidate image's release date is later than the system's firmware + * service entitlement date - service warranty period has expired + */ +#define VALIDATE_OUT_OF_WRNTY 7 + +/* Validate buffer size */ +#define VALIDATE_BUF_SIZE 4096 + +/* XXX: Assume candidate image size is <= 1GB */ +#define MAX_IMAGE_SIZE 0x40000000 + +/* Image status */ +enum { + IMAGE_INVALID, + IMAGE_LOADING, + IMAGE_READY, +}; + +/* Candidate image data */ +struct image_data_t { + int status; + void *data; + uint32_t size; +}; + +/* Candidate image header */ +struct image_header_t { + uint16_t magic; + uint16_t version; + uint32_t size; +}; + +struct validate_flash_t { + int status; /* Return status */ + void *buf; /* Candidate image buffer */ + uint32_t buf_size; /* Image size */ + uint32_t result; /* Update results 
token */ +}; + +struct manage_flash_t { + int status; /* Return status */ +}; + +struct update_flash_t { + int status; /* Return status */ +}; + +static struct image_header_t image_header; +static struct image_data_t image_data; +static struct validate_flash_t validate_flash_data; +static struct manage_flash_t manage_flash_data; + +/* Initialize update_flash_data status to No Operation */ +static struct update_flash_t update_flash_data = { + .status = FLASH_NO_OP, +}; + +static DEFINE_MUTEX(image_data_mutex); + +/* + * Validate candidate image + */ +static inline void opal_flash_validate(void) +{ + long ret; + void *buf = validate_flash_data.buf; + __be32 size = cpu_to_be32(validate_flash_data.buf_size); + __be32 result; + + ret = opal_validate_flash(__pa(buf), &size, &result); + + validate_flash_data.status = ret; + validate_flash_data.buf_size = be32_to_cpu(size); + validate_flash_data.result = be32_to_cpu(result); +} + +/* + * Validate output format: + * validate result token + * current image version details + * new image version details + */ +static ssize_t validate_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct validate_flash_t *args_buf = &validate_flash_data; + int len; + + /* Candidate image is not validated */ + if (args_buf->status < VALIDATE_TMP_UPDATE) { + len = sprintf(buf, "%d\n", args_buf->status); + goto out; + } + + /* Result token */ + len = sprintf(buf, "%d\n", args_buf->result); + + /* Current and candidate image version details */ + if ((args_buf->result != VALIDATE_TMP_UPDATE) && + (args_buf->result < VALIDATE_CUR_UNKNOWN)) + goto out; + + if (args_buf->buf_size > (VALIDATE_BUF_SIZE - len)) { + memcpy(buf + len, args_buf->buf, VALIDATE_BUF_SIZE - len); + len = VALIDATE_BUF_SIZE; + } else { + memcpy(buf + len, args_buf->buf, args_buf->buf_size); + len += args_buf->buf_size; + } +out: + /* Set status to default */ + args_buf->status = FLASH_NO_OP; + return len; +} + +/* + * Validate candidate firmware image + * + * Note: + * We are only interested in first 4K bytes of the + * candidate image. 
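+ * (VALIDATE_BUF_SIZE bytes; the image header and version strings that
+ * firmware inspects are assumed to fit in that window.)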
+ */ +static ssize_t validate_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct validate_flash_t *args_buf = &validate_flash_data; + + if (buf[0] != '1') + return -EINVAL; + + mutex_lock(&image_data_mutex); + + if (image_data.status != IMAGE_READY || + image_data.size < VALIDATE_BUF_SIZE) { + args_buf->result = VALIDATE_INVALID_IMG; + args_buf->status = VALIDATE_IMG_INCOMPLETE; + goto out; + } + + /* Copy first 4k bytes of candidate image */ + memcpy(args_buf->buf, image_data.data, VALIDATE_BUF_SIZE); + + args_buf->status = VALIDATE_IMG_READY; + args_buf->buf_size = VALIDATE_BUF_SIZE; + + /* Validate candidate image */ + opal_flash_validate(); + +out: + mutex_unlock(&image_data_mutex); + return count; +} + +/* + * Manage flash routine + */ +static inline void opal_flash_manage(uint8_t op) +{ + struct manage_flash_t *const args_buf = &manage_flash_data; + + args_buf->status = opal_manage_flash(op); +} + +/* + * Show manage flash status + */ +static ssize_t manage_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct manage_flash_t *const args_buf = &manage_flash_data; + int rc; + + rc = sprintf(buf, "%d\n", args_buf->status); + /* Set status to default*/ + args_buf->status = FLASH_NO_OP; + return rc; +} + +/* + * Manage operations: + * 0 - Reject + * 1 - Commit + */ +static ssize_t manage_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + uint8_t op; + switch (buf[0]) { + case '0': + op = FLASH_REJECT_TMP_SIDE; + break; + case '1': + op = FLASH_COMMIT_TMP_SIDE; + break; + default: + return -EINVAL; + } + + /* commit/reject temporary image */ + opal_flash_manage(op); + return count; +} + +/* + * OPAL update flash + */ +static int opal_flash_update(int op) +{ + struct opal_sg_list *list; + unsigned long addr; + int64_t rc = OPAL_PARAMETER; + + if (op == FLASH_UPDATE_CANCEL) { + pr_alert("FLASH: Image update cancelled\n"); + addr = '\0'; + goto flash; + } + + list = opal_vmalloc_to_sg_list(image_data.data, image_data.size); + if (!list) + goto invalid_img; + + /* First entry address */ + addr = __pa(list); + +flash: + rc = opal_update_flash(addr); + +invalid_img: + return rc; +} + +/* Return CPUs to OPAL before starting FW update */ +static void flash_return_cpu(void *info) +{ + int cpu = smp_processor_id(); + + if (!cpu_online(cpu)) + return; + + /* Disable IRQ */ + hard_irq_disable(); + + /* Return the CPU to OPAL */ + opal_return_cpu(); +} + +/* This gets called just before system reboots */ +void opal_flash_term_callback(void) +{ + struct cpumask mask; + + if (update_flash_data.status != FLASH_IMG_READY) + return; + + pr_alert("FLASH: Flashing new firmware\n"); + pr_alert("FLASH: Image is %u bytes\n", image_data.size); + pr_alert("FLASH: Performing flash and reboot/shutdown\n"); + pr_alert("FLASH: This will take several minutes. 
Do not power off!\n"); + + /* Small delay to help getting the above message out */ + msleep(500); + + /* Return secondary CPUs to firmware */ + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + if (!cpumask_empty(&mask)) + smp_call_function_many(&mask, + flash_return_cpu, NULL, false); + /* Hard disable interrupts */ + hard_irq_disable(); +} + +/* + * Show candidate image status + */ +static ssize_t update_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct update_flash_t *const args_buf = &update_flash_data; + return sprintf(buf, "%d\n", args_buf->status); +} + +/* + * Set update image flag + * 1 - Flash new image + * 0 - Cancel flash request + */ +static ssize_t update_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct update_flash_t *const args_buf = &update_flash_data; + int rc = count; + + mutex_lock(&image_data_mutex); + + switch (buf[0]) { + case '0': + if (args_buf->status == FLASH_IMG_READY) + opal_flash_update(FLASH_UPDATE_CANCEL); + args_buf->status = FLASH_NO_OP; + break; + case '1': + /* Image is loaded? */ + if (image_data.status == IMAGE_READY) + args_buf->status = + opal_flash_update(FLASH_UPDATE_INIT); + else + args_buf->status = FLASH_INVALID_IMG; + break; + default: + rc = -EINVAL; + } + + mutex_unlock(&image_data_mutex); + return rc; +} + +/* + * Free image buffer + */ +static void free_image_buf(void) +{ + void *addr; + int size; + + addr = image_data.data; + size = PAGE_ALIGN(image_data.size); + while (size > 0) { + ClearPageReserved(vmalloc_to_page(addr)); + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + vfree(image_data.data); + image_data.data = NULL; + image_data.status = IMAGE_INVALID; +} + +/* + * Allocate image buffer. + */ +static int alloc_image_buf(char *buffer, size_t count) +{ + void *addr; + int size; + + if (count < sizeof(struct image_header_t)) { + pr_warn("FLASH: Invalid candidate image\n"); + return -EINVAL; + } + + memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t)); + image_data.size = be32_to_cpu(image_header.size); + pr_debug("FLASH: Candidate image size = %u\n", image_data.size); + + if (image_data.size > MAX_IMAGE_SIZE) { + pr_warn("FLASH: Too large image\n"); + return -EINVAL; + } + if (image_data.size < VALIDATE_BUF_SIZE) { + pr_warn("FLASH: Image is shorter than expected\n"); + return -EINVAL; + } + + image_data.data = vzalloc(PAGE_ALIGN(image_data.size)); + if (!image_data.data) { + pr_err("%s : Failed to allocate memory\n", __func__); + return -ENOMEM; + } + + /* Pin memory */ + addr = image_data.data; + size = PAGE_ALIGN(image_data.size); + while (size > 0) { + SetPageReserved(vmalloc_to_page(addr)); + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + image_data.status = IMAGE_LOADING; + return 0; +} + +/* + * Copy candidate image + * + * Parse candidate image header to get total image size + * and pre-allocate required memory. + */ +static ssize_t image_data_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) +{ + int rc; + + mutex_lock(&image_data_mutex); + + /* New image ? 
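+	 * (pos == 0 means userspace restarted the upload from offset zero,
+	 * so any previously staged image is dropped before reallocating.)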
*/ + if (pos == 0) { + /* Free memory, if already allocated */ + if (image_data.data) + free_image_buf(); + + /* Cancel outstanding image update request */ + if (update_flash_data.status == FLASH_IMG_READY) + opal_flash_update(FLASH_UPDATE_CANCEL); + + /* Allocate memory */ + rc = alloc_image_buf(buffer, count); + if (rc) + goto out; + } + + if (image_data.status != IMAGE_LOADING) { + rc = -ENOMEM; + goto out; + } + + if ((pos + count) > image_data.size) { + rc = -EINVAL; + goto out; + } + + memcpy(image_data.data + pos, (void *)buffer, count); + rc = count; + + /* Set image status */ + if ((pos + count) == image_data.size) { + pr_debug("FLASH: Candidate image loaded....\n"); + image_data.status = IMAGE_READY; + } + +out: + mutex_unlock(&image_data_mutex); + return rc; +} + +/* + * sysfs interface : + * OPAL uses below sysfs files for code update. + * We create these files under /sys/firmware/opal. + * + * image : Interface to load candidate firmware image + * validate_flash : Validate firmware image + * manage_flash : Commit/Reject firmware image + * update_flash : Flash new firmware image + * + */ +static struct bin_attribute image_data_attr = { + .attr = {.name = "image", .mode = 0200}, + .size = MAX_IMAGE_SIZE, /* Limit image size */ + .write = image_data_write, +}; + +static struct kobj_attribute validate_attribute = + __ATTR(validate_flash, 0600, validate_show, validate_store); + +static struct kobj_attribute manage_attribute = + __ATTR(manage_flash, 0600, manage_show, manage_store); + +static struct kobj_attribute update_attribute = + __ATTR(update_flash, 0600, update_show, update_store); + +static struct attribute *image_op_attrs[] = { + &validate_attribute.attr, + &manage_attribute.attr, + &update_attribute.attr, + NULL /* need to NULL terminate the list of attributes */ +}; + +static struct attribute_group image_op_attr_group = { + .attrs = image_op_attrs, +}; + +void __init opal_flash_update_init(void) +{ + int ret; + + /* Allocate validate image buffer */ + validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL); + if (!validate_flash_data.buf) { + pr_err("%s : Failed to allocate memory\n", __func__); + return; + } + + /* Make sure /sys/firmware/opal directory is created */ + if (!opal_kobj) { + pr_warn("FLASH: opal kobject is not available\n"); + goto nokobj; + } + + /* Create the sysfs files */ + ret = sysfs_create_group(opal_kobj, &image_op_attr_group); + if (ret) { + pr_warn("FLASH: Failed to create sysfs files\n"); + goto nokobj; + } + + ret = sysfs_create_bin_file(opal_kobj, &image_data_attr); + if (ret) { + pr_warn("FLASH: Failed to create sysfs files\n"); + goto nosysfs_file; + } + + /* Set default status */ + validate_flash_data.status = FLASH_NO_OP; + manage_flash_data.status = FLASH_NO_OP; + update_flash_data.status = FLASH_NO_OP; + image_data.status = IMAGE_INVALID; + return; + +nosysfs_file: + sysfs_remove_group(opal_kobj, &image_op_attr_group); + +nokobj: + kfree(validate_flash_data.buf); + return; +} diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c new file mode 100644 index 000000000..b322bfb51 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -0,0 +1,189 @@ +/* + * OPAL hypervisor Maintenance interrupt handling support in PowreNV. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; If not, see <http://www.gnu.org/licenses/>. + * + * Copyright 2014 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/mm.h> +#include <linux/slab.h> + +#include <asm/opal.h> +#include <asm/cputable.h> +#include <asm/machdep.h> + +static int opal_hmi_handler_nb_init; +struct OpalHmiEvtNode { + struct list_head list; + struct OpalHMIEvent hmi_evt; +}; +static LIST_HEAD(opal_hmi_evt_list); +static DEFINE_SPINLOCK(opal_hmi_evt_lock); + +static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) +{ + const char *level, *sevstr, *error_info; + static const char *hmi_error_types[] = { + "Malfunction Alert", + "Processor Recovery done", + "Processor recovery occurred again", + "Processor recovery occurred for masked error", + "Timer facility experienced an error", + "TFMR SPR is corrupted", + "UPS (Uniterrupted Power System) Overflow indication", + "An XSCOM operation failure", + "An XSCOM operation completed", + "SCOM has set a reserved FIR bit to cause recovery", + "Debug trigger has set a reserved FIR bit to cause recovery", + "A hypervisor resource error occurred" + }; + + /* Print things out */ + if (hmi_evt->version < OpalHMIEvt_V1) { + pr_err("HMI Interrupt, Unknown event version %d !\n", + hmi_evt->version); + return; + } + switch (hmi_evt->severity) { + case OpalHMI_SEV_NO_ERROR: + level = KERN_INFO; + sevstr = "Harmless"; + break; + case OpalHMI_SEV_WARNING: + level = KERN_WARNING; + sevstr = ""; + break; + case OpalHMI_SEV_ERROR_SYNC: + level = KERN_ERR; + sevstr = "Severe"; + break; + case OpalHMI_SEV_FATAL: + default: + level = KERN_ERR; + sevstr = "Fatal"; + break; + } + + printk("%s%s Hypervisor Maintenance interrupt [%s]\n", + level, sevstr, + hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? + hmi_error_types[hmi_evt->type] + : "Unknown"; + printk("%s Error detail: %s\n", level, error_info); + printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer)); + if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || + (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) + printk("%s TFMR: %016llx\n", level, + be64_to_cpu(hmi_evt->tfmr)); +} + +static void hmi_event_handler(struct work_struct *work) +{ + unsigned long flags; + struct OpalHMIEvent *hmi_evt; + struct OpalHmiEvtNode *msg_node; + uint8_t disposition; + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + while (!list_empty(&opal_hmi_evt_list)) { + msg_node = list_entry(opal_hmi_evt_list.next, + struct OpalHmiEvtNode, list); + list_del(&msg_node->list); + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt; + print_hmi_event_info(hmi_evt); + disposition = hmi_evt->disposition; + kfree(msg_node); + + /* + * Check if HMI event has been recovered or not. If not + * then we can't continue, invoke panic. 
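+		 * (msg_node is already freed and the lock already dropped,
+		 * so the panic below runs without the event lock held.)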
+ */ + if (disposition != OpalHMI_DISPOSITION_RECOVERED) + panic("Unrecoverable HMI exception"); + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + } + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); +} + +static DECLARE_WORK(hmi_event_work, hmi_event_handler); +/* + * opal_handle_hmi_event - notifier handler that queues up HMI events + * to be preocessed later. + */ +static int opal_handle_hmi_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + unsigned long flags; + struct OpalHMIEvent *hmi_evt; + struct opal_msg *hmi_msg = msg; + struct OpalHmiEvtNode *msg_node; + + /* Sanity Checks */ + if (msg_type != OPAL_MSG_HMI_EVT) + return 0; + + /* HMI event info starts from param[0] */ + hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0]; + + /* Delay the logging of HMI events to workqueue. */ + msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); + if (!msg_node) { + pr_err("HMI: out of memory, Opal message event not handled\n"); + return -ENOMEM; + } + memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent)); + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + list_add(&msg_node->list, &opal_hmi_evt_list); + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + schedule_work(&hmi_event_work); + return 0; +} + +static struct notifier_block opal_hmi_handler_nb = { + .notifier_call = opal_handle_hmi_event, + .next = NULL, + .priority = 0, +}; + +static int __init opal_hmi_handler_init(void) +{ + int ret; + + if (!opal_hmi_handler_nb_init) { + ret = opal_message_notifier_register( + OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + opal_hmi_handler_nb_init = 1; + } + return 0; +} +machine_subsys_initcall(powernv, opal_hmi_handler_init); diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c new file mode 100644 index 000000000..e4169d68c --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -0,0 +1,414 @@ +/* + * PowerNV LPC bus handling. + * + * Copyright 2013 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/bug.h> +#include <linux/debugfs.h> +#include <linux/io.h> +#include <linux/slab.h> + +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/xics.h> +#include <asm/opal.h> +#include <asm/prom.h> +#include <asm/uaccess.h> +#include <asm/debug.h> + +static int opal_lpc_chip_id = -1; + +static u8 opal_lpc_inb(unsigned long port) +{ + int64_t rc; + __be32 data; + + if (opal_lpc_chip_id < 0 || port > 0xffff) + return 0xff; + rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 1); + return rc ? 0xff : be32_to_cpu(data); +} + +static __le16 __opal_lpc_inw(unsigned long port) +{ + int64_t rc; + __be32 data; + + if (opal_lpc_chip_id < 0 || port > 0xfffe) + return 0xffff; + if (port & 1) + return (__le16)opal_lpc_inb(port) << 8 | opal_lpc_inb(port + 1); + rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 2); + return rc ? 
0xffff : be32_to_cpu(data); +} +static u16 opal_lpc_inw(unsigned long port) +{ + return le16_to_cpu(__opal_lpc_inw(port)); +} + +static __le32 __opal_lpc_inl(unsigned long port) +{ + int64_t rc; + __be32 data; + + if (opal_lpc_chip_id < 0 || port > 0xfffc) + return 0xffffffff; + if (port & 3) + return (__le32)opal_lpc_inb(port ) << 24 | + (__le32)opal_lpc_inb(port + 1) << 16 | + (__le32)opal_lpc_inb(port + 2) << 8 | + opal_lpc_inb(port + 3); + rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 4); + return rc ? 0xffffffff : be32_to_cpu(data); +} + +static u32 opal_lpc_inl(unsigned long port) +{ + return le32_to_cpu(__opal_lpc_inl(port)); +} + +static void opal_lpc_outb(u8 val, unsigned long port) +{ + if (opal_lpc_chip_id < 0 || port > 0xffff) + return; + opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 1); +} + +static void __opal_lpc_outw(__le16 val, unsigned long port) +{ + if (opal_lpc_chip_id < 0 || port > 0xfffe) + return; + if (port & 1) { + opal_lpc_outb(val >> 8, port); + opal_lpc_outb(val , port + 1); + return; + } + opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 2); +} + +static void opal_lpc_outw(u16 val, unsigned long port) +{ + __opal_lpc_outw(cpu_to_le16(val), port); +} + +static void __opal_lpc_outl(__le32 val, unsigned long port) +{ + if (opal_lpc_chip_id < 0 || port > 0xfffc) + return; + if (port & 3) { + opal_lpc_outb(val >> 24, port); + opal_lpc_outb(val >> 16, port + 1); + opal_lpc_outb(val >> 8, port + 2); + opal_lpc_outb(val , port + 3); + return; + } + opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 4); +} + +static void opal_lpc_outl(u32 val, unsigned long port) +{ + __opal_lpc_outl(cpu_to_le32(val), port); +} + +static void opal_lpc_insb(unsigned long p, void *b, unsigned long c) +{ + u8 *ptr = b; + + while(c--) + *(ptr++) = opal_lpc_inb(p); +} + +static void opal_lpc_insw(unsigned long p, void *b, unsigned long c) +{ + __le16 *ptr = b; + + while(c--) + *(ptr++) = __opal_lpc_inw(p); +} + +static void opal_lpc_insl(unsigned long p, void *b, unsigned long c) +{ + __le32 *ptr = b; + + while(c--) + *(ptr++) = __opal_lpc_inl(p); +} + +static void opal_lpc_outsb(unsigned long p, const void *b, unsigned long c) +{ + const u8 *ptr = b; + + while(c--) + opal_lpc_outb(*(ptr++), p); +} + +static void opal_lpc_outsw(unsigned long p, const void *b, unsigned long c) +{ + const __le16 *ptr = b; + + while(c--) + __opal_lpc_outw(*(ptr++), p); +} + +static void opal_lpc_outsl(unsigned long p, const void *b, unsigned long c) +{ + const __le32 *ptr = b; + + while(c--) + __opal_lpc_outl(*(ptr++), p); +} + +static const struct ppc_pci_io opal_lpc_io = { + .inb = opal_lpc_inb, + .inw = opal_lpc_inw, + .inl = opal_lpc_inl, + .outb = opal_lpc_outb, + .outw = opal_lpc_outw, + .outl = opal_lpc_outl, + .insb = opal_lpc_insb, + .insw = opal_lpc_insw, + .insl = opal_lpc_insl, + .outsb = opal_lpc_outsb, + .outsw = opal_lpc_outsw, + .outsl = opal_lpc_outsl, +}; + +#ifdef CONFIG_DEBUG_FS +struct lpc_debugfs_entry { + enum OpalLPCAddressType lpc_type; +}; + +static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct lpc_debugfs_entry *lpc = filp->private_data; + u32 data, pos, len, todo; + int rc; + + if (!access_ok(VERIFY_WRITE, ubuf, count)) + return -EFAULT; + + todo = count; + while (todo) { + pos = *ppos; + + /* + * Select access size based on count and alignment and + * access type. IO and MEM only support byte acceses, + * FW supports all 3. 
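+		 * (e.g. an 8-byte FW read at a 4-byte-aligned offset is
+		 * issued as 4 + 4; starting at offset 2 it becomes 2 + 4 + 2.)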
+ */ + len = 1; + if (lpc->lpc_type == OPAL_LPC_FW) { + if (todo > 3 && (pos & 3) == 0) + len = 4; + else if (todo > 1 && (pos & 1) == 0) + len = 2; + } + rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos, + &data, len); + if (rc) + return -ENXIO; + + /* + * Now there is some trickery with the data returned by OPAL + * as it's the desired data right justified in a 32-bit BE + * word. + * + * This is a very bad interface and I'm to blame for it :-( + * + * So we can't just apply a 32-bit swap to what comes from OPAL, + * because user space expects the *bytes* to be in their proper + * respective positions (ie, LPC position). + * + * So what we really want to do here is to shift data right + * appropriately on a LE kernel. + * + * IE. If the LPC transaction has bytes B0, B1, B2 and B3 in that + * order, we have in memory written to by OPAL at the "data" + * pointer: + * + * Bytes: OPAL "data" LE "data" + * 32-bit: B0 B1 B2 B3 B0B1B2B3 B3B2B1B0 + * 16-bit: B0 B1 0000B0B1 B1B00000 + * 8-bit: B0 000000B0 B0000000 + * + * So a BE kernel will have the leftmost of the above in the MSB + * and rightmost in the LSB and can just then "cast" the u32 "data" + * down to the appropriate quantity and write it. + * + * However, an LE kernel can't. It doesn't need to swap because a + * load from data followed by a store to user are going to preserve + * the byte ordering which is the wire byte order which is what the + * user wants, but in order to "crop" to the right size, we need to + * shift right first. + */ + switch(len) { + case 4: + rc = __put_user((u32)data, (u32 __user *)ubuf); + break; + case 2: +#ifdef __LITTLE_ENDIAN__ + data >>= 16; +#endif + rc = __put_user((u16)data, (u16 __user *)ubuf); + break; + default: +#ifdef __LITTLE_ENDIAN__ + data >>= 24; +#endif + rc = __put_user((u8)data, (u8 __user *)ubuf); + break; + } + if (rc) + return -EFAULT; + *ppos += len; + ubuf += len; + todo -= len; + } + + return count; +} + +static ssize_t lpc_debug_write(struct file *filp, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct lpc_debugfs_entry *lpc = filp->private_data; + u32 data, pos, len, todo; + int rc; + + if (!access_ok(VERIFY_READ, ubuf, count)) + return -EFAULT; + + todo = count; + while (todo) { + pos = *ppos; + + /* + * Select access size based on count and alignment and + * access type. IO and MEM only support byte acceses, + * FW supports all 3. + */ + len = 1; + if (lpc->lpc_type == OPAL_LPC_FW) { + if (todo > 3 && (pos & 3) == 0) + len = 4; + else if (todo > 1 && (pos & 1) == 0) + len = 2; + } + + /* + * Similarly to the read case, we have some trickery here but + * it's different to handle. We need to pass the value to OPAL in + * a register whose layout depends on the access size. We want + * to reproduce the memory layout of the user, however we aren't + * doing a load from user and a store to another memory location + * which would achieve that. Here we pass the value to OPAL via + * a register which is expected to contain the "BE" interpretation + * of the byte sequence. IE: for a 32-bit access, byte 0 should be + * in the MSB. So here we *do* need to byteswap on LE. 
+ * + * User bytes: LE "data" OPAL "data" + * 32-bit: B0 B1 B2 B3 B3B2B1B0 B0B1B2B3 + * 16-bit: B0 B1 0000B1B0 0000B0B1 + * 8-bit: B0 000000B0 000000B0 + */ + switch(len) { + case 4: + rc = __get_user(data, (u32 __user *)ubuf); + data = cpu_to_be32(data); + break; + case 2: + rc = __get_user(data, (u16 __user *)ubuf); + data = cpu_to_be16(data); + break; + default: + rc = __get_user(data, (u8 __user *)ubuf); + break; + } + if (rc) + return -EFAULT; + + rc = opal_lpc_write(opal_lpc_chip_id, lpc->lpc_type, pos, + data, len); + if (rc) + return -ENXIO; + *ppos += len; + ubuf += len; + todo -= len; + } + + return count; +} + +static const struct file_operations lpc_fops = { + .read = lpc_debug_read, + .write = lpc_debug_write, + .open = simple_open, + .llseek = default_llseek, +}; + +static int opal_lpc_debugfs_create_type(struct dentry *folder, + const char *fname, + enum OpalLPCAddressType type) +{ + struct lpc_debugfs_entry *entry; + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + entry->lpc_type = type; + debugfs_create_file(fname, 0600, folder, entry, &lpc_fops); + return 0; +} + +static int opal_lpc_init_debugfs(void) +{ + struct dentry *root; + int rc = 0; + + if (opal_lpc_chip_id < 0) + return -ENODEV; + + root = debugfs_create_dir("lpc", powerpc_debugfs_root); + + rc |= opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO); + rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM); + rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW); + return rc; +} +machine_device_initcall(powernv, opal_lpc_init_debugfs); +#endif /* CONFIG_DEBUG_FS */ + +void opal_lpc_init(void) +{ + struct device_node *np; + + /* + * Look for a Power8 LPC bus tagged as "primary", + * we currently support only one though the OPAL APIs + * support any number. + */ + for_each_compatible_node(np, NULL, "ibm,power8-lpc") { + if (!of_device_is_available(np)) + continue; + if (!of_get_property(np, "primary", NULL)) + continue; + opal_lpc_chip_id = of_get_ibm_chip_id(np); + break; + } + if (opal_lpc_chip_id < 0) + return; + + /* Setup special IO ops */ + ppc_pci_io = opal_lpc_io; + isa_io_special = true; + + pr_info("OPAL: Power8 LPC bus found, chip ID %d\n", opal_lpc_chip_id); +} diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c new file mode 100644 index 000000000..43db2136d --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -0,0 +1,147 @@ +/* + * OPAL asynchronus Memory error handling support in PowreNV. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/cputable.h>
+
+static int opal_mem_err_nb_init;
+static LIST_HEAD(opal_memory_err_list);
+static DEFINE_SPINLOCK(opal_mem_err_lock);
+
+struct OpalMsgNode {
+ struct list_head list;
+ struct opal_msg msg;
+};
+
+static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt)
+{
+ uint64_t paddr_start, paddr_end;
+
+ pr_debug("%s: Retrieved memory error event, type: 0x%x\n",
+ __func__, merr_evt->type);
+ switch (merr_evt->type) {
+ case OPAL_MEM_ERR_TYPE_RESILIENCE:
+ paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start);
+ paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end);
+ break;
+ case OPAL_MEM_ERR_TYPE_DYN_DALLOC:
+ paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start);
+ paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end);
+ break;
+ default:
+ return;
+ }
+
+ for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) {
+ memory_failure(paddr_start >> PAGE_SHIFT, 0, 0);
+ }
+}
+
+static void handle_memory_error(void)
+{
+ unsigned long flags;
+ struct OpalMemoryErrorData *merr_evt;
+ struct OpalMsgNode *msg_node;
+
+ spin_lock_irqsave(&opal_mem_err_lock, flags);
+ while (!list_empty(&opal_memory_err_list)) {
+ msg_node = list_entry(opal_memory_err_list.next,
+ struct OpalMsgNode, list);
+ list_del(&msg_node->list);
+ spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+
+ merr_evt = (struct OpalMemoryErrorData *)
+ &msg_node->msg.params[0];
+ handle_memory_error_event(merr_evt);
+ kfree(msg_node);
+ spin_lock_irqsave(&opal_mem_err_lock, flags);
+ }
+ spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+}
+
+static void mem_error_handler(struct work_struct *work)
+{
+ handle_memory_error();
+}
+
+static DECLARE_WORK(mem_error_work, mem_error_handler);
+
+/*
+ * opal_memory_err_event - notifier handler that queues up the opal message
+ * to be processed later.
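+ *
+ * The notifier may run in a context that cannot sleep, which is why the
+ * node below is allocated with GFP_ATOMIC and the actual handling is
+ * deferred to the mem_error_work work item.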
+ */
+static int opal_memory_err_event(struct notifier_block *nb,
+ unsigned long msg_type, void *msg)
+{
+ unsigned long flags;
+ struct OpalMsgNode *msg_node;
+
+ if (msg_type != OPAL_MSG_MEM_ERR)
+ return 0;
+
+ msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
+ if (!msg_node) {
+ pr_err("MEMORY_ERROR: out of memory, Opal message event not "
+ "handled\n");
+ return -ENOMEM;
+ }
+ memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
+
+ spin_lock_irqsave(&opal_mem_err_lock, flags);
+ list_add(&msg_node->list, &opal_memory_err_list);
+ spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+
+ schedule_work(&mem_error_work);
+ return 0;
+}
+
+static struct notifier_block opal_mem_err_nb = {
+ .notifier_call = opal_memory_err_event,
+ .next = NULL,
+ .priority = 0,
+};
+
+static int __init opal_mem_err_init(void)
+{
+ int ret;
+
+ if (!opal_mem_err_nb_init) {
+ ret = opal_message_notifier_register(
+ OPAL_MSG_MEM_ERR, &opal_mem_err_nb);
+ if (ret) {
+ pr_err("%s: Can't register OPAL event notifier (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+ opal_mem_err_nb_init = 1;
+ }
+ return 0;
+}
+machine_subsys_initcall(powernv, opal_mem_err_init);
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
new file mode 100644
index 000000000..44ed78af1
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -0,0 +1,124 @@
+/*
+ * PowerNV OPAL in-memory console interface
+ *
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/io.h>
+#include <asm/opal.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+
+/* OPAL in-memory console. Defined in OPAL source at core/console.c */
+struct memcons {
+ __be64 magic;
+#define MEMCONS_MAGIC 0x6630696567726173L
+ __be64 obuf_phys;
+ __be64 ibuf_phys;
+ __be32 obuf_size;
+ __be32 ibuf_size;
+ __be32 out_pos;
+#define MEMCONS_OUT_POS_WRAP 0x80000000u
+#define MEMCONS_OUT_POS_MASK 0x00ffffffu
+ __be32 in_prod;
+ __be32 in_cons;
+};
+
+static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *to,
+ loff_t pos, size_t count)
+{
+ struct memcons *mc = bin_attr->private;
+ const char *conbuf;
+ ssize_t ret;
+ size_t first_read = 0;
+ uint32_t out_pos, avail;
+
+ if (!mc)
+ return -ENODEV;
+
+ out_pos = be32_to_cpu(ACCESS_ONCE(mc->out_pos));
+
+ /* Now we've read out_pos, put a barrier in before reading the new
+ * data it points to in conbuf. */
+ smp_rmb();
+
+ conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
+
+ /* When the buffer has wrapped, read from the out_pos marker to the end
+ * of the buffer, and then read the remaining data as in the un-wrapped
+ * case. */
+ if (out_pos & MEMCONS_OUT_POS_WRAP) {
+
+ out_pos &= MEMCONS_OUT_POS_MASK;
+ avail = be32_to_cpu(mc->obuf_size) - out_pos;
+
+ ret = memory_read_from_buffer(to, count, &pos,
+ conbuf + out_pos, avail);
+
+ if (ret < 0)
+ goto out;
+
+ first_read = ret;
+ to += first_read;
+ count -= first_read;
+ pos -= avail;
+
+ if (count <= 0)
+ goto out;
+ }
+
+ /* Sanity check. The firmware should not do this to us. */
+ if (out_pos > be32_to_cpu(mc->obuf_size)) {
+ pr_err("OPAL: memory console corruption. 
Aborting read.\n"); + return -EINVAL; + } + + ret = memory_read_from_buffer(to, count, &pos, conbuf, out_pos); + + if (ret < 0) + goto out; + + ret += first_read; +out: + return ret; +} + +static struct bin_attribute opal_msglog_attr = { + .attr = {.name = "msglog", .mode = 0444}, + .read = opal_msglog_read +}; + +void __init opal_msglog_init(void) +{ + u64 mcaddr; + struct memcons *mc; + + if (of_property_read_u64(opal_node, "ibm,opal-memcons", &mcaddr)) { + pr_warn("OPAL: Property ibm,opal-memcons not found, no message log\n"); + return; + } + + mc = phys_to_virt(mcaddr); + if (!mc) { + pr_warn("OPAL: memory console address is invalid\n"); + return; + } + + if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) { + pr_warn("OPAL: memory console version is invalid\n"); + return; + } + + opal_msglog_attr.private = mc; + + if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0) + pr_warn("OPAL: sysfs file creation failed\n"); +} diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c new file mode 100644 index 000000000..9db4398de --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -0,0 +1,98 @@ +/* + * PowerNV nvram code. + * + * Copyright 2011 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define DEBUG + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/of.h> + +#include <asm/opal.h> +#include <asm/nvram.h> +#include <asm/machdep.h> + +static unsigned int nvram_size; + +static ssize_t opal_nvram_size(void) +{ + return nvram_size; +} + +static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index) +{ + s64 rc; + int off; + + if (*index >= nvram_size) + return 0; + off = *index; + if ((off + count) > nvram_size) + count = nvram_size - off; + rc = opal_read_nvram(__pa(buf), count, off); + if (rc != OPAL_SUCCESS) + return -EIO; + *index += count; + return count; +} + +static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) +{ + s64 rc = OPAL_BUSY; + int off; + + if (*index >= nvram_size) + return 0; + off = *index; + if ((off + count) > nvram_size) + count = nvram_size - off; + + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_write_nvram(__pa(buf), count, off); + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + } + *index += count; + return count; +} + +static int __init opal_nvram_init_log_partitions(void) +{ + /* Scan nvram for partitions */ + nvram_scan_partitions(); + nvram_init_oops_partition(0); + return 0; +} +machine_arch_initcall(powernv, opal_nvram_init_log_partitions); + +void __init opal_nvram_init(void) +{ + struct device_node *np; + const __be32 *nbytes_p; + + np = of_find_compatible_node(NULL, NULL, "ibm,opal-nvram"); + if (np == NULL) + return; + + nbytes_p = of_get_property(np, "#bytes", NULL); + if (!nbytes_p) { + of_node_put(np); + return; + } + nvram_size = be32_to_cpup(nbytes_p); + + pr_info("OPAL nvram setup, %u bytes\n", nvram_size); + of_node_put(np); + + ppc_md.nvram_read = opal_nvram_read; + ppc_md.nvram_write = opal_nvram_write; + ppc_md.nvram_size = opal_nvram_size; +} + diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c new file mode 100644 index 000000000..ac46c2c24 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-power.c @@ -0,0 +1,66 @@ +/* + * PowerNV 
OPAL power control for graceful shutdown handling + * + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/notifier.h> + +#include <asm/opal.h> +#include <asm/machdep.h> + +#define SOFT_OFF 0x00 +#define SOFT_REBOOT 0x01 + +static int opal_power_control_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + struct opal_msg *power_msg = msg; + uint64_t type; + + type = be64_to_cpu(power_msg->params[0]); + + switch (type) { + case SOFT_REBOOT: + pr_info("OPAL: reboot requested\n"); + orderly_reboot(); + break; + case SOFT_OFF: + pr_info("OPAL: poweroff requested\n"); + orderly_poweroff(true); + break; + default: + pr_err("OPAL: power control type unexpected %016llx\n", type); + } + + return 0; +} + +static struct notifier_block opal_power_control_nb = { + .notifier_call = opal_power_control_event, + .next = NULL, + .priority = 0, +}; + +static int __init opal_power_control_init(void) +{ + int ret; + + ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN, + &opal_power_control_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + + return 0; +} +machine_subsys_initcall(powernv, opal_power_control_init); diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c new file mode 100644 index 000000000..37dbee157 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -0,0 +1,87 @@ +/* + * PowerNV Real Time Clock. + * + * Copyright 2011 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+
+#include <asm/opal.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+
+static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
+{
+ tm->tm_year = ((bcd2bin(y_m_d >> 24) * 100) +
+ bcd2bin((y_m_d >> 16) & 0xff)) - 1900;
+ tm->tm_mon = bcd2bin((y_m_d >> 8) & 0xff) - 1;
+ tm->tm_mday = bcd2bin(y_m_d & 0xff);
+ tm->tm_hour = bcd2bin((h_m_s_ms >> 56) & 0xff);
+ tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff);
+ tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff);
+
+ GregorianDay(tm);
+}
+
+unsigned long __init opal_get_boot_time(void)
+{
+ struct rtc_time tm;
+ u32 y_m_d;
+ u64 h_m_s_ms;
+ __be32 __y_m_d;
+ __be64 __h_m_s_ms;
+ long rc = OPAL_BUSY;
+
+ if (!opal_check_token(OPAL_RTC_READ))
+ return 0;
+
+ while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+ rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
+ if (rc == OPAL_BUSY_EVENT)
+ opal_poll_events(NULL);
+ else
+ mdelay(10);
+ }
+ if (rc != OPAL_SUCCESS)
+ return 0;
+
+ y_m_d = be32_to_cpu(__y_m_d);
+ h_m_s_ms = be64_to_cpu(__h_m_s_ms);
+ opal_to_tm(y_m_d, h_m_s_ms, &tm);
+ return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+}
+
+static __init int opal_time_init(void)
+{
+ struct platform_device *pdev;
+ struct device_node *rtc;
+
+ rtc = of_find_node_by_path("/ibm,opal/rtc");
+ if (rtc) {
+ pdev = of_platform_device_create(rtc, "opal-rtc", NULL);
+ of_node_put(rtc);
+ } else {
+ if (opal_check_token(OPAL_RTC_READ) ||
+ opal_check_token(OPAL_READ_TPO))
+ pdev = platform_device_register_simple("opal-rtc", -1,
+ NULL, 0);
+ else
+ return -ENODEV;
+ }
+
+ return PTR_ERR_OR_ZERO(pdev);
+}
+machine_subsys_initcall(powernv, opal_time_init);
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
new file mode 100644
index 000000000..655250499
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -0,0 +1,96 @@
+/*
+ * PowerNV sensor code
+ *
+ * Copyright (C) 2013 IBM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/of_platform.h>
+#include <asm/opal.h>
+#include <asm/machdep.h>
+
+static DEFINE_MUTEX(opal_sensor_mutex);
+
+/*
+ * This will return sensor information to the driver based on the requested
+ * sensor handle. A handle is an opaque identifier understood by the powernv
+ * firmware, read by the driver from the device tree.
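+ *
+ * A typical caller looks roughly like this (a sketch; error handling is
+ * elided and "handle" stands for a sensor id read from the device tree):
+ *
+ * u32 val;
+ * int rc = opal_get_sensor_data(handle, &val);
+ *
+ * On success (rc == 0), val holds the raw 32-bit sensor reading.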
+ */ +int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) +{ + int ret, token; + struct opal_msg msg; + __be32 data; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_err("%s: Couldn't get the token, returning\n", __func__); + ret = token; + goto out; + } + + mutex_lock(&opal_sensor_mutex); + ret = opal_sensor_read(sensor_hndl, token, &data); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %d\n", + __func__, ret); + goto out_token; + } + + ret = opal_error_code(be64_to_cpu(msg.params[1])); + *sensor_data = be32_to_cpu(data); + break; + + case OPAL_SUCCESS: + ret = 0; + *sensor_data = be32_to_cpu(data); + break; + + default: + ret = opal_error_code(ret); + break; + } + +out_token: + mutex_unlock(&opal_sensor_mutex); + opal_async_release_token(token); +out: + return ret; +} +EXPORT_SYMBOL_GPL(opal_get_sensor_data); + +static __init int opal_sensor_init(void) +{ + struct platform_device *pdev; + struct device_node *sensor; + + sensor = of_find_node_by_path("/ibm,opal/sensors"); + if (!sensor) { + pr_err("Opal node 'sensors' not found\n"); + return -ENODEV; + } + + pdev = of_platform_device_create(sensor, "opal-sensor", NULL); + of_node_put(sensor); + + return PTR_ERR_OR_ZERO(pdev); +} +machine_subsys_initcall(powernv, opal_sensor_init); diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c new file mode 100644 index 000000000..9d1acf22a --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -0,0 +1,304 @@ +/* + * PowerNV system parameter code + * + * Copyright (C) 2013 IBM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kobject.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/gfp.h> +#include <linux/stat.h> +#include <asm/opal.h> + +#define MAX_PARAM_DATA_LEN 64 + +static DEFINE_MUTEX(opal_sysparam_mutex); +static struct kobject *sysparam_kobj; +static void *param_data_buf; + +struct param_attr { + struct list_head list; + u32 param_id; + u32 param_size; + struct kobj_attribute kobj_attr; +}; + +static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer) +{ + struct opal_msg msg; + ssize_t ret; + int token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("%s: Couldn't get the token, returning\n", + __func__); + ret = token; + goto out; + } + + ret = opal_get_param(token, param_id, (u64)buffer, length); + if (ret != OPAL_ASYNC_COMPLETION) + goto out_token; + + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %zd\n", + __func__, ret); + goto out_token; + } + + ret = be64_to_cpu(msg.params[1]); + +out_token: + opal_async_release_token(token); +out: + return ret; +} + +static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) +{ + struct opal_msg msg; + int ret, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("%s: Couldn't get the token, returning\n", + __func__); + ret = token; + goto out; + } + + ret = opal_set_param(token, param_id, (u64)buffer, length); + + if (ret != OPAL_ASYNC_COMPLETION) + goto out_token; + + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %d\n", + __func__, ret); + goto out_token; + } + + ret = be64_to_cpu(msg.params[1]); + +out_token: + opal_async_release_token(token); +out: + return ret; +} + +static ssize_t sys_param_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, char *buf) +{ + struct param_attr *attr = container_of(kobj_attr, struct param_attr, + kobj_attr); + ssize_t ret; + + mutex_lock(&opal_sysparam_mutex); + ret = opal_get_sys_param(attr->param_id, attr->param_size, + param_data_buf); + if (ret) + goto out; + + memcpy(buf, param_data_buf, attr->param_size); + + ret = attr->param_size; +out: + mutex_unlock(&opal_sysparam_mutex); + return ret; +} + +static ssize_t sys_param_store(struct kobject *kobj, + struct kobj_attribute *kobj_attr, const char *buf, size_t count) +{ + struct param_attr *attr = container_of(kobj_attr, struct param_attr, + kobj_attr); + ssize_t ret; + + /* MAX_PARAM_DATA_LEN is sizeof(param_data_buf) */ + if (count > MAX_PARAM_DATA_LEN) + count = MAX_PARAM_DATA_LEN; + + mutex_lock(&opal_sysparam_mutex); + memcpy(param_data_buf, buf, count); + ret = opal_set_sys_param(attr->param_id, attr->param_size, + param_data_buf); + mutex_unlock(&opal_sysparam_mutex); + if (!ret) + ret = count; + return ret; +} + +void __init opal_sys_param_init(void) +{ + struct device_node *sysparam; + struct param_attr *attr; + u32 *id, *size; + int count, i; + u8 *perm; + + if (!opal_kobj) { + pr_warn("SYSPARAM: opal kobject is not available\n"); + goto out; + } + + sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj); + if (!sysparam_kobj) { + pr_err("SYSPARAM: Failed to create sysparam kobject\n"); + goto 
out; + } + + /* Allocate big enough buffer for any get/set transactions */ + param_data_buf = kzalloc(MAX_PARAM_DATA_LEN, GFP_KERNEL); + if (!param_data_buf) { + pr_err("SYSPARAM: Failed to allocate memory for param data " + "buf\n"); + goto out_kobj_put; + } + + sysparam = of_find_node_by_path("/ibm,opal/sysparams"); + if (!sysparam) { + pr_err("SYSPARAM: Opal sysparam node not found\n"); + goto out_param_buf; + } + + if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) { + pr_err("SYSPARAM: Opal sysparam node not compatible\n"); + goto out_node_put; + } + + /* Number of parameters exposed through DT */ + count = of_property_count_strings(sysparam, "param-name"); + if (count < 0) { + pr_err("SYSPARAM: No string found of property param-name in " + "the node %s\n", sysparam->name); + goto out_node_put; + } + + id = kzalloc(sizeof(*id) * count, GFP_KERNEL); + if (!id) { + pr_err("SYSPARAM: Failed to allocate memory to read parameter " + "id\n"); + goto out_node_put; + } + + size = kzalloc(sizeof(*size) * count, GFP_KERNEL); + if (!size) { + pr_err("SYSPARAM: Failed to allocate memory to read parameter " + "size\n"); + goto out_free_id; + } + + perm = kzalloc(sizeof(*perm) * count, GFP_KERNEL); + if (!perm) { + pr_err("SYSPARAM: Failed to allocate memory to read supported " + "action on the parameter"); + goto out_free_size; + } + + if (of_property_read_u32_array(sysparam, "param-id", id, count)) { + pr_err("SYSPARAM: Missing property param-id in the DT\n"); + goto out_free_perm; + } + + if (of_property_read_u32_array(sysparam, "param-len", size, count)) { + pr_err("SYSPARAM: Missing property param-len in the DT\n"); + goto out_free_perm; + } + + + if (of_property_read_u8_array(sysparam, "param-perm", perm, count)) { + pr_err("SYSPARAM: Missing property param-perm in the DT\n"); + goto out_free_perm; + } + + attr = kzalloc(sizeof(*attr) * count, GFP_KERNEL); + if (!attr) { + pr_err("SYSPARAM: Failed to allocate memory for parameter " + "attributes\n"); + goto out_free_perm; + } + + /* For each of the parameters, populate the parameter attributes */ + for (i = 0; i < count; i++) { + if (size[i] > MAX_PARAM_DATA_LEN) { + pr_warn("SYSPARAM: Not creating parameter %d as size " + "exceeds buffer length\n", i); + continue; + } + + sysfs_attr_init(&attr[i].kobj_attr.attr); + attr[i].param_id = id[i]; + attr[i].param_size = size[i]; + if (of_property_read_string_index(sysparam, "param-name", i, + &attr[i].kobj_attr.attr.name)) + continue; + + /* If the parameter is read-only or read-write */ + switch (perm[i] & 3) { + case OPAL_SYSPARAM_READ: + attr[i].kobj_attr.attr.mode = S_IRUGO; + break; + case OPAL_SYSPARAM_WRITE: + attr[i].kobj_attr.attr.mode = S_IWUSR; + break; + case OPAL_SYSPARAM_RW: + attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR; + break; + default: + break; + } + + attr[i].kobj_attr.show = sys_param_show; + attr[i].kobj_attr.store = sys_param_store; + + if (sysfs_create_file(sysparam_kobj, &attr[i].kobj_attr.attr)) { + pr_err("SYSPARAM: Failed to create sysfs file %s\n", + attr[i].kobj_attr.attr.name); + goto out_free_attr; + } + } + + kfree(perm); + kfree(size); + kfree(id); + of_node_put(sysparam); + return; + +out_free_attr: + kfree(attr); +out_free_perm: + kfree(perm); +out_free_size: + kfree(size); +out_free_id: + kfree(id); +out_node_put: + of_node_put(sysparam); +out_param_buf: + kfree(param_data_buf); +out_kobj_put: + kobject_put(sysparam_kobj); +out: + return; +} diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c 
b/arch/powerpc/platforms/powernv/opal-tracepoints.c new file mode 100644 index 000000000..e11273b23 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -0,0 +1,84 @@ +#include <linux/percpu.h> +#include <linux/jump_label.h> +#include <asm/trace.h> + +#ifdef HAVE_JUMP_LABEL +struct static_key opal_tracepoint_key = STATIC_KEY_INIT; + +void opal_tracepoint_regfunc(void) +{ + static_key_slow_inc(&opal_tracepoint_key); +} + +void opal_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&opal_tracepoint_key); +} +#else +/* + * We optimise OPAL calls by placing opal_tracepoint_refcount + * directly in the TOC so we can check if the opal tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long opal_tracepoint_refcount; + +void opal_tracepoint_regfunc(void) +{ + opal_tracepoint_refcount++; +} + +void opal_tracepoint_unregfunc(void) +{ + opal_tracepoint_refcount--; +} +#endif + +/* + * Since the tracing code might execute OPAL calls we need to guard against + * recursion. + */ +static DEFINE_PER_CPU(unsigned int, opal_trace_depth); + +void __trace_opal_entry(unsigned long opcode, unsigned long *args) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = this_cpu_ptr(&opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + preempt_disable(); + trace_opal_entry(opcode, args); + (*depth)--; + +out: + local_irq_restore(flags); +} + +void __trace_opal_exit(long opcode, unsigned long retval) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = this_cpu_ptr(&opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + trace_opal_exit(opcode, retval); + preempt_enable(); + (*depth)--; + +out: + local_irq_restore(flags); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S new file mode 100644 index 000000000..a7ade94cd --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -0,0 +1,297 @@ +/* + * PowerNV OPAL API wrappers + * + * Copyright 2011 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/jump_label.h> +#include <asm/ppc_asm.h> +#include <asm/hvcall.h> +#include <asm/asm-offsets.h> +#include <asm/opal.h> + + .section ".text" + +#ifdef CONFIG_TRACEPOINTS +#ifdef HAVE_JUMP_LABEL +#define OPAL_BRANCH(LABEL) \ + ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) +#else + + .section ".toc","aw" + + .globl opal_tracepoint_refcount +opal_tracepoint_refcount: + .llong 0 + + .section ".text" + +/* + * We branch around this in early init by using an unconditional cpu + * feature. + */ +#define OPAL_BRANCH(LABEL) \ +BEGIN_FTR_SECTION; \ + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,opal_tracepoint_refcount@toc(r2); \ + cmpdi r12,0; \ + bne- LABEL; \ +1: + +#endif + +#else +#define OPAL_BRANCH(LABEL) +#endif + +/* TODO: + * + * - Trace irqs in/off (needs saving/restoring all args, argh...) 
+ * - Get r11 feed up by Dave so I can have better register usage + */ + +#define OPAL_CALL(name, token) \ + _GLOBAL_TOC(name); \ + mflr r0; \ + std r0,16(r1); \ + li r0,token; \ + OPAL_BRANCH(opal_tracepoint_entry) \ + mfcr r12; \ + stw r12,8(r1); \ + std r1,PACAR1(r13); \ + li r11,0; \ + mfmsr r12; \ + ori r11,r11,MSR_EE; \ + std r12,PACASAVEDMSR(r13); \ + andc r12,r12,r11; \ + mtmsrd r12,1; \ + LOAD_REG_ADDR(r11,opal_return); \ + mtlr r11; \ + li r11,MSR_DR|MSR_IR|MSR_LE;\ + andc r12,r12,r11; \ + mtspr SPRN_HSRR1,r12; \ + LOAD_REG_ADDR(r11,opal); \ + ld r12,8(r11); \ + ld r2,0(r11); \ + mtspr SPRN_HSRR0,r12; \ + hrfid + +opal_return: + /* + * Fixup endian on OPAL return... we should be able to simplify + * this by instead converting the below trampoline to a set of + * bytes (always BE) since MSR:LE will end up fixed up as a side + * effect of the rfid. + */ + FIXUP_ENDIAN + ld r2,PACATOC(r13); + lwz r4,8(r1); + ld r5,16(r1); + ld r6,PACASAVEDMSR(r13); + mtspr SPRN_SRR0,r5; + mtspr SPRN_SRR1,r6; + mtcr r4; + rfid + +#ifdef CONFIG_TRACEPOINTS +opal_tracepoint_entry: + stdu r1,-STACKFRAMESIZE(r1) + std r0,STK_REG(R23)(r1) + std r3,STK_REG(R24)(r1) + std r4,STK_REG(R25)(r1) + std r5,STK_REG(R26)(r1) + std r6,STK_REG(R27)(r1) + std r7,STK_REG(R28)(r1) + std r8,STK_REG(R29)(r1) + std r9,STK_REG(R30)(r1) + std r10,STK_REG(R31)(r1) + mr r3,r0 + addi r4,r1,STK_REG(R24) + bl __trace_opal_entry + ld r0,STK_REG(R23)(r1) + ld r3,STK_REG(R24)(r1) + ld r4,STK_REG(R25)(r1) + ld r5,STK_REG(R26)(r1) + ld r6,STK_REG(R27)(r1) + ld r7,STK_REG(R28)(r1) + ld r8,STK_REG(R29)(r1) + ld r9,STK_REG(R30)(r1) + ld r10,STK_REG(R31)(r1) + LOAD_REG_ADDR(r11,opal_tracepoint_return) + mfcr r12 + std r11,16(r1) + stw r12,8(r1) + std r1,PACAR1(r13) + li r11,0 + mfmsr r12 + ori r11,r11,MSR_EE + std r12,PACASAVEDMSR(r13) + andc r12,r12,r11 + mtmsrd r12,1 + LOAD_REG_ADDR(r11,opal_return) + mtlr r11 + li r11,MSR_DR|MSR_IR|MSR_LE + andc r12,r12,r11 + mtspr SPRN_HSRR1,r12 + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +opal_tracepoint_return: + std r3,STK_REG(R31)(r1) + mr r4,r3 + ld r0,STK_REG(R23)(r1) + bl __trace_opal_exit + ld r3,STK_REG(R31)(r1) + addi r1,r1,STACKFRAMESIZE + ld r0,16(r1) + mtlr r0 + blr +#endif + +/* + * Make opal call in realmode. This is a generic function to be called + * from realmode. It handles endianness. 
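+ * ("Handles endianness" means that on a little-endian kernel MSR_LE is
+ * cleared before entering OPAL, which runs big-endian, and FIXUP_ENDIAN
+ * restores the kernel byte order on the way back.)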
+ * + * r13 - paca pointer + * r1 - stack pointer + * r0 - opal token + */ +_GLOBAL(opal_call_realmode) + mflr r12 + std r12,PPC_LR_STKOFF(r1) + ld r2,PACATOC(r13) + /* Set opal return address */ + LOAD_REG_ADDR(r12,return_from_opal_call) + mtlr r12 + + mfmsr r12 +#ifdef __LITTLE_ENDIAN__ + /* Handle endian-ness */ + li r11,MSR_LE + andc r12,r12,r11 +#endif + mtspr SPRN_HSRR1,r12 + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +return_from_opal_call: +#ifdef __LITTLE_ENDIAN__ + FIXUP_ENDIAN +#endif + ld r12,PPC_LR_STKOFF(r1) + mtlr r12 + blr + +OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); +OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); +OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); +OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE); +OPAL_CALL(opal_rtc_read, OPAL_RTC_READ); +OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE); +OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN); +OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT); +OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM); +OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM); +OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT); +OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS); +OPAL_CALL(opal_pci_set_hub_tce_memory, OPAL_PCI_SET_HUB_TCE_MEMORY); +OPAL_CALL(opal_pci_set_phb_tce_memory, OPAL_PCI_SET_PHB_TCE_MEMORY); +OPAL_CALL(opal_pci_config_read_byte, OPAL_PCI_CONFIG_READ_BYTE); +OPAL_CALL(opal_pci_config_read_half_word, OPAL_PCI_CONFIG_READ_HALF_WORD); +OPAL_CALL(opal_pci_config_read_word, OPAL_PCI_CONFIG_READ_WORD); +OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE); +OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD); +OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD); +OPAL_CALL(opal_set_xive, OPAL_SET_XIVE); +OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); +OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); +OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); +OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR); +OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET); +OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT); +OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC); +OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE); +OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW); +OPAL_CALL(opal_pci_map_pe_mmio_window, OPAL_PCI_MAP_PE_MMIO_WINDOW); +OPAL_CALL(opal_pci_set_phb_table_memory, OPAL_PCI_SET_PHB_TABLE_MEMORY); +OPAL_CALL(opal_pci_set_pe, OPAL_PCI_SET_PE); +OPAL_CALL(opal_pci_set_peltv, OPAL_PCI_SET_PELTV); +OPAL_CALL(opal_pci_set_mve, OPAL_PCI_SET_MVE); +OPAL_CALL(opal_pci_set_mve_enable, OPAL_PCI_SET_MVE_ENABLE); +OPAL_CALL(opal_pci_get_xive_reissue, OPAL_PCI_GET_XIVE_REISSUE); +OPAL_CALL(opal_pci_set_xive_reissue, OPAL_PCI_SET_XIVE_REISSUE); +OPAL_CALL(opal_pci_set_xive_pe, OPAL_PCI_SET_XIVE_PE); +OPAL_CALL(opal_get_xive_source, OPAL_GET_XIVE_SOURCE); +OPAL_CALL(opal_get_msi_32, OPAL_GET_MSI_32); +OPAL_CALL(opal_get_msi_64, OPAL_GET_MSI_64); +OPAL_CALL(opal_start_cpu, OPAL_START_CPU); +OPAL_CALL(opal_query_cpu_status, OPAL_QUERY_CPU_STATUS); +OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL); +OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW); +OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL); +OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET); +OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA); +OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA); 
+OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB);
+OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
+OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
+OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
+OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
+OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
+OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
+OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
+OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
+OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
+OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ);
+OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE);
+OPAL_CALL(opal_lpc_read, OPAL_LPC_READ);
+OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE);
+OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU);
+OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS);
+OPAL_CALL(opal_read_elog, OPAL_ELOG_READ);
+OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK);
+OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE);
+OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND);
+OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE);
+OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
+OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
+OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
+OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
+OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN);
+OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
+OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
+OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2);
+OPAL_CALL(opal_dump_read, OPAL_DUMP_READ);
+OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK);
+OPAL_CALL(opal_get_msg, OPAL_GET_MSG);
+OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION);
+OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND);
+OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT);
+OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
+OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
+OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
+OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
+OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
+OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
+OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CAPI_MODE);
+OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO);
+OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
+OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
+OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
+OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
+OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE);
+OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
new file mode 100644
index 000000000..7634d1c62
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -0,0 +1,133 @@
+/*
+ * PowerNV SCOM bus handling via OPAL XSCOM calls.
+ *
+ * Copyright 2013 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */ + +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/bug.h> +#include <linux/gfp.h> +#include <linux/slab.h> + +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/opal.h> +#include <asm/scom.h> + +/* + * We could probably fit that inside the scom_map_t + * which is a void* after all but it's really too ugly + * so let's kmalloc it for now + */ +struct opal_scom_map { + uint32_t chip; + uint64_t addr; +}; + +static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count) +{ + struct opal_scom_map *m; + const __be32 *gcid; + + if (!of_get_property(dev, "scom-controller", NULL)) { + pr_err("%s: device %s is not a SCOM controller\n", + __func__, dev->full_name); + return SCOM_MAP_INVALID; + } + gcid = of_get_property(dev, "ibm,chip-id", NULL); + if (!gcid) { + pr_err("%s: device %s has no ibm,chip-id\n", + __func__, dev->full_name); + return SCOM_MAP_INVALID; + } + m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL); + if (!m) + return NULL; + m->chip = be32_to_cpup(gcid); + m->addr = reg; + + return (scom_map_t)m; +} + +static void opal_scom_unmap(scom_map_t map) +{ + kfree(map); +} + +static int opal_xscom_err_xlate(int64_t rc) +{ + switch(rc) { + case 0: + return 0; + /* Add more translations if necessary */ + default: + return -EIO; + } +} + +static u64 opal_scom_unmangle(u64 addr) +{ + /* + * XSCOM indirect addresses have the top bit set. Additionally + * the rest of the top 3 nibbles is always 0. + * + * Because the debugfs interface uses signed offsets and shifts + * the address left by 3, we basically cannot use the top 4 bits + * of the 64-bit address, and thus cannot use the indirect bit. + * + * To deal with that, we support the indirect bit being in bit + * 4 (IBM notation) instead of bit 0 in this API, we do the + * conversion here. To leave room for further xscom address + * expansion, we only clear out the top byte + * + * For in-kernel use, we also support the real indirect bit, so + * we test for any of the top 5 bits + * + */ + if (addr & (0x1full << 59)) + addr = (addr & ~(0xffull << 56)) | (1ull << 63); + return addr; +} + +static int opal_scom_read(scom_map_t map, u64 reg, u64 *value) +{ + struct opal_scom_map *m = map; + int64_t rc; + __be64 v; + + reg = opal_scom_unmangle(m->addr + reg); + rc = opal_xscom_read(m->chip, reg, (__be64 *)__pa(&v)); + *value = be64_to_cpu(v); + return opal_xscom_err_xlate(rc); +} + +static int opal_scom_write(scom_map_t map, u64 reg, u64 value) +{ + struct opal_scom_map *m = map; + int64_t rc; + + reg = opal_scom_unmangle(m->addr + reg); + rc = opal_xscom_write(m->chip, reg, value); + return opal_xscom_err_xlate(rc); +} + +static const struct scom_controller opal_scom_controller = { + .map = opal_scom_map, + .unmap = opal_scom_unmap, + .read = opal_scom_read, + .write = opal_scom_write +}; + +static int opal_xscom_init(void) +{ + if (firmware_has_feature(FW_FEATURE_OPALv3)) + scom_init(&opal_scom_controller); + return 0; +} +machine_arch_initcall(powernv, opal_xscom_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c new file mode 100644 index 000000000..2241565b0 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal.c @@ -0,0 +1,972 @@ +/* + * PowerNV OPAL high level interfaces + * + * Copyright 2011 IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "opal: " fmt + +#include <linux/printk.h> +#include <linux/types.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/kobject.h> +#include <linux/delay.h> +#include <linux/memblock.h> +#include <linux/kthread.h> +#include <linux/freezer.h> + +#include <asm/machdep.h> +#include <asm/opal.h> +#include <asm/firmware.h> +#include <asm/mce.h> + +#include "powernv.h" + +/* /sys/firmware/opal */ +struct kobject *opal_kobj; + +struct opal { + u64 base; + u64 entry; + u64 size; +} opal; + +struct mcheck_recoverable_range { + u64 start_addr; + u64 end_addr; + u64 recover_addr; +}; + +static struct mcheck_recoverable_range *mc_recoverable_range; +static int mc_recoverable_range_len; + +struct device_node *opal_node; +static DEFINE_SPINLOCK(opal_write_lock); +static unsigned int *opal_irqs; +static unsigned int opal_irq_count; +static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); +static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; +static DEFINE_SPINLOCK(opal_notifier_lock); +static uint64_t last_notified_mask = 0x0ul; +static atomic_t opal_notifier_hold = ATOMIC_INIT(0); +static uint32_t opal_heartbeat; + +static void opal_reinit_cores(void) +{ + /* Do the actual re-init, This will clobber all FPRs, VRs, etc... + * + * It will preserve non volatile GPRs and HSPRG0/1. It will + * also restore HIDs and other SPRs to their original value + * but it might clobber a bunch. 
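+ *
+ * The HILE (hypervisor interrupt little-endian) mode requested below
+ * must match the byte order this kernel was built for, hence the
+ * __BIG_ENDIAN__ / __LITTLE_ENDIAN__ selection.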
+ */
+#ifdef __BIG_ENDIAN__
+ opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
+#else
+ opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
+#endif
+}
+
+int __init early_init_dt_scan_opal(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ const void *basep, *entryp, *sizep;
+ int basesz, entrysz, runtimesz;
+
+ if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
+ return 0;
+
+ basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
+ entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
+ sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
+
+ if (!basep || !entryp || !sizep)
+ return 1;
+
+ opal.base = of_read_number(basep, basesz/4);
+ opal.entry = of_read_number(entryp, entrysz/4);
+ opal.size = of_read_number(sizep, runtimesz/4);
+
+ pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n",
+ opal.base, basep, basesz);
+ pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
+ opal.entry, entryp, entrysz);
+ pr_debug("OPAL Size = 0x%llx (sizep=%p runtimesz=%d)\n",
+ opal.size, sizep, runtimesz);
+
+ powerpc_firmware_features |= FW_FEATURE_OPAL;
+ if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
+ powerpc_firmware_features |= FW_FEATURE_OPALv2;
+ powerpc_firmware_features |= FW_FEATURE_OPALv3;
+ pr_info("OPAL V3 detected !\n");
+ } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
+ powerpc_firmware_features |= FW_FEATURE_OPALv2;
+ pr_info("OPAL V2 detected !\n");
+ } else {
+ pr_info("OPAL V1 detected !\n");
+ }
+
+ /* Reinit all cores with the right endian */
+ opal_reinit_cores();
+
+ /* Restore some bits */
+ if (cur_cpu_spec->cpu_restore)
+ cur_cpu_spec->cpu_restore();
+
+ return 1;
+}
+
+int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ int i, psize, size;
+ const __be32 *prop;
+
+ if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
+
+ if (!prop)
+ return 1;
+
+ pr_debug("Found machine check recoverable ranges.\n");
+
+ /*
+ * Calculate number of available entries.
+ *
+ * Each recoverable address range entry is (start address, len,
+ * recovery address), 2 cells each for start and recovery address,
+ * 1 cell for len, totalling 5 cells per entry.
+ */
+ mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
+
+ /* Sanity check */
+ if (!mc_recoverable_range_len)
+ return 1;
+
+ /* Size required to hold all the entries. */
+ size = mc_recoverable_range_len *
+ sizeof(struct mcheck_recoverable_range);
+
+ /*
+ * Allocate a buffer to hold the MC recoverable ranges. We would be
+ * accessing them in real mode, hence the buffer needs to be within
+ * the RMO region.
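+ * (The RMO is the part of memory the CPU can reach in real mode;
+ * ppc64_rma_size below bounds the allocation to it.)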
+ */
+ mc_recoverable_range = __va(memblock_alloc_base(size, __alignof__(u64),
+ ppc64_rma_size));
+ memset(mc_recoverable_range, 0, size);
+
+ for (i = 0; i < mc_recoverable_range_len; i++) {
+ mc_recoverable_range[i].start_addr =
+ of_read_number(prop + (i * 5) + 0, 2);
+ mc_recoverable_range[i].end_addr =
+ mc_recoverable_range[i].start_addr +
+ of_read_number(prop + (i * 5) + 2, 1);
+ mc_recoverable_range[i].recover_addr =
+ of_read_number(prop + (i * 5) + 3, 2);
+
+ pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
+ mc_recoverable_range[i].start_addr,
+ mc_recoverable_range[i].end_addr,
+ mc_recoverable_range[i].recover_addr);
+ }
+ return 1;
+}
+
+static int __init opal_register_exception_handlers(void)
+{
+#ifdef __BIG_ENDIAN__
+ u64 glue;
+
+ if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
+ return -ENODEV;
+
+ /* Hookup some exception handlers except machine check. We use the
+ * fwnmi area at 0x7000 to provide the glue space to OPAL
+ */
+ glue = 0x7000;
+
+ /*
+ * Check if we are running on newer firmware that exports
+ * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
+ * the HMI interrupt and we catch it directly in Linux.
+ *
+ * For older firmware (i.e. currently released POWER8 System Firmware
+ * as of today <= SV810_087), we fall back to the old behavior and let
+ * OPAL patch the HMI vector and handle it inside OPAL firmware.
+ *
+ * For newer firmware (in development/yet to be released) we will
+ * start catching/handling HMI directly in Linux.
+ */
+ if (!opal_check_token(OPAL_HANDLE_HMI)) {
+ pr_info("Old firmware detected, OPAL handles HMIs.\n");
+ opal_register_exception_handler(
+ OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
+ 0, glue);
+ glue += 128;
+ }
+
+ opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
+#endif
+
+ return 0;
+}
+machine_early_initcall(powernv, opal_register_exception_handlers);
+
+int opal_notifier_register(struct notifier_block *nb)
+{
+ if (!nb) {
+ pr_warning("%s: Invalid argument (%p)\n",
+ __func__, nb);
+ return -EINVAL;
+ }
+
+ atomic_notifier_chain_register(&opal_notifier_head, nb);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(opal_notifier_register);
+
+int opal_notifier_unregister(struct notifier_block *nb)
+{
+ if (!nb) {
+ pr_warning("%s: Invalid argument (%p)\n",
+ __func__, nb);
+ return -EINVAL;
+ }
+
+ atomic_notifier_chain_unregister(&opal_notifier_head, nb);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(opal_notifier_unregister);
+
+static void opal_do_notifier(uint64_t events)
+{
+ unsigned long flags;
+ uint64_t changed_mask;
+
+ if (atomic_read(&opal_notifier_hold))
+ return;
+
+ spin_lock_irqsave(&opal_notifier_lock, flags);
+ changed_mask = last_notified_mask ^ events;
+ last_notified_mask = events;
+ spin_unlock_irqrestore(&opal_notifier_lock, flags);
+
+ /*
+ * We pass both the event bits and the changed bits so the
+ * callback has enough information to act on.
+ */
+ atomic_notifier_call_chain(&opal_notifier_head,
+ events, (void *)changed_mask);
+}
+
+void opal_notifier_update_evt(uint64_t evt_mask,
+ uint64_t evt_val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&opal_notifier_lock, flags);
+ last_notified_mask &= ~evt_mask;
+ last_notified_mask |= evt_val;
+ spin_unlock_irqrestore(&opal_notifier_lock, flags);
+}
+
+void opal_notifier_enable(void)
+{
+ int64_t rc;
+ __be64 evt = 0;
+
+ atomic_set(&opal_notifier_hold, 0);
+
+ /* Process pending events */
+ rc = opal_poll_events(&evt);
+ if (rc == OPAL_SUCCESS && evt)
+ opal_do_notifier(be64_to_cpu(evt));
+}
+
+void opal_notifier_disable(void)
+{
+ atomic_set(&opal_notifier_hold, 1);
+}
+
+/*
+ * Opal message notifier based on message type. Allow subscribers to get
+ * notified of a specific message type.
+ */
+int opal_message_notifier_register(enum opal_msg_type msg_type,
+ struct notifier_block *nb)
+{
+ if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
+ pr_warning("%s: Invalid arguments, msg_type:%d\n",
+ __func__, msg_type);
+ return -EINVAL;
+ }
+
+ return atomic_notifier_chain_register(
+ &opal_msg_notifier_head[msg_type], nb);
+}
+
+int opal_message_notifier_unregister(enum opal_msg_type msg_type,
+ struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(
+ &opal_msg_notifier_head[msg_type], nb);
+}
+
+static void opal_message_do_notify(uint32_t msg_type, void *msg)
+{
+ /* notify subscribers */
+ atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
+ msg_type, msg);
+}
+
+static void opal_handle_message(void)
+{
+ s64 ret;
+ /*
+ * TODO: pre-allocate a message buffer depending on opal-msg-size
+ * value in /proc/device-tree.
+ */
+ static struct opal_msg msg;
+ u32 type;
+
+ ret = opal_get_msg(__pa(&msg), sizeof(msg));
+ /* No opal message pending. */
+ if (ret == OPAL_RESOURCE)
+ return;
+
+ /* check for errors. */
+ if (ret) {
+ pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
+ __func__, ret);
+ return;
+ }
+
+ type = be32_to_cpu(msg.msg_type);
+
+ /* Sanity check */
+ if (type >= OPAL_MSG_TYPE_MAX) {
+ pr_warning("%s: Unknown message type: %u\n", __func__, type);
+ return;
+ }
+ opal_message_do_notify(type, (void *)&msg);
+}
+
+static int opal_message_notify(struct notifier_block *nb,
+ unsigned long events, void *change)
+{
+ if (events & OPAL_EVENT_MSG_PENDING)
+ opal_handle_message();
+ return 0;
+}
+
+static struct notifier_block opal_message_nb = {
+ .notifier_call = opal_message_notify,
+ .next = NULL,
+ .priority = 0,
+};
+
+static int __init opal_message_init(void)
+{
+ int ret, i;
+
+ for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
+ ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
+
+ ret = opal_notifier_register(&opal_message_nb);
+ if (ret) {
+ pr_err("%s: Can't register OPAL event notifier (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+ return 0;
+}
+machine_early_initcall(powernv, opal_message_init);
+
+int opal_get_chars(uint32_t vtermno, char *buf, int count)
+{
+ s64 rc;
+ __be64 evt, len;
+
+ if (!opal.entry)
+ return -ENODEV;
+ opal_poll_events(&evt);
+ if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
+ return 0;
+ len = cpu_to_be64(count);
+ rc = opal_console_read(vtermno, &len, buf);
+ if (rc == OPAL_SUCCESS)
+ return be64_to_cpu(len);
+ return 0;
+}
+
+int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
+{
+ int written = 0;
+ __be64 olen;
+ s64 len, rc;
+ unsigned long flags;
+ __be64 evt;
+
+ if (!opal.entry)
+ return -ENODEV;
+
+ /* We want put_chars to be atomic to avoid mangling of hvsi
+ * packets. 
To do that, we first test for room and return
+ * -EAGAIN if there isn't enough.
+ *
+ * Unfortunately, opal_console_write_buffer_space() doesn't
+ * appear to work on opal v1, so we just assume there is
+ * enough room and are done with it
+ */
+ spin_lock_irqsave(&opal_write_lock, flags);
+ if (firmware_has_feature(FW_FEATURE_OPALv2)) {
+ rc = opal_console_write_buffer_space(vtermno, &olen);
+ len = be64_to_cpu(olen);
+ if (rc || len < total_len) {
+ spin_unlock_irqrestore(&opal_write_lock, flags);
+ /* Closed -> drop characters */
+ if (rc)
+ return total_len;
+ opal_poll_events(NULL);
+ return -EAGAIN;
+ }
+ }
+
+ /* We still try to handle partial completions, though they
+ * should no longer happen.
+ */
+ rc = OPAL_BUSY;
+ while(total_len > 0 && (rc == OPAL_BUSY ||
+ rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
+ olen = cpu_to_be64(total_len);
+ rc = opal_console_write(vtermno, &olen, data);
+ len = be64_to_cpu(olen);
+
+ /* Closed or other error drop */
+ if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
+ rc != OPAL_BUSY_EVENT) {
+ written = total_len;
+ break;
+ }
+ if (rc == OPAL_SUCCESS) {
+ total_len -= len;
+ data += len;
+ written += len;
+ }
+ /* This is a bit nasty but we need that for the console to
+ * flush when there aren't any interrupts. We will clean
+ * things a bit later to limit that to synchronous path
+ * such as the kernel console and xmon/udbg
+ */
+ do
+ opal_poll_events(&evt);
+ while(rc == OPAL_SUCCESS &&
+ (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
+ }
+ spin_unlock_irqrestore(&opal_write_lock, flags);
+ return written;
+}
+
+static int opal_recover_mce(struct pt_regs *regs,
+ struct machine_check_event *evt)
+{
+ int recovered = 0;
+ uint64_t ea = get_mce_fault_addr(evt);
+
+ if (!(regs->msr & MSR_RI)) {
+ /* If MSR_RI isn't set, we cannot recover */
+ recovered = 0;
+ } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
+ /* Platform corrected itself */
+ recovered = 1;
+ } else if (ea && !is_kernel_addr(ea)) {
+ /*
+ * Faulting address is not in kernel text. We should be fine.
+ * We need to find which process uses this address.
+ * For now, kill the task if we have received the exception
+ * while in userspace.
+ *
+ * TODO: Queue up this address for hwpoisoning later.
+ */
+ if (user_mode(regs) && !is_global_init(current)) {
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ } else
+ recovered = 0;
+ } else if (user_mode(regs) && !is_global_init(current) &&
+ evt->severity == MCE_SEV_ERROR_SYNC) {
+ /*
+ * If we have received a synchronous error when in userspace
+ * kill the task.
+ */
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ }
+ return recovered;
+}
+
+int opal_machine_check(struct pt_regs *regs)
+{
+ struct machine_check_event evt;
+
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return 0;
+
+ /* Print things out */
+ if (evt.version != MCE_V1) {
+ pr_err("Machine Check Exception, Unknown event version %d !\n",
+ evt.version);
+ return 0;
+ }
+ machine_check_print_event_info(&evt);
+
+ if (opal_recover_mce(regs, &evt))
+ return 1;
+ return 0;
+}
+
+/* Early hmi handler called in real mode. */
+int opal_hmi_exception_early(struct pt_regs *regs)
+{
+ s64 rc;
+
+ /*
+ * call opal hmi handler. Pass paca address as token.
+ * The return value OPAL_SUCCESS is an indication that there is
+ * an HMI event generated waiting to be pulled by Linux.
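+ * The event itself is fetched later, from virtual mode, via
+ * opal_poll_events() in opal_handle_hmi_exception() below.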
+ */ + rc = opal_handle_hmi(); + if (rc == OPAL_SUCCESS) { + local_paca->hmi_event_available = 1; + return 1; + } + return 0; +} + +/* HMI exception handler called in virtual mode during check_irq_replay. */ +int opal_handle_hmi_exception(struct pt_regs *regs) +{ + s64 rc; + __be64 evt = 0; + + /* + * Check if HMI event is available. + * if Yes, then call opal_poll_events to pull opal messages and + * process them. + */ + if (!local_paca->hmi_event_available) + return 0; + + local_paca->hmi_event_available = 0; + rc = opal_poll_events(&evt); + if (rc == OPAL_SUCCESS && evt) + opal_do_notifier(be64_to_cpu(evt)); + + return 1; +} + +static uint64_t find_recovery_address(uint64_t nip) +{ + int i; + + for (i = 0; i < mc_recoverable_range_len; i++) + if ((nip >= mc_recoverable_range[i].start_addr) && + (nip < mc_recoverable_range[i].end_addr)) + return mc_recoverable_range[i].recover_addr; + return 0; +} + +bool opal_mce_check_early_recovery(struct pt_regs *regs) +{ + uint64_t recover_addr = 0; + + if (!opal.base || !opal.size) + goto out; + + if ((regs->nip >= opal.base) && + (regs->nip <= (opal.base + opal.size))) + recover_addr = find_recovery_address(regs->nip); + + /* + * Setup regs->nip to rfi into fixup address. + */ + if (recover_addr) + regs->nip = recover_addr; + +out: + return !!recover_addr; +} + +static irqreturn_t opal_interrupt(int irq, void *data) +{ + __be64 events; + + opal_handle_interrupt(virq_to_hw(irq), &events); + + opal_do_notifier(be64_to_cpu(events)); + + return IRQ_HANDLED; +} + +static int opal_sysfs_init(void) +{ + opal_kobj = kobject_create_and_add("opal", firmware_kobj); + if (!opal_kobj) { + pr_warn("kobject_create_and_add opal failed\n"); + return -ENOMEM; + } + + return 0; +} + +static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + return memory_read_from_buffer(buf, count, &off, bin_attr->private, + bin_attr->size); +} + +static BIN_ATTR_RO(symbol_map, 0); + +static void opal_export_symmap(void) +{ + const __be64 *syms; + unsigned int size; + struct device_node *fw; + int rc; + + fw = of_find_node_by_path("/ibm,opal/firmware"); + if (!fw) + return; + syms = of_get_property(fw, "symbol-map", &size); + if (!syms || size != 2 * sizeof(__be64)) + return; + + /* Setup attributes */ + bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0])); + bin_attr_symbol_map.size = be64_to_cpu(syms[1]); + + rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map); + if (rc) + pr_warn("Error %d creating OPAL symbols file\n", rc); +} + +static void __init opal_dump_region_init(void) +{ + void *addr; + uint64_t size; + int rc; + + if (!opal_check_token(OPAL_REGISTER_DUMP_REGION)) + return; + + /* Register kernel log buffer */ + addr = log_buf_addr_get(); + if (addr == NULL) + return; + + size = log_buf_len_get(); + if (size == 0) + return; + + rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF, + __pa(addr), size); + /* Don't warn if this is just an older OPAL that doesn't + * know about that call + */ + if (rc && rc != OPAL_UNSUPPORTED) + pr_warn("DUMP: Failed to register kernel log buffer. 
" + "rc = %d\n", rc); +} + +static void opal_flash_init(struct device_node *opal_node) +{ + struct device_node *np; + + for_each_child_of_node(opal_node, np) + if (of_device_is_compatible(np, "ibm,opal-flash")) + of_platform_device_create(np, NULL, NULL); +} + +static void opal_ipmi_init(struct device_node *opal_node) +{ + struct device_node *np; + + for_each_child_of_node(opal_node, np) + if (of_device_is_compatible(np, "ibm,opal-ipmi")) + of_platform_device_create(np, NULL, NULL); +} + +static void opal_i2c_create_devs(void) +{ + struct device_node *np; + + for_each_compatible_node(np, NULL, "ibm,opal-i2c") + of_platform_device_create(np, NULL, NULL); +} + +static void __init opal_irq_init(struct device_node *dn) +{ + const __be32 *irqs; + int i, irqlen; + + /* Get interrupt property */ + irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); + opal_irq_count = irqs ? (irqlen / 4) : 0; + pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); + if (!opal_irq_count) + return; + + /* Install interrupt handlers */ + opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); + for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { + unsigned int irq, virq; + int rc; + + /* Get hardware and virtual IRQ */ + irq = be32_to_cpup(irqs); + virq = irq_create_mapping(NULL, irq); + if (virq == NO_IRQ) { + pr_warn("Failed to map irq 0x%x\n", irq); + continue; + } + + /* Install interrupt handler */ + rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); + if (rc) { + irq_dispose_mapping(virq); + pr_warn("Error %d requesting irq %d (0x%x)\n", + rc, virq, irq); + continue; + } + + /* Cache IRQ */ + opal_irqs[i] = virq; + } +} + +static int kopald(void *unused) +{ + set_freezable(); + do { + try_to_freeze(); + opal_poll_events(NULL); + msleep_interruptible(opal_heartbeat); + } while (!kthread_should_stop()); + + return 0; +} + +static void opal_init_heartbeat(void) +{ + /* Old firwmware, we assume the HVC heartbeat is sufficient */ + if (of_property_read_u32(opal_node, "ibm,heartbeat-ms", + &opal_heartbeat) != 0) + opal_heartbeat = 0; + + if (opal_heartbeat) + kthread_run(kopald, NULL, "kopald"); +} + +static int __init opal_init(void) +{ + struct device_node *np, *consoles; + int rc; + + opal_node = of_find_node_by_path("/ibm,opal"); + if (!opal_node) { + pr_warn("Device node not found\n"); + return -ENODEV; + } + + /* Register OPAL consoles if any ports */ + if (firmware_has_feature(FW_FEATURE_OPALv2)) + consoles = of_find_node_by_path("/ibm,opal/consoles"); + else + consoles = of_node_get(opal_node); + if (consoles) { + for_each_child_of_node(consoles, np) { + if (strcmp(np->name, "serial")) + continue; + of_platform_device_create(np, NULL, NULL); + } + of_node_put(consoles); + } + + /* Create i2c platform devices */ + opal_i2c_create_devs(); + + /* Setup a heatbeat thread if requested by OPAL */ + opal_init_heartbeat(); + + /* Find all OPAL interrupts and request them */ + opal_irq_init(opal_node); + + /* Create "opal" kobject under /sys/firmware */ + rc = opal_sysfs_init(); + if (rc == 0) { + /* Export symbol map to userspace */ + opal_export_symmap(); + /* Setup dump region interface */ + opal_dump_region_init(); + /* Setup error log interface */ + rc = opal_elog_init(); + /* Setup code update interface */ + opal_flash_update_init(); + /* Setup platform dump extract interface */ + opal_platform_dump_init(); + /* Setup system parameters interface */ + opal_sys_param_init(); + /* Setup message log interface. 
*/ + opal_msglog_init(); + } + + /* Initialize OPAL IPMI backend */ + opal_ipmi_init(opal_node); + + opal_flash_init(opal_node); + + return 0; +} +machine_subsys_initcall(powernv, opal_init); + +void opal_shutdown(void) +{ + unsigned int i; + long rc = OPAL_BUSY; + + /* First free interrupts, which will also mask them */ + for (i = 0; i < opal_irq_count; i++) { + if (opal_irqs[i]) + free_irq(opal_irqs[i], NULL); + opal_irqs[i] = 0; + } + + /* + * Then sync with OPAL which ensure anything that can + * potentially write to our memory has completed such + * as an ongoing dump retrieval + */ + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_sync_host_reboot(); + if (rc == OPAL_BUSY) + opal_poll_events(NULL); + else + mdelay(10); + } + + /* Unregister memory dump region */ + if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION)) + opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF); +} + +/* Export this so that test modules can use it */ +EXPORT_SYMBOL_GPL(opal_invalid_call); +EXPORT_SYMBOL_GPL(opal_ipmi_send); +EXPORT_SYMBOL_GPL(opal_ipmi_recv); +EXPORT_SYMBOL_GPL(opal_flash_read); +EXPORT_SYMBOL_GPL(opal_flash_write); +EXPORT_SYMBOL_GPL(opal_flash_erase); + +/* Convert a region of vmalloc memory to an opal sg list */ +struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, + unsigned long vmalloc_size) +{ + struct opal_sg_list *sg, *first = NULL; + unsigned long i = 0; + + sg = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!sg) + goto nomem; + + first = sg; + + while (vmalloc_size > 0) { + uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT; + uint64_t length = min(vmalloc_size, PAGE_SIZE); + + sg->entry[i].data = cpu_to_be64(data); + sg->entry[i].length = cpu_to_be64(length); + i++; + + if (i >= SG_ENTRIES_PER_NODE) { + struct opal_sg_list *next; + + next = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!next) + goto nomem; + + sg->length = cpu_to_be64( + i * sizeof(struct opal_sg_entry) + 16); + i = 0; + sg->next = cpu_to_be64(__pa(next)); + sg = next; + } + + vmalloc_addr += length; + vmalloc_size -= length; + } + + sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16); + + return first; + +nomem: + pr_err("%s : Failed to allocate memory\n", __func__); + opal_free_sg_list(first); + return NULL; +} + +void opal_free_sg_list(struct opal_sg_list *sg) +{ + while (sg) { + uint64_t next = be64_to_cpu(sg->next); + + kfree(sg); + + if (next) + sg = __va(next); + else + sg = NULL; + } +} + +int opal_error_code(int rc) +{ + switch (rc) { + case OPAL_SUCCESS: return 0; + + case OPAL_PARAMETER: return -EINVAL; + case OPAL_ASYNC_COMPLETION: return -EINPROGRESS; + case OPAL_BUSY_EVENT: return -EBUSY; + case OPAL_NO_MEM: return -ENOMEM; + + case OPAL_UNSUPPORTED: return -EIO; + case OPAL_HARDWARE: return -EIO; + case OPAL_INTERNAL_ERROR: return -EIO; + default: + pr_err("%s: unexpected OPAL error %d\n", __func__, rc); + return -EIO; + } +} + +EXPORT_SYMBOL_GPL(opal_poll_events); +EXPORT_SYMBOL_GPL(opal_rtc_read); +EXPORT_SYMBOL_GPL(opal_rtc_write); +EXPORT_SYMBOL_GPL(opal_tpo_read); +EXPORT_SYMBOL_GPL(opal_tpo_write); +EXPORT_SYMBOL_GPL(opal_i2c_request); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c new file mode 100644 index 000000000..f8bc950ef --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -0,0 +1,2871 @@ +/* + * Support PCI/PCIe on PowerNV platforms + * + * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/crash_dump.h> +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/msi.h> +#include <linux/memblock.h> + +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/msi_bitmap.h> +#include <asm/ppc-pci.h> +#include <asm/opal.h> +#include <asm/iommu.h> +#include <asm/tce.h> +#include <asm/xics.h> +#include <asm/debug.h> +#include <asm/firmware.h> +#include <asm/pnv-pci.h> + +#include <misc/cxl.h> + +#include "powernv.h" +#include "pci.h" + +/* 256M DMA window, 4K TCE pages, 8 bytes TCE */ +#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) + +static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + char pfix[32]; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (pe->flags & PNV_IODA_PE_DEV) + strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); + else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) + sprintf(pfix, "%04x:%02x ", + pci_domain_nr(pe->pbus), pe->pbus->number); +#ifdef CONFIG_PCI_IOV + else if (pe->flags & PNV_IODA_PE_VF) + sprintf(pfix, "%04x:%02x:%2x.%d", + pci_domain_nr(pe->parent_dev->bus), + (pe->rid & 0xff00) >> 8, + PCI_SLOT(pe->rid), PCI_FUNC(pe->rid)); +#endif /* CONFIG_PCI_IOV*/ + + printk("%spci %s: [PE# %.3d] %pV", + level, pfix, pe->pe_number, &vaf); + + va_end(args); +} + +#define pe_err(pe, fmt, ...) \ + pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__) +#define pe_warn(pe, fmt, ...) \ + pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) +#define pe_info(pe, fmt, ...) 
\
+	pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
+
+static bool pnv_iommu_bypass_disabled __read_mostly;
+
+static int __init iommu_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	while (*str) {
+		if (!strncmp(str, "nobypass", 8)) {
+			pnv_iommu_bypass_disabled = true;
+			pr_info("PowerNV: IOMMU bypass window disabled.\n");
+			break;
+		}
+		str += strcspn(str, ",");
+		if (*str == ',')
+			str++;
+	}
+
+	return 0;
+}
+early_param("iommu", iommu_setup);
+
+/*
+ * stdcix is only supposed to be used in hypervisor real mode as per
+ * the architecture spec.
+ */
+static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
+{
+	__asm__ __volatile__("stdcix %0,0,%1"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
+static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+{
+	return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
+		(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+}
+
+static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
+{
+	if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) {
+		pr_warn("%s: Invalid PE %d on PHB#%x\n",
+			__func__, pe_no, phb->hose->global_number);
+		return;
+	}
+
+	if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) {
+		pr_warn("%s: PE %d was already assigned on PHB#%x\n",
+			__func__, pe_no, phb->hose->global_number);
+		return;
+	}
+
+	phb->ioda.pe_array[pe_no].phb = phb;
+	phb->ioda.pe_array[pe_no].pe_number = pe_no;
+}
+
+static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
+{
+	unsigned long pe;
+
+	do {
+		pe = find_next_zero_bit(phb->ioda.pe_alloc,
+					phb->ioda.total_pe, 0);
+		if (pe >= phb->ioda.total_pe)
+			return IODA_INVALID_PE;
+	} while (test_and_set_bit(pe, phb->ioda.pe_alloc));
+
+	phb->ioda.pe_array[pe].phb = phb;
+	phb->ioda.pe_array[pe].pe_number = pe;
+	return pe;
+}
+
+static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
+{
+	WARN_ON(phb->ioda.pe_array[pe].pdev);
+
+	memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
+	clear_bit(pe, phb->ioda.pe_alloc);
+}
+
+/* The default M64 BAR is shared by all PEs */
+static int pnv_ioda2_init_m64(struct pnv_phb *phb)
+{
+	const char *desc;
+	struct resource *r;
+	s64 rc;
+
+	/* Configure the default M64 BAR */
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 phb->ioda.m64_bar_idx,
+					 phb->ioda.m64_base,
+					 0, /* unused */
+					 phb->ioda.m64_size);
+	if (rc != OPAL_SUCCESS) {
+		desc = "configuring";
+		goto fail;
+	}
+
+	/* Enable the default M64 BAR */
+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				      OPAL_M64_WINDOW_TYPE,
+				      phb->ioda.m64_bar_idx,
+				      OPAL_ENABLE_M64_SPLIT);
+	if (rc != OPAL_SUCCESS) {
+		desc = "enabling";
+		goto fail;
+	}
+
+	/* Mark the M64 BAR assigned */
+	set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc);
+
+	/*
+	 * Strip off the segment used by the reserved PE, which is
+	 * expected to be 0 or the last one of the PE capacity.
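+	 * For example, with a 64GB window and 256 PEs each M64 segment
+	 * is 256MB: reserving PE#0 strips the first segment from the
+	 * resource below, reserving PE#255 strips the last.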
+	 */
+	r = &phb->hose->mem_resources[1];
+	if (phb->ioda.reserved_pe == 0)
+		r->start += phb->ioda.m64_segsize;
+	else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
+		r->end -= phb->ioda.m64_segsize;
+	else
+		pr_warn("  Cannot strip M64 segment for reserved PE#%d\n",
+			phb->ioda.reserved_pe);
+
+	return 0;
+
+fail:
+	pr_warn("  Failure %lld %s M64 BAR#%d\n",
+		rc, desc, phb->ioda.m64_bar_idx);
+	opal_pci_phb_mmio_enable(phb->opal_id,
+				 OPAL_M64_WINDOW_TYPE,
+				 phb->ioda.m64_bar_idx,
+				 OPAL_DISABLE_M64);
+	return -EIO;
+}
+
+static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb)
+{
+	resource_size_t sgsz = phb->ioda.m64_segsize;
+	struct pci_dev *pdev;
+	struct resource *r;
+	int base, step, i;
+
+	/*
+	 * The root bus always has the full M64 range, while the root
+	 * port has the M64 range actually used. So we check the root
+	 * port instead of the root bus.
+	 */
+	list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
+		for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
+			r = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
+			if (!r->parent ||
+			    !pnv_pci_is_mem_pref_64(r->flags))
+				continue;
+
+			base = (r->start - phb->ioda.m64_base) / sgsz;
+			for (step = 0; step < resource_size(r) / sgsz; step++)
+				pnv_ioda_reserve_pe(phb, base + step);
+		}
+	}
+}
+
+static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
+				 struct pci_bus *bus, int all)
+{
+	resource_size_t segsz = phb->ioda.m64_segsize;
+	struct pci_dev *pdev;
+	struct resource *r;
+	struct pnv_ioda_pe *master_pe, *pe;
+	unsigned long size, *pe_alloc;
+	bool found;
+	int start, i, j;
+
+	/* Root bus shouldn't use M64 */
+	if (pci_is_root_bus(bus))
+		return IODA_INVALID_PE;
+
+	/* We support only one M64 window on each bus */
+	found = false;
+	pci_bus_for_each_resource(bus, r, i) {
+		if (r && r->parent &&
+		    pnv_pci_is_mem_pref_64(r->flags)) {
+			found = true;
+			break;
+		}
+	}
+
+	/* No M64 window found? */
+	if (!found)
+		return IODA_INVALID_PE;
+
+	/* Allocate bitmap */
+	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+	pe_alloc = kzalloc(size, GFP_KERNEL);
+	if (!pe_alloc) {
+		pr_warn("%s: Out of memory!\n",
+			__func__);
+		return IODA_INVALID_PE;
+	}
+
+	/*
+	 * Figure out the PE numbers reserved by the PE and
+	 * its child PEs.
+	 */
+	start = (r->start - phb->ioda.m64_base) / segsz;
+	for (i = 0; i < resource_size(r) / segsz; i++)
+		set_bit(start + i, pe_alloc);
+
+	if (all)
+		goto done;
+
+	/*
+	 * If the PE doesn't cover all subordinate buses, we need
+	 * to subtract the children's PE numbers from the reserved set.
+	 */
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		if (!pdev->subordinate)
+			continue;
+
+		pci_bus_for_each_resource(pdev->subordinate, r, i) {
+			if (!r || !r->parent ||
+			    !pnv_pci_is_mem_pref_64(r->flags))
+				continue;
+
+			start = (r->start - phb->ioda.m64_base) / segsz;
+			for (j = 0; j < resource_size(r) / segsz; j++)
+				clear_bit(start + j, pe_alloc);
+		}
+	}
+
+	/*
+	 * The current bus might not own an M64 window itself; it may
+	 * all be contributed by its child buses. In that case we
+	 * needn't pick an M64 dependent PE#.
+	 */
+	if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
+		kfree(pe_alloc);
+		return IODA_INVALID_PE;
+	}
+
+	/*
+	 * Figure out the master PE and put all slave PEs on the
+	 * master PE's list to form a compound PE.
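+	 * The lowest-numbered PE left in the bitmap becomes the master;
+	 * every other PE in it is linked onto the master's slave list.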
+ */ +done: + master_pe = NULL; + i = -1; + while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < + phb->ioda.total_pe) { + pe = &phb->ioda.pe_array[i]; + + if (!master_pe) { + pe->flags |= PNV_IODA_PE_MASTER; + INIT_LIST_HEAD(&pe->slaves); + master_pe = pe; + } else { + pe->flags |= PNV_IODA_PE_SLAVE; + pe->master = master_pe; + list_add_tail(&pe->list, &master_pe->slaves); + } + } + + kfree(pe_alloc); + return master_pe->pe_number; +} + +static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) +{ + struct pci_controller *hose = phb->hose; + struct device_node *dn = hose->dn; + struct resource *res; + const u32 *r; + u64 pci_addr; + + /* FIXME: Support M64 for P7IOC */ + if (phb->type != PNV_PHB_IODA2) { + pr_info(" Not support M64 window\n"); + return; + } + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { + pr_info(" Firmware too old to support M64 window\n"); + return; + } + + r = of_get_property(dn, "ibm,opal-m64-window", NULL); + if (!r) { + pr_info(" No <ibm,opal-m64-window> on %s\n", + dn->full_name); + return; + } + + res = &hose->mem_resources[1]; + res->start = of_translate_address(dn, r + 2); + res->end = res->start + of_read_number(r + 4, 2) - 1; + res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + pci_addr = of_read_number(r, 2); + hose->mem_offset[1] = res->start - pci_addr; + + phb->ioda.m64_size = resource_size(res); + phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe; + phb->ioda.m64_base = pci_addr; + + pr_info(" MEM64 0x%016llx..0x%016llx -> 0x%016llx\n", + res->start, res->end, pci_addr); + + /* Use last M64 BAR to cover M64 window */ + phb->ioda.m64_bar_idx = 15; + phb->init_m64 = pnv_ioda2_init_m64; + phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe; + phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; +} + +static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no]; + struct pnv_ioda_pe *slave; + s64 rc; + + /* Fetch master PE */ + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER))) + return; + + pe_no = pe->pe_number; + } + + /* Freeze master PE */ + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe_no, + OPAL_EEH_ACTION_SET_FREEZE_ALL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, pe_no); + return; + } + + /* Freeze slave PEs */ + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return; + + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_SET_FREEZE_ALL); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + slave->pe_number); + } +} + +static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) +{ + struct pnv_ioda_pe *pe, *slave; + s64 rc; + + /* Find master PE */ + pe = &phb->ioda.pe_array[pe_no]; + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Clear frozen state for master PE */ + rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", + __func__, rc, opt, phb->hose->global_number, pe_no); + return -EIO; + } + + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Clear frozen state for slave PEs */ + list_for_each_entry(slave, &pe->slaves, list) { + rc = 
opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", + __func__, rc, opt, phb->hose->global_number, + slave->pe_number); + return -EIO; + } + } + + return 0; +} + +static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *slave, *pe; + u8 fstate, state; + __be16 pcierr; + s64 rc; + + /* Sanity check on PE number */ + if (pe_no < 0 || pe_no >= phb->ioda.total_pe) + return OPAL_EEH_STOPPED_PERM_UNAVAIL; + + /* + * Fetch the master PE and the PE instance might be + * not initialized yet. + */ + pe = &phb->ioda.pe_array[pe_no]; + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Check the master PE */ + rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &state, &pcierr, NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting " + "PHB#%x-PE#%x state\n", + __func__, rc, + phb->hose->global_number, pe_no); + return OPAL_EEH_STOPPED_TEMP_UNAVAIL; + } + + /* Check the slave PE */ + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return state; + + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + slave->pe_number, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting " + "PHB#%x-PE#%x state\n", + __func__, rc, + phb->hose->global_number, slave->pe_number); + return OPAL_EEH_STOPPED_TEMP_UNAVAIL; + } + + /* + * Override the result based on the ascending + * priority. + */ + if (fstate > state) + state = fstate; + } + + return state; +} + +/* Currently those 2 are only used when MSIs are enabled, this will change + * but in the meantime, we need to protect them to avoid warnings + */ +#ifdef CONFIG_PCI_MSI +static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn = pci_get_pdn(dev); + + if (!pdn) + return NULL; + if (pdn->pe_number == IODA_INVALID_PE) + return NULL; + return &phb->ioda.pe_array[pdn->pe_number]; +} +#endif /* CONFIG_PCI_MSI */ + +static int pnv_ioda_set_one_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *parent, + struct pnv_ioda_pe *child, + bool is_add) +{ + const char *desc = is_add ? "adding" : "removing"; + uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN : + OPAL_REMOVE_PE_FROM_DOMAIN; + struct pnv_ioda_pe *slave; + long rc; + + /* Parent PE affects child PE */ + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + child->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(child, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + + if (!(child->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Compound case: parent PE affects slave PEs */ + list_for_each_entry(slave, &child->slaves, list) { + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + slave->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(slave, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + } + + return 0; +} + +static int pnv_ioda_set_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, + bool is_add) +{ + struct pnv_ioda_pe *slave; + struct pci_dev *pdev = NULL; + int ret; + + /* + * Clear PE frozen state. If it's master PE, we need + * clear slave PE frozen state as well. 
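+	 * (The PELT-V, or PE lookup table vector, tells the hardware
+	 * which other PEs should also be frozen when an error is
+	 * detected on a given PE.)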
+ */ + if (is_add) { + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) + opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + } + } + + /* + * Associate PE in PELT. We need add the PE into the + * corresponding PELT-V as well. Otherwise, the error + * originated from the PE might contribute to other + * PEs. + */ + ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add); + if (ret) + return ret; + + /* For compound PEs, any one affects all of them */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) { + ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add); + if (ret) + return ret; + } + } + + if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS)) + pdev = pe->pbus->self; + else if (pe->flags & PNV_IODA_PE_DEV) + pdev = pe->pdev->bus->self; +#ifdef CONFIG_PCI_IOV + else if (pe->flags & PNV_IODA_PE_VF) + pdev = pe->parent_dev->bus->self; +#endif /* CONFIG_PCI_IOV */ + while (pdev) { + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *parent; + + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + parent = &phb->ioda.pe_array[pdn->pe_number]; + ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add); + if (ret) + return ret; + } + + pdev = pdev->bus->self; + } + + return 0; +} + +#ifdef CONFIG_PCI_IOV +static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +{ + struct pci_dev *parent; + uint8_t bcomp, dcomp, fcomp; + int64_t rc; + long rid_end, rid; + + /* Currently, we just deconfigure VF PE. Bus PE will always there.*/ + if (pe->pbus) { + int count; + + dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; + fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; + parent = pe->pbus->self; + if (pe->flags & PNV_IODA_PE_BUS_ALL) + count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; + else + count = 1; + + switch(count) { + case 1: bcomp = OpalPciBusAll; break; + case 2: bcomp = OpalPciBus7Bits; break; + case 4: bcomp = OpalPciBus6Bits; break; + case 8: bcomp = OpalPciBus5Bits; break; + case 16: bcomp = OpalPciBus4Bits; break; + case 32: bcomp = OpalPciBus3Bits; break; + default: + dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n", + count); + /* Do an exact match only */ + bcomp = OpalPciBusAll; + } + rid_end = pe->rid + (count << 8); + } else { + if (pe->flags & PNV_IODA_PE_VF) + parent = pe->parent_dev; + else + parent = pe->pdev->bus->self; + bcomp = OpalPciBusAll; + dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; + fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; + rid_end = pe->rid + 1; + } + + /* Clear the reverse map */ + for (rid = pe->rid; rid < rid_end; rid++) + phb->ioda.pe_rmap[rid] = 0; + + /* Release from all parents PELT-V */ + while (parent) { + struct pci_dn *pdn = pci_get_pdn(parent); + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + /* XXX What to do in case of error ? 
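+			 * For now the failure is simply ignored and we
+			 * keep walking up the bridge chain.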
*/ + } + parent = parent->bus->self; + } + + opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + + /* Disassociate PE in PELT */ + rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + if (rc) + pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc); + rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, + bcomp, dcomp, fcomp, OPAL_UNMAP_PE); + if (rc) + pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); + + pe->pbus = NULL; + pe->pdev = NULL; + pe->parent_dev = NULL; + + return 0; +} +#endif /* CONFIG_PCI_IOV */ + +static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +{ + struct pci_dev *parent; + uint8_t bcomp, dcomp, fcomp; + long rc, rid_end, rid; + + /* Bus validation ? */ + if (pe->pbus) { + int count; + + dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; + fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; + parent = pe->pbus->self; + if (pe->flags & PNV_IODA_PE_BUS_ALL) + count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; + else + count = 1; + + switch(count) { + case 1: bcomp = OpalPciBusAll; break; + case 2: bcomp = OpalPciBus7Bits; break; + case 4: bcomp = OpalPciBus6Bits; break; + case 8: bcomp = OpalPciBus5Bits; break; + case 16: bcomp = OpalPciBus4Bits; break; + case 32: bcomp = OpalPciBus3Bits; break; + default: + dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n", + count); + /* Do an exact match only */ + bcomp = OpalPciBusAll; + } + rid_end = pe->rid + (count << 8); + } else { +#ifdef CONFIG_PCI_IOV + if (pe->flags & PNV_IODA_PE_VF) + parent = pe->parent_dev; + else +#endif /* CONFIG_PCI_IOV */ + parent = pe->pdev->bus->self; + bcomp = OpalPciBusAll; + dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; + fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; + rid_end = pe->rid + 1; + } + + /* + * Associate PE in PELT. We need add the PE into the + * corresponding PELT-V as well. Otherwise, the error + * originated from the PE might contribute to other + * PEs. + */ + rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, + bcomp, dcomp, fcomp, OPAL_MAP_PE); + if (rc) { + pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); + return -ENXIO; + } + + /* Configure PELTV */ + pnv_ioda_set_peltv(phb, pe, true); + + /* Setup reverse map */ + for (rid = pe->rid; rid < rid_end; rid++) + phb->ioda.pe_rmap[rid] = pe->pe_number; + + /* Setup one MVTs on IODA1 */ + if (phb->type != PNV_PHB_IODA1) { + pe->mve_number = 0; + goto out; + } + + pe->mve_number = pe->pe_number; + rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number); + if (rc != OPAL_SUCCESS) { + pe_err(pe, "OPAL error %ld setting up MVE %d\n", + rc, pe->mve_number); + pe->mve_number = -1; + } else { + rc = opal_pci_set_mve_enable(phb->opal_id, + pe->mve_number, OPAL_ENABLE_MVE); + if (rc) { + pe_err(pe, "OPAL error %ld enabling MVE %d\n", + rc, pe->mve_number); + pe->mve_number = -1; + } + } + +out: + return 0; +} + +static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, + struct pnv_ioda_pe *pe) +{ + struct pnv_ioda_pe *lpe; + + list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { + if (lpe->dma_weight < pe->dma_weight) { + list_add_tail(&pe->dma_link, &lpe->dma_link); + return; + } + } + list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); +} + +static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) +{ + /* This is quite simplistic. The "base" weight of a device + * is 10. 0 means no DMA is to be accounted for it. 
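+	 * The heuristics below: bridges get 0, slow USB host controllers
+	 * (UHCI/OHCI/EHCI) get 3, RAID adapters get 15, and everything
+	 * else gets the base weight of 10.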
+ */ + + /* If it's a bridge, no DMA */ + if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) + return 0; + + /* Reduce the weight of slow USB controllers */ + if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || + dev->class == PCI_CLASS_SERIAL_USB_OHCI || + dev->class == PCI_CLASS_SERIAL_USB_EHCI) + return 3; + + /* Increase the weight of RAID (includes Obsidian) */ + if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) + return 15; + + /* Default */ + return 10; +} + +#ifdef CONFIG_PCI_IOV +static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) +{ + struct pci_dn *pdn = pci_get_pdn(dev); + int i; + struct resource *res, res2; + resource_size_t size; + u16 num_vfs; + + if (!dev->is_physfn) + return -EINVAL; + + /* + * "offset" is in VFs. The M64 windows are sized so that when they + * are segmented, each segment is the same size as the IOV BAR. + * Each segment is in a separate PE, and the high order bits of the + * address are the PE number. Therefore, each VF's BAR is in a + * separate PE, and changing the IOV BAR start address changes the + * range of PEs the VFs are in. + */ + num_vfs = pdn->num_vfs; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + if (!pnv_pci_is_mem_pref_64(res->flags)) + continue; + + /* + * The actual IOV BAR range is determined by the start address + * and the actual size for num_vfs VFs BAR. This check is to + * make sure that after shifting, the range will not overlap + * with another device. + */ + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2.flags = res->flags; + res2.start = res->start + (size * offset); + res2.end = res2.start + (size * num_vfs) - 1; + + if (res2.end > res->end) { + dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", + i, &res2, res, num_vfs, offset); + return -EBUSY; + } + } + + /* + * After doing so, there would be a "hole" in the /proc/iomem when + * offset is a positive value. It looks like the device return some + * mmio back to the system, which actually no one could use it. + */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + if (!pnv_pci_is_mem_pref_64(res->flags)) + continue; + + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2 = *res; + res->start += size * offset; + + dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n", + i, &res2, res, num_vfs, offset); + pci_update_resource(dev, i + PCI_IOV_RESOURCES); + } + return 0; +} +#endif /* CONFIG_PCI_IOV */ + +#if 0 +static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn = pci_get_pdn(dev); + struct pnv_ioda_pe *pe; + int pe_num; + + if (!pdn) { + pr_err("%s: Device tree node not associated properly\n", + pci_name(dev)); + return NULL; + } + if (pdn->pe_number != IODA_INVALID_PE) + return NULL; + + /* PE#0 has been pre-set */ + if (dev->bus->number == 0) + pe_num = 0; + else + pe_num = pnv_ioda_alloc_pe(phb); + if (pe_num == IODA_INVALID_PE) { + pr_warning("%s: Not enough PE# available, disabling device\n", + pci_name(dev)); + return NULL; + } + + /* NOTE: We get only one ref to the pci_dev for the pdn, not for the + * pointer in the PE data structure, both should be destroyed at the + * same time. 
However, this needs to be looked at more closely again
+	 * once we actually start removing things (Hotplug, SR-IOV, ...)
+	 *
+	 * At some point we want to remove the PDN completely anyway.
+	 */
+	pe = &phb->ioda.pe_array[pe_num];
+	pci_dev_get(dev);
+	pdn->pcidev = dev;
+	pdn->pe_number = pe_num;
+	pe->pdev = dev;
+	pe->pbus = NULL;
+	pe->tce32_seg = -1;
+	pe->mve_number = -1;
+	pe->rid = dev->bus->number << 8 | pdn->devfn;
+
+	pe_info(pe, "Associated device to PE\n");
+
+	if (pnv_ioda_configure_pe(phb, pe)) {
+		/* XXX What do we do here ? */
+		if (pe_num)
+			pnv_ioda_free_pe(phb, pe_num);
+		pdn->pe_number = IODA_INVALID_PE;
+		pe->pdev = NULL;
+		pci_dev_put(dev);
+		return NULL;
+	}
+
+	/* Assign a DMA weight to the device */
+	pe->dma_weight = pnv_ioda_dma_weight(dev);
+	if (pe->dma_weight != 0) {
+		phb->ioda.dma_weight += pe->dma_weight;
+		phb->ioda.dma_pe_count++;
+	}
+
+	/* Link the PE */
+	pnv_ioda_link_pe_by_weight(phb, pe);
+
+	return pe;
+}
+#endif /* Useful for SRIOV case */
+
+static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		struct pci_dn *pdn = pci_get_pdn(dev);
+
+		if (pdn == NULL) {
+			pr_warn("%s: No device node associated with device!\n",
+				pci_name(dev));
+			continue;
+		}
+		pdn->pe_number = pe->pe_number;
+		pe->dma_weight += pnv_ioda_dma_weight(dev);
+		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
+			pnv_ioda_setup_same_PE(dev->subordinate, pe);
+	}
+}
+
+/*
+ * There are 2 types of PCI-bus-sensitive PEs: one that comprises a
+ * single PCI bus, and another that contains the primary PCI bus and
+ * its subordinate PCI devices and buses. The second type of PE is
+ * normally originated by a PCIe-to-PCI bridge or a PLX switch
+ * downstream port.
+ */
+static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct pnv_ioda_pe *pe;
+	int pe_num = IODA_INVALID_PE;
+
+	/* Check if PE is determined by M64 */
+	if (phb->pick_m64_pe)
+		pe_num = phb->pick_m64_pe(phb, bus, all);
+
+	/* The PE number isn't pinned by M64 */
+	if (pe_num == IODA_INVALID_PE)
+		pe_num = pnv_ioda_alloc_pe(phb);
+
+	if (pe_num == IODA_INVALID_PE) {
+		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
+			__func__, pci_domain_nr(bus), bus->number);
+		return;
+	}
+
+	pe = &phb->ioda.pe_array[pe_num];
+	pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
+	pe->pbus = bus;
+	pe->pdev = NULL;
+	pe->tce32_seg = -1;
+	pe->mve_number = -1;
+	pe->rid = bus->busn_res.start << 8;
+	pe->dma_weight = 0;
+
+	if (all)
+		pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
+			bus->busn_res.start, bus->busn_res.end, pe_num);
+	else
+		pe_info(pe, "Secondary bus %d associated with PE#%d\n",
+			bus->busn_res.start, pe_num);
+
+	if (pnv_ioda_configure_pe(phb, pe)) {
+		/* XXX What do we do here ?
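+		 * For now we just undo the allocation; pe_num 0 is never
+		 * freed here (PE#0 is pre-set).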
*/ + if (pe_num) + pnv_ioda_free_pe(phb, pe_num); + pe->pbus = NULL; + return; + } + + pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), + GFP_KERNEL, hose->node); + pe->tce32_table->data = pe; + + /* Associate it with all child devices */ + pnv_ioda_setup_same_PE(bus, pe); + + /* Put PE to the list */ + list_add_tail(&pe->list, &phb->ioda.pe_list); + + /* Account for one DMA PE if at least one DMA capable device exist + * below the bridge + */ + if (pe->dma_weight != 0) { + phb->ioda.dma_weight += pe->dma_weight; + phb->ioda.dma_pe_count++; + } + + /* Link the PE */ + pnv_ioda_link_pe_by_weight(phb, pe); +} + +static void pnv_ioda_setup_PEs(struct pci_bus *bus) +{ + struct pci_dev *dev; + + pnv_ioda_setup_bus_PE(bus, 0); + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (dev->subordinate) { + if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) + pnv_ioda_setup_bus_PE(dev->subordinate, 1); + else + pnv_ioda_setup_PEs(dev->subordinate); + } + } +} + +/* + * Configure PEs so that the downstream PCI buses and devices + * could have their associated PE#. Unfortunately, we didn't + * figure out the way to identify the PLX bridge yet. So we + * simply put the PCI bus and the subordinate behind the root + * port to PE# here. The game rule here is expected to be changed + * as soon as we can detected PLX bridge correctly. + */ +static void pnv_pci_ioda_setup_PEs(void) +{ + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + + /* M64 layout might affect PE allocation */ + if (phb->reserve_m64_pe) + phb->reserve_m64_pe(phb); + + pnv_ioda_setup_PEs(hose->bus); + } +} + +#ifdef CONFIG_PCI_IOV +static int pnv_pci_vf_release_m64(struct pci_dev *pdev) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + int i, j; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) + for (j = 0; j < M64_PER_IOV; j++) { + if (pdn->m64_wins[i][j] == IODA_INVALID_M64) + continue; + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0); + clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc); + pdn->m64_wins[i][j] = IODA_INVALID_M64; + } + + return 0; +} + +static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + unsigned int win; + struct resource *res; + int i, j; + int64_t rc; + int total_vfs; + resource_size_t size, start; + int pe_num; + int vf_groups; + int vf_per_group; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + total_vfs = pci_sriov_get_totalvfs(pdev); + + /* Initialize the m64_wins to IODA_INVALID_M64 */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) + for (j = 0; j < M64_PER_IOV; j++) + pdn->m64_wins[i][j] = IODA_INVALID_M64; + + if (pdn->m64_per_iov == M64_PER_IOV) { + vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV; + vf_per_group = (num_vfs <= M64_PER_IOV)? 
1: + roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; + } else { + vf_groups = 1; + vf_per_group = 1; + } + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + if (!pnv_pci_is_mem_pref_64(res->flags)) + continue; + + for (j = 0; j < vf_groups; j++) { + do { + win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, + phb->ioda.m64_bar_idx + 1, 0); + + if (win >= phb->ioda.m64_bar_idx + 1) + goto m64_failed; + } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); + + pdn->m64_wins[i][j] = win; + + if (pdn->m64_per_iov == M64_PER_IOV) { + size = pci_iov_resource_size(pdev, + PCI_IOV_RESOURCES + i); + size = size * vf_per_group; + start = res->start + size * j; + } else { + size = resource_size(res); + start = res->start; + } + + /* Map the M64 here */ + if (pdn->m64_per_iov == M64_PER_IOV) { + pe_num = pdn->offset + j; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe_num, OPAL_M64_WINDOW_TYPE, + pdn->m64_wins[i][j], 0); + } + + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + pdn->m64_wins[i][j], + start, + 0, /* unused */ + size); + + + if (rc != OPAL_SUCCESS) { + dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n", + win, rc); + goto m64_failed; + } + + if (pdn->m64_per_iov == M64_PER_IOV) + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2); + else + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1); + + if (rc != OPAL_SUCCESS) { + dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", + win, rc); + goto m64_failed; + } + } + } + return 0; + +m64_failed: + pnv_pci_vf_release_m64(pdev); + return -EBUSY; +} + +static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct iommu_table *tbl; + unsigned long addr; + int64_t rc; + + bus = dev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + tbl = pe->tce32_table; + addr = tbl->it_base; + + opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + pe->pe_number << 1, 1, __pa(addr), + 0, 0x1000); + + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, + pe->pe_number, + (pe->pe_number << 1) + 1, + pe->tce_bypass_base, + 0); + if (rc) + pe_warn(pe, "OPAL error %ld release DMA window\n", rc); + + iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); + free_pages(addr, get_order(TCE32_TABLE_SIZE)); + pe->tce32_table = NULL; +} + +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe, *pe_n; + struct pci_dn *pdn; + u16 vf_index; + int64_t rc; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + if (!pdev->is_physfn) + return; + + if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { + int vf_group; + int vf_per_group; + int vf_index1; + + vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; + + for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) + for (vf_index = vf_group * vf_per_group; + vf_index < (vf_group + 1) * vf_per_group && + vf_index < num_vfs; + vf_index++) + for (vf_index1 = vf_group * vf_per_group; + vf_index1 < (vf_group + 1) * vf_per_group && + vf_index1 < num_vfs; + vf_index1++){ + + rc = opal_pci_set_peltv(phb->opal_id, + pdn->offset + vf_index, + pdn->offset + vf_index1, + 
OPAL_REMOVE_PE_FROM_DOMAIN); + + if (rc) + dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n", + __func__, + pdn->offset + vf_index1, rc); + } + } + + list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { + if (pe->parent_dev != pdev) + continue; + + pnv_pci_ioda2_release_dma_pe(pdev, pe); + + /* Remove from list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + pnv_ioda_deconfigure_pe(phb, pe); + + pnv_ioda_free_pe(phb, pe->pe_number); + } +} + +void pnv_pci_sriov_disable(struct pci_dev *pdev) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + struct pci_sriov *iov; + u16 num_vfs; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + iov = pdev->sriov; + num_vfs = pdn->num_vfs; + + /* Release VF PEs */ + pnv_ioda_release_vf_PE(pdev, num_vfs); + + if (phb->type == PNV_PHB_IODA2) { + if (pdn->m64_per_iov == 1) + pnv_pci_vf_resource_shift(pdev, -pdn->offset); + + /* Release M64 windows */ + pnv_pci_vf_release_m64(pdev); + + /* Release PE numbers */ + bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); + pdn->offset = 0; + } +} + +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe); +static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + int pe_num; + u16 vf_index; + struct pci_dn *pdn; + int64_t rc; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + if (!pdev->is_physfn) + return; + + /* Reserve PE for each VF */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + pe_num = pdn->offset + vf_index; + + pe = &phb->ioda.pe_array[pe_num]; + pe->pe_number = pe_num; + pe->phb = phb; + pe->flags = PNV_IODA_PE_VF; + pe->pbus = NULL; + pe->parent_dev = pdev; + pe->tce32_seg = -1; + pe->mve_number = -1; + pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | + pci_iov_virtfn_devfn(pdev, vf_index); + + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n", + hose->global_number, pdev->bus->number, + PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), + PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + + if (pnv_ioda_configure_pe(phb, pe)) { + /* XXX What do we do here ? 
*/ + if (pe_num) + pnv_ioda_free_pe(phb, pe_num); + pe->pdev = NULL; + continue; + } + + pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), + GFP_KERNEL, hose->node); + pe->tce32_table->data = pe; + + /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_add_tail(&pe->list, &phb->ioda.pe_list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + pnv_pci_ioda2_setup_dma_pe(phb, pe); + } + + if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { + int vf_group; + int vf_per_group; + int vf_index1; + + vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; + + for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) { + for (vf_index = vf_group * vf_per_group; + vf_index < (vf_group + 1) * vf_per_group && + vf_index < num_vfs; + vf_index++) { + for (vf_index1 = vf_group * vf_per_group; + vf_index1 < (vf_group + 1) * vf_per_group && + vf_index1 < num_vfs; + vf_index1++) { + + rc = opal_pci_set_peltv(phb->opal_id, + pdn->offset + vf_index, + pdn->offset + vf_index1, + OPAL_ADD_PE_TO_DOMAIN); + + if (rc) + dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n", + __func__, + pdn->offset + vf_index1, rc); + } + } + } + } +} + +int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_bus *bus; + struct pci_controller *hose; + struct pnv_phb *phb; + struct pci_dn *pdn; + int ret; + + bus = pdev->bus; + hose = pci_bus_to_host(bus); + phb = hose->private_data; + pdn = pci_get_pdn(pdev); + + if (phb->type == PNV_PHB_IODA2) { + /* Calculate available PE for required VFs */ + mutex_lock(&phb->ioda.pe_alloc_mutex); + pdn->offset = bitmap_find_next_zero_area( + phb->ioda.pe_alloc, phb->ioda.total_pe, + 0, num_vfs, 0); + if (pdn->offset >= phb->ioda.total_pe) { + mutex_unlock(&phb->ioda.pe_alloc_mutex); + dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); + pdn->offset = 0; + return -EBUSY; + } + bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs); + pdn->num_vfs = num_vfs; + mutex_unlock(&phb->ioda.pe_alloc_mutex); + + /* Assign M64 window accordingly */ + ret = pnv_pci_vf_assign_m64(pdev, num_vfs); + if (ret) { + dev_info(&pdev->dev, "Not enough M64 window resources\n"); + goto m64_failed; + } + + /* + * When using one M64 BAR to map one IOV BAR, we need to shift + * the IOV BAR according to the PE# allocated to the VFs. + * Otherwise, the PE# for the VF will conflict with others. + */ + if (pdn->m64_per_iov == 1) { + ret = pnv_pci_vf_resource_shift(pdev, pdn->offset); + if (ret) + goto m64_failed; + } + } + + /* Setup VF PEs */ + pnv_ioda_setup_vf_PE(pdev, num_vfs); + + return 0; + +m64_failed: + bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); + pdn->offset = 0; + + return ret; +} + +int pcibios_sriov_disable(struct pci_dev *pdev) +{ + pnv_pci_sriov_disable(pdev); + + /* Release PCI data */ + remove_dev_pci_data(pdev); + return 0; +} + +int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + /* Allocate PCI data */ + add_dev_pci_data(pdev); + + pnv_pci_sriov_enable(pdev, num_vfs); + return 0; +} +#endif /* CONFIG_PCI_IOV */ + +static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + + /* + * The function can be called while the PE# + * hasn't been assigned. Do nothing for the + * case. 
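+	 * (This can happen if a device is probed before
+	 * pnv_pci_ioda_setup_PEs() has assigned PE numbers.)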
+ */ + if (!pdn || pdn->pe_number == IODA_INVALID_PE) + return; + + pe = &phb->ioda.pe_array[pdn->pe_number]; + WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); + set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table); +} + +static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, + struct pci_dev *pdev, u64 dma_mask) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + uint64_t top; + bool bypass = false; + + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return -ENODEV;; + + pe = &phb->ioda.pe_array[pdn->pe_number]; + if (pe->tce_bypass_enabled) { + top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1; + bypass = (dma_mask >= top); + } + + if (bypass) { + dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); + set_dma_ops(&pdev->dev, &dma_direct_ops); + set_dma_offset(&pdev->dev, pe->tce_bypass_base); + } else { + dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); + set_dma_ops(&pdev->dev, &dma_iommu_ops); + set_iommu_table_base(&pdev->dev, pe->tce32_table); + } + *pdev->dev.dma_mask = dma_mask; + return 0; +} + +static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, + struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + u64 end, mask; + + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return 0; + + pe = &phb->ioda.pe_array[pdn->pe_number]; + if (!pe->tce_bypass_enabled) + return __dma_get_required_mask(&pdev->dev); + + + end = pe->tce_bypass_base + memblock_end_of_DRAM(); + mask = 1ULL << (fls64(end) - 1); + mask += mask - 1; + + return mask; +} + +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, + struct pci_bus *bus, + bool add_to_iommu_group) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (add_to_iommu_group) + set_iommu_table_base_and_group(&dev->dev, + pe->tce32_table); + else + set_iommu_table_base(&dev->dev, pe->tce32_table); + + if (dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate, + add_to_iommu_group); + } +} + +static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, + struct iommu_table *tbl, + __be64 *startp, __be64 *endp, bool rm) +{ + __be64 __iomem *invalidate = rm ? + (__be64 __iomem *)pe->tce_inval_reg_phys : + (__be64 __iomem *)tbl->it_index; + unsigned long start, end, inc; + const unsigned shift = tbl->it_page_shift; + + start = __pa(startp); + end = __pa(endp); + + /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ + if (tbl->it_busno) { + start <<= shift; + end <<= shift; + inc = 128ull << shift; + start |= tbl->it_busno; + end |= tbl->it_busno; + } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { + /* p7ioc-style invalidation, 2 TCEs per write */ + start |= (1ull << 63); + end |= (1ull << 63); + inc = 16; + } else { + /* Default (older HW) */ + inc = 128; + } + + end |= inc - 1; /* round up end to be different than start */ + + mb(); /* Ensure above stores are visible */ + while (start <= end) { + if (rm) + __raw_rm_writeq(cpu_to_be64(start), invalidate); + else + __raw_writeq(cpu_to_be64(start), invalidate); + start += inc; + } + + /* + * The iommu layer will do another mb() for us on build() + * and we don't care on free() + */ +} + +static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, + struct iommu_table *tbl, + __be64 *startp, __be64 *endp, bool rm) +{ + unsigned long start, end, inc; + __be64 __iomem *invalidate = rm ? 
+ (__be64 __iomem *)pe->tce_inval_reg_phys : + (__be64 __iomem *)tbl->it_index; + const unsigned shift = tbl->it_page_shift; + + /* We'll invalidate DMA address in PE scope */ + start = 0x2ull << 60; + start |= (pe->pe_number & 0xFF); + end = start; + + /* Figure out the start, end and step */ + inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); + start |= (inc << shift); + inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); + end |= (inc << shift); + inc = (0x1ull << shift); + mb(); + + while (start <= end) { + if (rm) + __raw_rm_writeq(cpu_to_be64(start), invalidate); + else + __raw_writeq(cpu_to_be64(start), invalidate); + start += inc; + } +} + +void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, + __be64 *startp, __be64 *endp, bool rm) +{ + struct pnv_ioda_pe *pe = tbl->data; + struct pnv_phb *phb = pe->phb; + + if (phb->type == PNV_PHB_IODA1) + pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); + else + pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); +} + +static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, unsigned int base, + unsigned int segs) +{ + + struct page *tce_mem = NULL; + const __be64 *swinvp; + struct iommu_table *tbl; + unsigned int i; + int64_t rc; + void *addr; + + /* XXX FIXME: Handle 64-bit only DMA devices */ + /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ + /* XXX FIXME: Allocate multi-level tables on PHB3 */ + + /* We shouldn't already have a 32-bit DMA associated */ + if (WARN_ON(pe->tce32_seg >= 0)) + return; + + /* Grab a 32-bit TCE table */ + pe->tce32_seg = base; + pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", + (base << 28), ((base + segs) << 28) - 1); + + /* XXX Currently, we allocate one big contiguous table for the + * TCEs. We only really need one chunk per 256M of TCE space + * (ie per segment) but that's an optimization for later, it + * requires some added smarts with our get/put_tce implementation + */ + tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, + get_order(TCE32_TABLE_SIZE * segs)); + if (!tce_mem) { + pe_err(pe, " Failed to allocate a 32-bit TCE memory\n"); + goto fail; + } + addr = page_address(tce_mem); + memset(addr, 0, TCE32_TABLE_SIZE * segs); + + /* Configure HW */ + for (i = 0; i < segs; i++) { + rc = opal_pci_map_pe_dma_window(phb->opal_id, + pe->pe_number, + base + i, 1, + __pa(addr) + TCE32_TABLE_SIZE * i, + TCE32_TABLE_SIZE, 0x1000); + if (rc) { + pe_err(pe, " Failed to configure 32-bit TCE table," + " err %ld\n", rc); + goto fail; + } + } + + /* Setup linux iommu table */ + tbl = pe->tce32_table; + pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, + base << 28, IOMMU_PAGE_SHIFT_4K); + + /* OPAL variant of P7IOC SW invalidated TCEs */ + swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); + if (swinvp) { + /* We need a couple more fields -- an address and a data + * to or. Since the bus is only printed out on table free + * errors, and on the first pass the data will be a relative + * bus number, print that out instead. 
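+	 * Writing a TCE address to this register asks the PHB to
+	 * invalidate ("kill") the matching entries of its on-chip TCE
+	 * cache once the table in memory has been updated.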
+ */ + pe->tce_inval_reg_phys = be64_to_cpup(swinvp); + tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, + 8); + tbl->it_type |= (TCE_PCI_SWINV_CREATE | + TCE_PCI_SWINV_FREE | + TCE_PCI_SWINV_PAIR); + } + iommu_init_table(tbl, phb->hose->node); + + if (pe->flags & PNV_IODA_PE_DEV) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); + set_iommu_table_base_and_group(&pe->pdev->dev, tbl); + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); + } else if (pe->flags & PNV_IODA_PE_VF) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); + } + + return; + fail: + /* XXX Failure: Try to fallback to 64-bit only ? */ + if (pe->tce32_seg >= 0) + pe->tce32_seg = -1; + if (tce_mem) + __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); +} + +static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) +{ + struct pnv_ioda_pe *pe = tbl->data; + uint16_t window_id = (pe->pe_number << 1 ) + 1; + int64_t rc; + + pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis"); + if (enable) { + phys_addr_t top = memblock_end_of_DRAM(); + + top = roundup_pow_of_two(top); + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, + pe->pe_number, + window_id, + pe->tce_bypass_base, + top); + } else { + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, + pe->pe_number, + window_id, + pe->tce_bypass_base, + 0); + + /* + * EEH needs the mapping between IOMMU table and group + * of those VFIO/KVM pass-through devices. We can postpone + * resetting DMA ops until the DMA mask is configured in + * host side. + */ + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); + } + if (rc) + pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); + else + pe->tce_bypass_enabled = enable; +} + +static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe) +{ + /* TVE #1 is selected by PCI address bit 59 */ + pe->tce_bypass_base = 1ull << 59; + + /* Install set_bypass callback for VFIO */ + pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass; + + /* Enable bypass by default */ + pnv_pci_ioda2_set_bypass(pe->tce32_table, true); +} + +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe) +{ + struct page *tce_mem = NULL; + void *addr; + const __be64 *swinvp; + struct iommu_table *tbl; + unsigned int tce_table_size, end; + int64_t rc; + + /* We shouldn't already have a 32-bit DMA associated */ + if (WARN_ON(pe->tce32_seg >= 0)) + return; + + /* The PE will reserve all possible 32-bits space */ + pe->tce32_seg = 0; + end = (1 << ilog2(phb->ioda.m32_pci_base)); + tce_table_size = (end / 0x1000) * 8; + pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", + end); + + /* Allocate TCE table */ + tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, + get_order(tce_table_size)); + if (!tce_mem) { + pe_err(pe, "Failed to allocate a 32-bit TCE memory\n"); + goto fail; + } + addr = page_address(tce_mem); + memset(addr, 0, tce_table_size); + + /* + * Map TCE table through TVT. The TVE index is the PE number + * shifted by 1 bit for 32-bits DMA space. 
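+	 * For example, PE#5 uses TVE#10 (5 << 1) for its 32-bit TCE
+	 * table, and TVE#11 ((5 << 1) + 1) for the 64-bit bypass window
+	 * selected by PCI address bit 59.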
+ */ + rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + pe->pe_number << 1, 1, __pa(addr), + tce_table_size, 0x1000); + if (rc) { + pe_err(pe, "Failed to configure 32-bit TCE table," + " err %ld\n", rc); + goto fail; + } + + /* Setup linux iommu table */ + tbl = pe->tce32_table; + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, + IOMMU_PAGE_SHIFT_4K); + + /* OPAL variant of PHB3 invalidated TCEs */ + swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); + if (swinvp) { + /* We need a couple more fields -- an address and a data + * to or. Since the bus is only printed out on table free + * errors, and on the first pass the data will be a relative + * bus number, print that out instead. + */ + pe->tce_inval_reg_phys = be64_to_cpup(swinvp); + tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, + 8); + tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); + } + iommu_init_table(tbl, phb->hose->node); + + if (pe->flags & PNV_IODA_PE_DEV) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); + set_iommu_table_base_and_group(&pe->pdev->dev, tbl); + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); + } else if (pe->flags & PNV_IODA_PE_VF) { + iommu_register_group(tbl, phb->hose->global_number, + pe->pe_number); + } + + /* Also create a bypass window */ + if (!pnv_iommu_bypass_disabled) + pnv_pci_ioda2_setup_bypass_pe(phb, pe); + + return; +fail: + if (pe->tce32_seg >= 0) + pe->tce32_seg = -1; + if (tce_mem) + __free_pages(tce_mem, get_order(tce_table_size)); +} + +static void pnv_ioda_setup_dma(struct pnv_phb *phb) +{ + struct pci_controller *hose = phb->hose; + unsigned int residual, remaining, segs, tw, base; + struct pnv_ioda_pe *pe; + + /* If we have more PE# than segments available, hand out one + * per PE until we run out and let the rest fail. If not, + * then we assign at least one segment per PE, plus more based + * on the amount of devices under that PE + */ + if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) + residual = 0; + else + residual = phb->ioda.tce32_count - + phb->ioda.dma_pe_count; + + pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", + hose->global_number, phb->ioda.tce32_count); + pr_info("PCI: %d PE# for a total weight of %d\n", + phb->ioda.dma_pe_count, phb->ioda.dma_weight); + + /* Walk our PE list and configure their DMA segments, hand them + * out one base segment plus any residual segments based on + * weight + */ + remaining = phb->ioda.tce32_count; + tw = phb->ioda.dma_weight; + base = 0; + list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { + if (!pe->dma_weight) + continue; + if (!remaining) { + pe_warn(pe, "No DMA32 resources available\n"); + continue; + } + segs = 1; + if (residual) { + segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; + if (segs > remaining) + segs = remaining; + } + + /* + * For IODA2 compliant PHB3, we needn't care about the weight. + * The all available 32-bits DMA space will be assigned to + * the specific PE. 
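+ *
+ * Worked sizing example (editorial, not in the original source):
+ * pnv_pci_ioda2_setup_dma_pe() above sizes that single table as
+ * (end / 0x1000) * 8 bytes, one 8-byte TCE per 4K page of the
+ * window. With a hypothetical 2GB m32_pci_base, that works out to
+ * 0x80000000 / 0x1000 * 8 = 4MB of TCE table for the PE.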
+ */ + if (phb->type == PNV_PHB_IODA1) { + pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", + pe->dma_weight, segs); + pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); + } else { + pe_info(pe, "Assign DMA32 space\n"); + segs = 0; + pnv_pci_ioda2_setup_dma_pe(phb, pe); + } + + remaining -= segs; + base += segs; + } +} + +#ifdef CONFIG_PCI_MSI +static void pnv_ioda2_msi_eoi(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct irq_chip *chip = irq_data_get_irq_chip(d); + struct pnv_phb *phb = container_of(chip, struct pnv_phb, + ioda.irq_chip); + int64_t rc; + + rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); + WARN_ON_ONCE(rc); + + icp_native_eoi(d); +} + + +static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) +{ + struct irq_data *idata; + struct irq_chip *ichip; + + if (phb->type != PNV_PHB_IODA2) + return; + + if (!phb->ioda.irq_chip_init) { + /* + * First time we setup an MSI IRQ, we need to setup the + * corresponding IRQ chip to route correctly. + */ + idata = irq_get_irq_data(virq); + ichip = irq_data_get_irq_chip(idata); + phb->ioda.irq_chip_init = 1; + phb->ioda.irq_chip = *ichip; + phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; + } + irq_set_chip(virq, &phb->ioda.irq_chip); +} + +#ifdef CONFIG_CXL_BASE + +struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + return of_node_get(hose->dn); +} +EXPORT_SYMBOL(pnv_pci_get_phb_node); + +int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + int rc; + + pe = pnv_ioda_get_pe(dev); + if (!pe) + return -ENODEV; + + pe_info(pe, "Switching PHB to CXL\n"); + + rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number); + if (rc) + dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); + + return rc; +} +EXPORT_SYMBOL(pnv_phb_to_cxl_mode); + +/* Find PHB for cxl dev and allocate MSI hwirqs? 
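+ * (Editorial aside, not in the original source: the allocation
+ * below is a plain msi_bitmap allocation from the PHB's MSI range,
+ * and the value handed back is phb->msi_base + hwirq, i.e. a
+ * global hardware IRQ number rather than a bitmap index.)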
+ * Returns the absolute hardware IRQ number + */ +int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); + + if (hwirq < 0) { + dev_warn(&dev->dev, "Failed to find a free MSI\n"); + return -ENOSPC; + } + + return phb->msi_base + hwirq; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); + +void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); +} +EXPORT_SYMBOL(pnv_cxl_release_hwirqs); + +void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq; + + for (i = 1; i < CXL_IRQ_RANGES; i++) { + if (!irqs->range[i]) + continue; + pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", + i, irqs->offset[i], + irqs->range[i]); + hwirq = irqs->offset[i] - phb->msi_base; + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, + irqs->range[i]); + } +} +EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); + +int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq, try; + + memset(irqs, 0, sizeof(struct cxl_irq_ranges)); + + /* 0 is reserved for the multiplexed PSL DSI interrupt */ + for (i = 1; i < CXL_IRQ_RANGES && num; i++) { + try = num; + while (try) { + hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); + if (hwirq >= 0) + break; + try /= 2; + } + if (!try) + goto fail; + + irqs->offset[i] = phb->msi_base + hwirq; + irqs->range[i] = try; + pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", + i, irqs->offset[i], irqs->range[i]); + num -= try; + } + if (num) + goto fail; + + return 0; +fail: + pnv_cxl_release_hwirq_ranges(irqs, dev); + return -ENOSPC; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); + +int pnv_cxl_get_irq_count(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + return phb->msi_bmp.irq_count; +} +EXPORT_SYMBOL(pnv_cxl_get_irq_count); + +int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, + unsigned int virq) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + unsigned int xive_num = hwirq - phb->msi_base; + struct pnv_ioda_pe *pe; + int rc; + + if (!(pe = pnv_ioda_get_pe(dev))) + return -ENODEV; + + /* Assign XIVE to PE */ + rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); + if (rc) { + pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " + "hwirq 0x%x XIVE 0x%x PE\n", + pci_name(dev), rc, phb->msi_base, hwirq, xive_num); + return -EIO; + } + set_msi_irq_chip(phb, virq); + + return 0; +} +EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); +#endif + +static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, + unsigned int hwirq, unsigned int virq, + unsigned int is_64, struct msi_msg *msg) +{ + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); + unsigned int xive_num = hwirq - phb->msi_base; + __be32 data; + int rc; + + /* No PE assigned ? bail out ... no MSI for you ! 
*/ + if (pe == NULL) + return -ENXIO; + + /* Check if we have an MVE */ + if (pe->mve_number < 0) + return -ENXIO; + + /* Force 32-bit MSI on some broken devices */ + if (dev->no_64bit_msi) + is_64 = 0; + + /* Assign XIVE to PE */ + rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); + if (rc) { + pr_warn("%s: OPAL error %d setting XIVE %d PE\n", + pci_name(dev), rc, xive_num); + return -EIO; + } + + if (is_64) { + __be64 addr64; + + rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1, + &addr64, &data); + if (rc) { + pr_warn("%s: OPAL error %d getting 64-bit MSI data\n", + pci_name(dev), rc); + return -EIO; + } + msg->address_hi = be64_to_cpu(addr64) >> 32; + msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful; + } else { + __be32 addr32; + + rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1, + &addr32, &data); + if (rc) { + pr_warn("%s: OPAL error %d getting 32-bit MSI data\n", + pci_name(dev), rc); + return -EIO; + } + msg->address_hi = 0; + msg->address_lo = be32_to_cpu(addr32); + } + msg->data = be32_to_cpu(data); + + set_msi_irq_chip(phb, virq); + + pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," + " address=%x_%08x data=%x PE# %d\n", + pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, + msg->address_hi, msg->address_lo, data, pe->pe_number); + + return 0; +} + +static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) +{ + unsigned int count; + const __be32 *prop = of_get_property(phb->hose->dn, + "ibm,opal-msi-ranges", NULL); + if (!prop) { + /* BML Fallback */ + prop = of_get_property(phb->hose->dn, "msi-ranges", NULL); + } + if (!prop) + return; + + phb->msi_base = be32_to_cpup(prop); + count = be32_to_cpup(prop + 1); + if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { + pr_err("PCI %d: Failed to allocate MSI bitmap !\n", + phb->hose->global_number); + return; + } + + phb->msi_setup = pnv_pci_ioda_msi_setup; + phb->msi32_support = 1; + pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", + count, phb->msi_base); +} +#else +static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } +#endif /* CONFIG_PCI_MSI */ + +#ifdef CONFIG_PCI_IOV +static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + struct resource *res; + int i; + resource_size_t size; + struct pci_dn *pdn; + int mul, total_vfs; + + if (!pdev->is_physfn || pdev->is_added) + return; + + hose = pci_bus_to_host(pdev->bus); + phb = hose->private_data; + + pdn = pci_get_pdn(pdev); + pdn->vfs_expanded = 0; + + total_vfs = pci_sriov_get_totalvfs(pdev); + pdn->m64_per_iov = 1; + mul = phb->ioda.total_pe; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || res->parent) + continue; + if (!pnv_pci_is_mem_pref_64(res->flags)) { + dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n", + i, res); + continue; + } + + size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + + /* bigger than 64M */ + if (size > (1 << 26)) { + dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n", + i, res); + pdn->m64_per_iov = M64_PER_IOV; + mul = roundup_pow_of_two(total_vfs); + break; + } + } + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || res->parent) + continue; + if (!pnv_pci_is_mem_pref_64(res->flags)) { + dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n", + i, res); + continue; + } + + dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); + 
size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + res->end = res->start + size * mul - 1; + dev_dbg(&pdev->dev, " %pR\n", res); + dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", + i, res, mul); + } + pdn->vfs_expanded = mul; +} +#endif /* CONFIG_PCI_IOV */ + +/* + * This function is supposed to be called on basis of PE from top + * to bottom style. So the the I/O or MMIO segment assigned to + * parent PE could be overrided by its child PEs if necessary. + */ +static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, + struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = hose->private_data; + struct pci_bus_region region; + struct resource *res; + int i, index; + int rc; + + /* + * NOTE: We only care PCI bus based PE for now. For PCI + * device based PE, for example SRIOV sensitive VF should + * be figured out later. + */ + BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); + + pci_bus_for_each_resource(pe->pbus, res, i) { + if (!res || !res->flags || + res->start > res->end) + continue; + + if (res->flags & IORESOURCE_IO) { + region.start = res->start - phb->ioda.io_pci_base; + region.end = res->end - phb->ioda.io_pci_base; + index = region.start / phb->ioda.io_segsize; + + while (index < phb->ioda.total_pe && + region.start <= region.end) { + phb->ioda.io_segmap[index] = pe->pe_number; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); + if (rc != OPAL_SUCCESS) { + pr_err("%s: OPAL error %d when mapping IO " + "segment #%d to PE#%d\n", + __func__, rc, index, pe->pe_number); + break; + } + + region.start += phb->ioda.io_segsize; + index++; + } + } else if ((res->flags & IORESOURCE_MEM) && + !pnv_pci_is_mem_pref_64(res->flags)) { + region.start = res->start - + hose->mem_offset[0] - + phb->ioda.m32_pci_base; + region.end = res->end - + hose->mem_offset[0] - + phb->ioda.m32_pci_base; + index = region.start / phb->ioda.m32_segsize; + + while (index < phb->ioda.total_pe && + region.start <= region.end) { + phb->ioda.m32_segmap[index] = pe->pe_number; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); + if (rc != OPAL_SUCCESS) { + pr_err("%s: OPAL error %d when mapping M32 " + "segment#%d to PE#%d", + __func__, rc, index, pe->pe_number); + break; + } + + region.start += phb->ioda.m32_segsize; + index++; + } + } + } +} + +static void pnv_pci_ioda_setup_seg(void) +{ + struct pci_controller *tmp, *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + pnv_ioda_setup_pe_seg(hose, pe); + } + } +} + +static void pnv_pci_ioda_setup_DMA(void) +{ + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + pnv_ioda_setup_dma(hose->private_data); + + /* Mark the PHB initialization done */ + phb = hose->private_data; + phb->initialized = 1; + } +} + +static void pnv_pci_ioda_create_dbgfs(void) +{ +#ifdef CONFIG_DEBUG_FS + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + char name[16]; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + + sprintf(name, "PCI%04x", hose->global_number); + phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); + if (!phb->dbgfs) + pr_warning("%s: Error on creating debugfs on PHB#%x\n", + __func__, hose->global_number); + } +#endif /* CONFIG_DEBUG_FS */ +} + 
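+/*
+ * Editorial sketch, NOT part of the original commit: this models the
+ * segment arithmetic performed by pnv_ioda_setup_pe_seg() above. A
+ * window resource is converted to a PCI-bus-relative region, split
+ * into segsize-sized slices, and every slice the region overlaps is
+ * claimed for the owning PE#. All names here are hypothetical.
+ */
+#if 0
+static void example_map_window_to_pe(unsigned long start, unsigned long end,
+ unsigned long segsize, unsigned int total_pe,
+ unsigned int pe_number, unsigned int *segmap)
+{
+ unsigned int index = start / segsize;
+
+ /* Mirror the M32/IO loops: one mapping per overlapped segment */
+ while (index < total_pe && start <= end) {
+ segmap[index] = pe_number;
+ start += segsize;
+ index++;
+ }
+}
+#endif
+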
+static void pnv_pci_ioda_fixup(void) +{ + pnv_pci_ioda_setup_PEs(); + pnv_pci_ioda_setup_seg(); + pnv_pci_ioda_setup_DMA(); + + pnv_pci_ioda_create_dbgfs(); + +#ifdef CONFIG_EEH + eeh_init(); + eeh_addr_cache_build(); +#endif +} + +/* + * Returns the alignment for I/O or memory windows for P2P + * bridges. That actually depends on how PEs are segmented. + * For now, we return I/O or M32 segment size for PE sensitive + * P2P bridges. Otherwise, the default values (4KiB for I/O, + * 1MiB for memory) will be returned. + * + * The current PCI bus might be put into one PE, which was + * create against the parent PCI bridge. For that case, we + * needn't enlarge the alignment so that we can save some + * resources. + */ +static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, + unsigned long type) +{ + struct pci_dev *bridge; + struct pci_controller *hose = pci_bus_to_host(bus); + struct pnv_phb *phb = hose->private_data; + int num_pci_bridges = 0; + + bridge = bus->self; + while (bridge) { + if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) { + num_pci_bridges++; + if (num_pci_bridges >= 2) + return 1; + } + + bridge = bridge->bus->self; + } + + /* We fail back to M32 if M64 isn't supported */ + if (phb->ioda.m64_segsize && + pnv_pci_is_mem_pref_64(type)) + return phb->ioda.m64_segsize; + if (type & IORESOURCE_MEM) + return phb->ioda.m32_segsize; + + return phb->ioda.io_segsize; +} + +#ifdef CONFIG_PCI_IOV +static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, + int resno) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + resource_size_t align, iov_align; + + iov_align = resource_size(&pdev->resource[resno]); + if (iov_align) + return iov_align; + + align = pci_iov_resource_size(pdev, resno); + if (pdn->vfs_expanded) + return pdn->vfs_expanded * align; + + return align; +} +#endif /* CONFIG_PCI_IOV */ + +/* Prevent enabling devices for which we couldn't properly + * assign a PE + */ +static bool pnv_pci_enable_device_hook(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn; + + /* The function is probably called while the PEs have + * not be created yet. For example, resource reassignment + * during PCI probe period. We just skip the check if + * PEs isn't ready. 
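+ *
+ * Editorial aside (not in the original source): once
+ * phb->initialized is set in pnv_pci_ioda_setup_DMA(), any device
+ * whose pci_dn still carries IODA_INVALID_PE is refused below, so
+ * a driver can never bind to a device that ended up without a PE.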
+ */ + if (!phb->initialized) + return true; + + pdn = pci_get_pdn(dev); + if (!pdn || pdn->pe_number == IODA_INVALID_PE) + return false; + + return true; +} + +static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, + u32 devfn) +{ + return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; +} + +static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) +{ + opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, + OPAL_ASSERT_RESET); +} + +static void __init pnv_pci_init_ioda_phb(struct device_node *np, + u64 hub_id, int ioda_type) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + unsigned long size, m32map_off, pemap_off, iomap_off = 0; + const __be64 *prop64; + const __be32 *prop32; + int len; + u64 phb_id; + void *aux; + long rc; + + pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name); + + prop64 = of_get_property(np, "ibm,opal-phbid", NULL); + if (!prop64) { + pr_err(" Missing \"ibm,opal-phbid\" property !\n"); + return; + } + phb_id = be64_to_cpup(prop64); + pr_debug(" PHB-ID : 0x%016llx\n", phb_id); + + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); + + /* Allocate PCI controller */ + phb->hose = hose = pcibios_alloc_controller(np); + if (!phb->hose) { + pr_err(" Can't allocate PCI controller for %s\n", + np->full_name); + memblock_free(__pa(phb), sizeof(struct pnv_phb)); + return; + } + + spin_lock_init(&phb->lock); + prop32 = of_get_property(np, "bus-range", &len); + if (prop32 && len == 8) { + hose->first_busno = be32_to_cpu(prop32[0]); + hose->last_busno = be32_to_cpu(prop32[1]); + } else { + pr_warn(" Broken <bus-range> on %s\n", np->full_name); + hose->first_busno = 0; + hose->last_busno = 0xff; + } + hose->private_data = phb; + phb->hub_id = hub_id; + phb->opal_id = phb_id; + phb->type = ioda_type; + mutex_init(&phb->ioda.pe_alloc_mutex); + + /* Detect specific models for error handling */ + if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) + phb->model = PNV_PHB_MODEL_P7IOC; + else if (of_device_is_compatible(np, "ibm,power8-pciex")) + phb->model = PNV_PHB_MODEL_PHB3; + else + phb->model = PNV_PHB_MODEL_UNKNOWN; + + /* Parse 32-bit and IO ranges (if any) */ + pci_process_bridge_OF_ranges(hose, np, !hose->global_number); + + /* Get registers */ + phb->regs = of_iomap(np, 0); + if (phb->regs == NULL) + pr_err(" Failed to map registers !\n"); + + /* Initialize more IODA stuff */ + phb->ioda.total_pe = 1; + prop32 = of_get_property(np, "ibm,opal-num-pes", NULL); + if (prop32) + phb->ioda.total_pe = be32_to_cpup(prop32); + prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); + if (prop32) + phb->ioda.reserved_pe = be32_to_cpup(prop32); + + /* Parse 64-bit MMIO range */ + pnv_ioda_parse_m64_window(phb); + + phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); + /* FW Has already off top 64k of M32 space (MSI space) */ + phb->ioda.m32_size += 0x10000; + + phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe; + phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0]; + phb->ioda.io_size = hose->pci_io_size; + phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe; + phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ + + /* Allocate aux data & arrays. 
We don't have IO ports on PHB3 */
+ size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+ m32map_off = size;
+ size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
+ if (phb->type == PNV_PHB_IODA1) {
+ iomap_off = size;
+ size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
+ }
+ pemap_off = size;
+ size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
+ aux = memblock_virt_alloc(size, 0);
+ phb->ioda.pe_alloc = aux;
+ phb->ioda.m32_segmap = aux + m32map_off;
+ if (phb->type == PNV_PHB_IODA1)
+ phb->ioda.io_segmap = aux + iomap_off;
+ phb->ioda.pe_array = aux + pemap_off;
+ set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);
+
+ INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
+ INIT_LIST_HEAD(&phb->ioda.pe_list);
+ mutex_init(&phb->ioda.pe_list_mutex);
+
+ /* Calculate how many 32-bit TCE segments we have */
+ phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
+
+#if 0 /* We should really do that ... */
+ rc = opal_pci_set_phb_mem_window(opal->phb_id,
+ window_type,
+ window_num,
+ starting_real_address,
+ starting_pci_address,
+ segment_size);
+#endif
+
+ pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
+ phb->ioda.total_pe, phb->ioda.reserved_pe,
+ phb->ioda.m32_size, phb->ioda.m32_segsize);
+ if (phb->ioda.m64_size)
+ pr_info(" M64: 0x%lx [segment=0x%lx]\n",
+ phb->ioda.m64_size, phb->ioda.m64_segsize);
+ if (phb->ioda.io_size)
+ pr_info(" IO: 0x%x [segment=0x%x]\n",
+ phb->ioda.io_size, phb->ioda.io_segsize);
+
+
+ phb->hose->ops = &pnv_pci_ops;
+ phb->get_pe_state = pnv_ioda_get_pe_state;
+ phb->freeze_pe = pnv_ioda_freeze_pe;
+ phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
+
+ /* Setup RID -> PE mapping function */
+ phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
+
+ /* Setup TCEs */
+ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
+ phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
+ phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask;
+
+ /* Setup shutdown function for kexec */
+ phb->shutdown = pnv_pci_ioda_shutdown;
+
+ /* Setup MSI support */
+ pnv_pci_init_ioda_msis(phb);
+
+ /*
+ * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
+ * to let the PCI core do resource assignment. The PCI core
+ * is expected to do correct I/O and MMIO alignment for the
+ * P2P bridge BARs, so that each PCI bus (excluding the
+ * child P2P bridges) can form an individual PE.
+ */
+ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
+ pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook;
+ pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment;
+ pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus;
+ hose->controller_ops = pnv_pci_controller_ops;
+
+#ifdef CONFIG_PCI_IOV
+ ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
+ ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
+#endif
+
+ pci_add_flags(PCI_REASSIGN_ALL_RSRC);
+
+ /* Reset IODA tables to a clean state */
+ rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
+ if (rc)
+ pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
+
+ /* If we're running in a kdump kernel, the previous kernel never
+ * shut down PCI devices correctly, and we already have the IODA
+ * tables cleaned out. So we have to issue a PHB reset to stop
+ * all PCI transactions from the previous kernel.
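+ *
+ * Editorial aside (not in the original source): the sequence below
+ * asserts a fundamental reset and then deasserts it
+ * (EEH_RESET_DEACTIVATE), fencing any DMA still in flight from the
+ * crashed kernel before this kernel rescans the bus.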
+ */ + if (is_kdump_kernel()) { + pr_info(" Issue PHB reset ...\n"); + pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); + pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); + } + + /* Remove M64 resource if we can't configure it successfully */ + if (!phb->init_m64 || phb->init_m64(phb)) + hose->mem_resources[1].flags = 0; +} + +void __init pnv_pci_init_ioda2_phb(struct device_node *np) +{ + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); +} + +void __init pnv_pci_init_ioda_hub(struct device_node *np) +{ + struct device_node *phbn; + const __be64 *prop64; + u64 hub_id; + + pr_info("Probing IODA IO-Hub %s\n", np->full_name); + + prop64 = of_get_property(np, "ibm,opal-hubid", NULL); + if (!prop64) { + pr_err(" Missing \"ibm,opal-hubid\" property !\n"); + return; + } + hub_id = be64_to_cpup(prop64); + pr_devel(" HUB-ID : 0x%016llx\n", hub_id); + + /* Count child PHBs */ + for_each_child_of_node(np, phbn) { + /* Look for IODA1 PHBs */ + if (of_device_is_compatible(phbn, "ibm,ioda-phb")) + pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1); + } +} diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c new file mode 100644 index 000000000..4729ca793 --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -0,0 +1,236 @@ +/* + * Support PCI/PCIe on PowerNV platforms + * + * Currently supports only P5IOC2 + * + * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/msi.h> + +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/msi_bitmap.h> +#include <asm/ppc-pci.h> +#include <asm/opal.h> +#include <asm/iommu.h> +#include <asm/tce.h> + +#include "powernv.h" +#include "pci.h" + +/* For now, use a fixed amount of TCE memory for each p5ioc2 + * hub, 16M will do + */ +#define P5IOC2_TCE_MEMORY 0x01000000 + +#ifdef CONFIG_PCI_MSI +static int pnv_pci_p5ioc2_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, + unsigned int hwirq, unsigned int virq, + unsigned int is_64, struct msi_msg *msg) +{ + if (WARN_ON(!is_64)) + return -ENXIO; + msg->data = hwirq - phb->msi_base; + msg->address_hi = 0x10000000; + msg->address_lo = 0; + + return 0; +} + +static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) +{ + unsigned int count; + const __be32 *prop = of_get_property(phb->hose->dn, + "ibm,opal-msi-ranges", NULL); + if (!prop) + return; + + /* Don't do MSI's on p5ioc2 PCI-X are they are not properly + * verified in HW + */ + if (of_device_is_compatible(phb->hose->dn, "ibm,p5ioc2-pcix")) + return; + phb->msi_base = be32_to_cpup(prop); + count = be32_to_cpup(prop + 1); + if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { + pr_err("PCI %d: Failed to allocate MSI bitmap !\n", + phb->hose->global_number); + return; + } + phb->msi_setup = pnv_pci_p5ioc2_msi_setup; + phb->msi32_support = 0; + pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", + count, phb->msi_base); +} +#else +static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } +#endif /* CONFIG_PCI_MSI */ + +static void 
pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, + struct pci_dev *pdev) +{ + if (phb->p5ioc2.iommu_table.it_map == NULL) { + iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); + iommu_register_group(&phb->p5ioc2.iommu_table, + pci_domain_nr(phb->hose->bus), phb->opal_id); + } + + set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table); +} + +static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, + void *tce_mem, u64 tce_size) +{ + struct pnv_phb *phb; + const __be64 *prop64; + u64 phb_id; + int64_t rc; + static int primary = 1; + + pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name); + + prop64 = of_get_property(np, "ibm,opal-phbid", NULL); + if (!prop64) { + pr_err(" Missing \"ibm,opal-phbid\" property !\n"); + return; + } + phb_id = be64_to_cpup(prop64); + pr_devel(" PHB-ID : 0x%016llx\n", phb_id); + pr_devel(" TCE AT : 0x%016lx\n", __pa(tce_mem)); + pr_devel(" TCE SZ : 0x%016llx\n", tce_size); + + rc = opal_pci_set_phb_tce_memory(phb_id, __pa(tce_mem), tce_size); + if (rc != OPAL_SUCCESS) { + pr_err(" Failed to set TCE memory, OPAL error %lld\n", rc); + return; + } + + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); + phb->hose = pcibios_alloc_controller(np); + if (!phb->hose) { + pr_err(" Failed to allocate PCI controller\n"); + return; + } + + spin_lock_init(&phb->lock); + phb->hose->first_busno = 0; + phb->hose->last_busno = 0xff; + phb->hose->private_data = phb; + phb->hose->controller_ops = pnv_pci_controller_ops; + phb->hub_id = hub_id; + phb->opal_id = phb_id; + phb->type = PNV_PHB_P5IOC2; + phb->model = PNV_PHB_MODEL_P5IOC2; + + phb->regs = of_iomap(np, 0); + + if (phb->regs == NULL) + pr_err(" Failed to map registers !\n"); + else { + pr_devel(" P_BUID = 0x%08x\n", in_be32(phb->regs + 0x100)); + pr_devel(" P_IOSZ = 0x%08x\n", in_be32(phb->regs + 0x1b0)); + pr_devel(" P_IO_ST = 0x%08x\n", in_be32(phb->regs + 0x1e0)); + pr_devel(" P_MEM1_H = 0x%08x\n", in_be32(phb->regs + 0x1a0)); + pr_devel(" P_MEM1_L = 0x%08x\n", in_be32(phb->regs + 0x190)); + pr_devel(" P_MSZ1_L = 0x%08x\n", in_be32(phb->regs + 0x1c0)); + pr_devel(" P_MEM_ST = 0x%08x\n", in_be32(phb->regs + 0x1d0)); + pr_devel(" P_MEM2_H = 0x%08x\n", in_be32(phb->regs + 0x2c0)); + pr_devel(" P_MEM2_L = 0x%08x\n", in_be32(phb->regs + 0x2b0)); + pr_devel(" P_MSZ2_H = 0x%08x\n", in_be32(phb->regs + 0x2d0)); + pr_devel(" P_MSZ2_L = 0x%08x\n", in_be32(phb->regs + 0x2e0)); + } + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(phb->hose, np, primary); + primary = 0; + + phb->hose->ops = &pnv_pci_ops; + + /* Setup MSI support */ + pnv_pci_init_p5ioc2_msis(phb); + + /* Setup TCEs */ + phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup; + pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table, + tce_mem, tce_size, 0, + IOMMU_PAGE_SHIFT_4K); +} + +void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) +{ + struct device_node *phbn; + const __be64 *prop64; + u64 hub_id; + void *tce_mem; + uint64_t tce_per_phb; + int64_t rc; + int phb_count = 0; + + pr_info("Probing p5ioc2 IO-Hub %s\n", np->full_name); + + prop64 = of_get_property(np, "ibm,opal-hubid", NULL); + if (!prop64) { + pr_err(" Missing \"ibm,opal-hubid\" property !\n"); + return; + } + hub_id = be64_to_cpup(prop64); + pr_info(" HUB-ID : 0x%016llx\n", hub_id); + + /* Count child PHBs and calculate TCE space per PHB */ + for_each_child_of_node(np, phbn) { + if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || + 
of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) + phb_count++; + } + + if (phb_count <= 0) { + pr_info(" No PHBs for Hub %s\n", np->full_name); + return; + } + + tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); + pr_info(" Allocating %lld MB of TCE memory per PHB\n", + tce_per_phb >> 20); + + /* Currently allocate 16M of TCE memory for every Hub + * + * XXX TODO: Make it chip local if possible + */ + tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY); + pr_debug(" TCE : 0x%016lx..0x%016lx\n", + __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1); + rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem), + P5IOC2_TCE_MEMORY); + if (rc != OPAL_SUCCESS) { + pr_err(" Failed to allocate TCE memory, OPAL error %lld\n", rc); + return; + } + + /* Initialize PHBs */ + for_each_child_of_node(np, phbn) { + if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || + of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { + pnv_pci_init_p5ioc2_phb(phbn, hub_id, + tce_mem, tce_per_phb); + tce_mem += tce_per_phb; + } + } +} diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c new file mode 100644 index 000000000..bca2aeb6e --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci.c @@ -0,0 +1,783 @@ +/* + * Support PCI/PCIe on PowerNV platforms + * + * Currently supports only P5IOC2 + * + * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/msi.h> +#include <linux/iommu.h> + +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/msi_bitmap.h> +#include <asm/ppc-pci.h> +#include <asm/opal.h> +#include <asm/iommu.h> +#include <asm/tce.h> +#include <asm/firmware.h> +#include <asm/eeh_event.h> +#include <asm/eeh.h> + +#include "powernv.h" +#include "pci.h" + +/* Delay in usec */ +#define PCI_RESET_DELAY_US 3000000 + +#define cfg_dbg(fmt...) do { } while(0) +//#define cfg_dbg(fmt...) 
printk(fmt) + +#ifdef CONFIG_PCI_MSI +static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct msi_desc *entry; + struct msi_msg msg; + int hwirq; + unsigned int virq; + int rc; + + if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) + return -ENODEV; + + if (pdev->no_64bit_msi && !phb->msi32_support) + return -ENODEV; + + list_for_each_entry(entry, &pdev->msi_list, list) { + if (!entry->msi_attrib.is_64 && !phb->msi32_support) { + pr_warn("%s: Supports only 64-bit MSIs\n", + pci_name(pdev)); + return -ENXIO; + } + hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, 1); + if (hwirq < 0) { + pr_warn("%s: Failed to find a free MSI\n", + pci_name(pdev)); + return -ENOSPC; + } + virq = irq_create_mapping(NULL, phb->msi_base + hwirq); + if (virq == NO_IRQ) { + pr_warn("%s: Failed to map MSI to linux irq\n", + pci_name(pdev)); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1); + return -ENOMEM; + } + rc = phb->msi_setup(phb, pdev, phb->msi_base + hwirq, + virq, entry->msi_attrib.is_64, &msg); + if (rc) { + pr_warn("%s: Failed to setup MSI\n", pci_name(pdev)); + irq_dispose_mapping(virq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1); + return rc; + } + irq_set_msi_desc(virq, entry); + pci_write_msi_msg(virq, &msg); + } + return 0; +} + +static void pnv_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct msi_desc *entry; + + if (WARN_ON(!phb)) + return; + + list_for_each_entry(entry, &pdev->msi_list, list) { + if (entry->irq == NO_IRQ) + continue; + irq_set_msi_desc(entry->irq, NULL); + msi_bitmap_free_hwirqs(&phb->msi_bmp, + virq_to_hw(entry->irq) - phb->msi_base, 1); + irq_dispose_mapping(entry->irq); + } +} +#endif /* CONFIG_PCI_MSI */ + +static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, + struct OpalIoPhbErrorCommon *common) +{ + struct OpalIoP7IOCPhbErrorData *data; + int i; + + data = (struct OpalIoP7IOCPhbErrorData *)common; + pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n", + hose->global_number, be32_to_cpu(common->version)); + + if (data->brdgCtl) + pr_info("brdgCtl: %08x\n", + be32_to_cpu(data->brdgCtl)); + if (data->portStatusReg || data->rootCmplxStatus || + data->busAgentStatus) + pr_info("UtlSts: %08x %08x %08x\n", + be32_to_cpu(data->portStatusReg), + be32_to_cpu(data->rootCmplxStatus), + be32_to_cpu(data->busAgentStatus)); + if (data->deviceStatus || data->slotStatus || + data->linkStatus || data->devCmdStatus || + data->devSecStatus) + pr_info("RootSts: %08x %08x %08x %08x %08x\n", + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); + if (data->rootErrorStatus || data->uncorrErrorStatus || + data->corrErrorStatus) + pr_info("RootErrSts: %08x %08x %08x\n", + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); + if (data->tlpHdr1 || data->tlpHdr2 || + data->tlpHdr3 || data->tlpHdr4) + pr_info("RootErrLog: %08x %08x %08x %08x\n", + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); + if (data->sourceId || data->errorClass || + data->correlator) + pr_info("RootErrLog1: %08x %016llx %016llx\n", + be32_to_cpu(data->sourceId), + be64_to_cpu(data->errorClass), + be64_to_cpu(data->correlator)); + if 
(data->p7iocPlssr || data->p7iocCsr) + pr_info("PhbSts: %016llx %016llx\n", + be64_to_cpu(data->p7iocPlssr), + be64_to_cpu(data->p7iocCsr)); + if (data->lemFir) + pr_info("Lem: %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); + if (data->phbErrorStatus) + pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); + if (data->mmioErrorStatus) + pr_info("OutErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->mmioErrorStatus), + be64_to_cpu(data->mmioFirstErrorStatus), + be64_to_cpu(data->mmioErrorLog0), + be64_to_cpu(data->mmioErrorLog1)); + if (data->dma0ErrorStatus) + pr_info("InAErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->dma0ErrorStatus), + be64_to_cpu(data->dma0FirstErrorStatus), + be64_to_cpu(data->dma0ErrorLog0), + be64_to_cpu(data->dma0ErrorLog1)); + if (data->dma1ErrorStatus) + pr_info("InBErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->dma1ErrorStatus), + be64_to_cpu(data->dma1FirstErrorStatus), + be64_to_cpu(data->dma1ErrorLog0), + be64_to_cpu(data->dma1ErrorLog1)); + + for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { + if ((data->pestA[i] >> 63) == 0 && + (data->pestB[i] >> 63) == 0) + continue; + + pr_info("PE[%3d] A/B: %016llx %016llx\n", + i, be64_to_cpu(data->pestA[i]), + be64_to_cpu(data->pestB[i])); + } +} + +static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, + struct OpalIoPhbErrorCommon *common) +{ + struct OpalIoPhb3ErrorData *data; + int i; + + data = (struct OpalIoPhb3ErrorData*)common; + pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n", + hose->global_number, be32_to_cpu(common->version)); + if (data->brdgCtl) + pr_info("brdgCtl: %08x\n", + be32_to_cpu(data->brdgCtl)); + if (data->portStatusReg || data->rootCmplxStatus || + data->busAgentStatus) + pr_info("UtlSts: %08x %08x %08x\n", + be32_to_cpu(data->portStatusReg), + be32_to_cpu(data->rootCmplxStatus), + be32_to_cpu(data->busAgentStatus)); + if (data->deviceStatus || data->slotStatus || + data->linkStatus || data->devCmdStatus || + data->devSecStatus) + pr_info("RootSts: %08x %08x %08x %08x %08x\n", + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); + if (data->rootErrorStatus || data->uncorrErrorStatus || + data->corrErrorStatus) + pr_info("RootErrSts: %08x %08x %08x\n", + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); + if (data->tlpHdr1 || data->tlpHdr2 || + data->tlpHdr3 || data->tlpHdr4) + pr_info("RootErrLog: %08x %08x %08x %08x\n", + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); + if (data->sourceId || data->errorClass || + data->correlator) + pr_info("RootErrLog1: %08x %016llx %016llx\n", + be32_to_cpu(data->sourceId), + be64_to_cpu(data->errorClass), + be64_to_cpu(data->correlator)); + if (data->nFir) + pr_info("nFir: %016llx %016llx %016llx\n", + be64_to_cpu(data->nFir), + be64_to_cpu(data->nFirMask), + be64_to_cpu(data->nFirWOF)); + if (data->phbPlssr || data->phbCsr) + pr_info("PhbSts: %016llx %016llx\n", + be64_to_cpu(data->phbPlssr), + be64_to_cpu(data->phbCsr)); + if (data->lemFir) + pr_info("Lem: %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + 
be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); + if (data->phbErrorStatus) + pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); + if (data->mmioErrorStatus) + pr_info("OutErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->mmioErrorStatus), + be64_to_cpu(data->mmioFirstErrorStatus), + be64_to_cpu(data->mmioErrorLog0), + be64_to_cpu(data->mmioErrorLog1)); + if (data->dma0ErrorStatus) + pr_info("InAErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->dma0ErrorStatus), + be64_to_cpu(data->dma0FirstErrorStatus), + be64_to_cpu(data->dma0ErrorLog0), + be64_to_cpu(data->dma0ErrorLog1)); + if (data->dma1ErrorStatus) + pr_info("InBErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->dma1ErrorStatus), + be64_to_cpu(data->dma1FirstErrorStatus), + be64_to_cpu(data->dma1ErrorLog0), + be64_to_cpu(data->dma1ErrorLog1)); + + for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) { + if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 && + (be64_to_cpu(data->pestB[i]) >> 63) == 0) + continue; + + pr_info("PE[%3d] A/B: %016llx %016llx\n", + i, be64_to_cpu(data->pestA[i]), + be64_to_cpu(data->pestB[i])); + } +} + +void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, + unsigned char *log_buff) +{ + struct OpalIoPhbErrorCommon *common; + + if (!hose || !log_buff) + return; + + common = (struct OpalIoPhbErrorCommon *)log_buff; + switch (be32_to_cpu(common->ioType)) { + case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: + pnv_pci_dump_p7ioc_diag_data(hose, common); + break; + case OPAL_PHB_ERROR_DATA_TYPE_PHB3: + pnv_pci_dump_phb3_diag_data(hose, common); + break; + default: + pr_warn("%s: Unrecognized ioType %d\n", + __func__, be32_to_cpu(common->ioType)); + } +} + +static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) +{ + unsigned long flags, rc; + int has_diag, ret = 0; + + spin_lock_irqsave(&phb->lock, flags); + + /* Fetch PHB diag-data */ + rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + PNV_PCI_DIAG_BUF_SIZE); + has_diag = (rc == OPAL_SUCCESS); + + /* If PHB supports compound PE, to handle it */ + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, + pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (rc) { + pr_warn("%s: Failure %ld clearing frozen " + "PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + pe_no); + ret = -EIO; + } + } + + /* + * For now, let's only display the diag buffer when we fail to clear + * the EEH status. We'll do more sensible things later when we have + * proper EEH support. We need to make sure we don't pollute ourselves + * with the normal errors generated when probing empty slots + */ + if (has_diag && ret) + pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); + + spin_unlock_irqrestore(&phb->lock, flags); +} + +static void pnv_pci_config_check_eeh(struct pci_dn *pdn) +{ + struct pnv_phb *phb = pdn->phb->private_data; + u8 fstate; + __be16 pcierr; + int pe_no; + s64 rc; + + /* + * Get the PE#. During the PCI probe stage, we might not + * setup that yet. So all ER errors should be mapped to + * reserved PE. + */ + pe_no = pdn->pe_number; + if (pe_no == IODA_INVALID_PE) { + if (phb->type == PNV_PHB_P5IOC2) + pe_no = 0; + else + pe_no = phb->ioda.reserved_pe; + } + + /* + * Fetch frozen state. If the PHB support compound PE, + * we need handle that case. 
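+ *
+ * Editorial aside (not in the original source): IODA PHBs install
+ * phb->get_pe_state (pnv_ioda_get_pe_state, wired up in
+ * pnv_pci_init_ioda_phb()), which lets compound master/slave PEs
+ * report a merged state; the direct OPAL query below is the
+ * fallback when no callback is provided.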
+ */
+ if (phb->get_pe_state) {
+ fstate = phb->get_pe_state(phb, pe_no);
+ } else {
+ rc = opal_pci_eeh_freeze_status(phb->opal_id,
+ pe_no,
+ &fstate,
+ &pcierr,
+ NULL);
+ if (rc) {
+ pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n",
+ __func__, rc, phb->hose->global_number, pe_no);
+ return;
+ }
+ }
+
+ cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
+ (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
+
+ /* Clear the frozen state if applicable */
+ if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
+ fstate == OPAL_EEH_STOPPED_DMA_FREEZE ||
+ fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) {
+ /*
+ * If PHB supports compound PE, freeze it for
+ * consistency.
+ */
+ if (phb->freeze_pe)
+ phb->freeze_pe(phb, pe_no);
+
+ pnv_pci_handle_eeh_config(phb, pe_no);
+ }
+}
+
+int pnv_pci_cfg_read(struct pci_dn *pdn,
+ int where, int size, u32 *val)
+{
+ struct pnv_phb *phb = pdn->phb->private_data;
+ u32 bdfn = (pdn->busno << 8) | pdn->devfn;
+ s64 rc;
+
+ switch (size) {
+ case 1: {
+ u8 v8;
+ rc = opal_pci_config_read_byte(phb->opal_id, bdfn, where, &v8);
+ *val = (rc == OPAL_SUCCESS) ? v8 : 0xff;
+ break;
+ }
+ case 2: {
+ __be16 v16;
+ rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where,
+ &v16);
+ *val = (rc == OPAL_SUCCESS) ? be16_to_cpu(v16) : 0xffff;
+ break;
+ }
+ case 4: {
+ __be32 v32;
+ rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &v32);
+ *val = (rc == OPAL_SUCCESS) ? be32_to_cpu(v32) : 0xffffffff;
+ break;
+ }
+ default:
+ return PCIBIOS_FUNC_NOT_SUPPORTED;
+ }
+
+ cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+ __func__, pdn->busno, pdn->devfn, where, size, *val);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+int pnv_pci_cfg_write(struct pci_dn *pdn,
+ int where, int size, u32 val)
+{
+ struct pnv_phb *phb = pdn->phb->private_data;
+ u32 bdfn = (pdn->busno << 8) | pdn->devfn;
+
+ cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+ __func__, pdn->busno, pdn->devfn, where, size, val);
+ switch (size) {
+ case 1:
+ opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
+ break;
+ case 2:
+ opal_pci_config_write_half_word(phb->opal_id, bdfn, where, val);
+ break;
+ case 4:
+ opal_pci_config_write_word(phb->opal_id, bdfn, where, val);
+ break;
+ default:
+ return PCIBIOS_FUNC_NOT_SUPPORTED;
+ }
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+#ifdef CONFIG_EEH
+static bool pnv_pci_cfg_check(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = NULL;
+ struct pnv_phb *phb = pdn->phb->private_data;
+
+ /* EEH not enabled ? */
+ if (!(phb->flags & PNV_PHB_FLAG_EEH))
+ return true;
+
+ /* PE reset or device removed ?
*/
+ edev = pdn->edev;
+ if (edev) {
+ if (edev->pe &&
+ (edev->pe->state & EEH_PE_CFG_BLOCKED))
+ return false;
+
+ if (edev->mode & EEH_DEV_REMOVED)
+ return false;
+ }
+
+ return true;
+}
+#else
+static inline bool pnv_pci_cfg_check(struct pci_dn *pdn)
+{
+ return true;
+}
+#endif /* CONFIG_EEH */
+
+static int pnv_pci_read_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 *val)
+{
+ struct pci_dn *pdn;
+ struct pnv_phb *phb;
+ int ret;
+
+ *val = 0xFFFFFFFF;
+ pdn = pci_get_pdn_by_devfn(bus, devfn);
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (!pnv_pci_cfg_check(pdn))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ ret = pnv_pci_cfg_read(pdn, where, size, val);
+ phb = pdn->phb->private_data;
+ if (phb->flags & PNV_PHB_FLAG_EEH && pdn->edev) {
+ if (*val == EEH_IO_ERROR_VALUE(size) &&
+ eeh_dev_check_failure(pdn->edev))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ } else {
+ pnv_pci_config_check_eeh(pdn);
+ }
+
+ return ret;
+}
+
+static int pnv_pci_write_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 val)
+{
+ struct pci_dn *pdn;
+ struct pnv_phb *phb;
+ int ret;
+
+ pdn = pci_get_pdn_by_devfn(bus, devfn);
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (!pnv_pci_cfg_check(pdn))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ ret = pnv_pci_cfg_write(pdn, where, size, val);
+ phb = pdn->phb->private_data;
+ if (!(phb->flags & PNV_PHB_FLAG_EEH))
+ pnv_pci_config_check_eeh(pdn);
+
+ return ret;
+}
+
+struct pci_ops pnv_pci_ops = {
+ .read = pnv_pci_read_config,
+ .write = pnv_pci_write_config,
+};
+
+static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ unsigned long uaddr, enum dma_data_direction direction,
+ struct dma_attrs *attrs, bool rm)
+{
+ u64 proto_tce;
+ __be64 *tcep, *tces;
+ u64 rpn;
+
+ proto_tce = TCE_PCI_READ; /* Read allowed */
+
+ if (direction != DMA_TO_DEVICE)
+ proto_tce |= TCE_PCI_WRITE;
+
+ tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
+ rpn = __pa(uaddr) >> tbl->it_page_shift;
+
+ while (npages--)
+ *(tcep++) = cpu_to_be64(proto_tce |
+ (rpn++ << tbl->it_page_shift));
+
+ /* Some implementations won't cache invalid TCEs and thus may not
+ * need that flush.
We'll probably turn it_type into a bit mask + * of flags if that becomes the case + */ + if (tbl->it_type & TCE_PCI_SWINV_CREATE) + pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); + + return 0; +} + +static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, + false); +} + +static void pnv_tce_free(struct iommu_table *tbl, long index, long npages, + bool rm) +{ + __be64 *tcep, *tces; + + tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; + + while (npages--) + *(tcep++) = cpu_to_be64(0); + + if (tbl->it_type & TCE_PCI_SWINV_FREE) + pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); +} + +static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages) +{ + pnv_tce_free(tbl, index, npages, false); +} + +static unsigned long pnv_tce_get(struct iommu_table *tbl, long index) +{ + return ((u64 *)tbl->it_base)[index - tbl->it_offset]; +} + +static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true); +} + +static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages) +{ + pnv_tce_free(tbl, index, npages, true); +} + +void pnv_pci_setup_iommu_table(struct iommu_table *tbl, + void *tce_mem, u64 tce_size, + u64 dma_offset, unsigned page_shift) +{ + tbl->it_blocksize = 16; + tbl->it_base = (unsigned long)tce_mem; + tbl->it_page_shift = page_shift; + tbl->it_offset = dma_offset >> tbl->it_page_shift; + tbl->it_index = 0; + tbl->it_size = tce_size >> 3; + tbl->it_busno = 0; + tbl->it_type = TCE_PCI; +} + +static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; +#ifdef CONFIG_PCI_IOV + struct pnv_ioda_pe *pe; + struct pci_dn *pdn; + + /* Fix the VF pdn PE number */ + if (pdev->is_virtfn) { + pdn = pci_get_pdn(pdev); + WARN_ON(pdn->pe_number != IODA_INVALID_PE); + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + if (pe->rid == ((pdev->bus->number << 8) | + (pdev->devfn & 0xff))) { + pdn->pe_number = pe->pe_number; + pe->pdev = pdev; + break; + } + } + } +#endif /* CONFIG_PCI_IOV */ + + if (phb && phb->dma_dev_setup) + phb->dma_dev_setup(phb, pdev); +} + +int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + + if (phb && phb->dma_set_mask) + return phb->dma_set_mask(phb, pdev, dma_mask); + return __dma_set_mask(&pdev->dev, dma_mask); +} + +u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + + if (phb && phb->dma_get_required_mask) + return phb->dma_get_required_mask(phb, pdev); + + return __dma_get_required_mask(&pdev->dev); +} + +void pnv_pci_shutdown(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) { + struct pnv_phb *phb = hose->private_data; + + if (phb && phb->shutdown) + phb->shutdown(phb); + } +} + +/* Fixup wrong class code in p7ioc and p8 root complex */ +static void pnv_p7ioc_rc_quirk(struct pci_dev *dev) +{ + dev->class = PCI_CLASS_BRIDGE_PCI << 8; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, 
pnv_p7ioc_rc_quirk); + +void __init pnv_pci_init(void) +{ + struct device_node *np; + bool found_ioda = false; + + pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN); + + /* If we don't have OPAL, eg. in sim, just skip PCI probe */ + if (!firmware_has_feature(FW_FEATURE_OPAL)) + return; + + /* Look for IODA IO-Hubs. We don't support mixing IODA + * and p5ioc2 due to the need to change some global + * probing flags + */ + for_each_compatible_node(np, NULL, "ibm,ioda-hub") { + pnv_pci_init_ioda_hub(np); + found_ioda = true; + } + + /* Look for p5ioc2 IO-Hubs */ + if (!found_ioda) + for_each_compatible_node(np, NULL, "ibm,p5ioc2") + pnv_pci_init_p5ioc2_hub(np); + + /* Look for ioda2 built-in PHB3's */ + for_each_compatible_node(np, NULL, "ibm,ioda2-phb") + pnv_pci_init_ioda2_phb(np); + + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + + /* Configure IOMMU DMA hooks */ + ppc_md.tce_build = pnv_tce_build_vm; + ppc_md.tce_free = pnv_tce_free_vm; + ppc_md.tce_build_rm = pnv_tce_build_rm; + ppc_md.tce_free_rm = pnv_tce_free_rm; + ppc_md.tce_get = pnv_tce_get; + set_pci_dma_ops(&dma_iommu_ops); + + /* Configure MSIs */ +#ifdef CONFIG_PCI_MSI + ppc_md.setup_msi_irqs = pnv_setup_msi_irqs; + ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs; +#endif +} + +machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); + +struct pci_controller_ops pnv_pci_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, +}; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h new file mode 100644 index 000000000..070ee888f --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci.h @@ -0,0 +1,221 @@ +#ifndef __POWERNV_PCI_H +#define __POWERNV_PCI_H + +struct pci_dn; + +enum pnv_phb_type { + PNV_PHB_P5IOC2 = 0, + PNV_PHB_IODA1 = 1, + PNV_PHB_IODA2 = 2, +}; + +/* Precise PHB model for error management */ +enum pnv_phb_model { + PNV_PHB_MODEL_UNKNOWN, + PNV_PHB_MODEL_P5IOC2, + PNV_PHB_MODEL_P7IOC, + PNV_PHB_MODEL_PHB3, +}; + +#define PNV_PCI_DIAG_BUF_SIZE 8192 +#define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */ +#define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */ +#define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ +#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */ +#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ +#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ + +/* Data associated with a PE, including IOMMU tracking etc.. */ +struct pnv_phb; +struct pnv_ioda_pe { + unsigned long flags; + struct pnv_phb *phb; + + /* A PE can be associated with a single device or an + * entire bus (& children). In the former case, pdev + * is populated, in the later case, pbus is. + */ +#ifdef CONFIG_PCI_IOV + struct pci_dev *parent_dev; +#endif + struct pci_dev *pdev; + struct pci_bus *pbus; + + /* Effective RID (device RID for a device PE and base bus + * RID with devfn 0 for a bus PE) + */ + unsigned int rid; + + /* PE number */ + unsigned int pe_number; + + /* "Weight" assigned to the PE for the sake of DMA resource + * allocations + */ + unsigned int dma_weight; + + /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ + int tce32_seg; + int tce32_segcount; + struct iommu_table *tce32_table; + phys_addr_t tce_inval_reg_phys; + + /* 64-bit TCE bypass region */ + bool tce_bypass_enabled; + uint64_t tce_bypass_base; + + /* MSIs. MVE index is identical for for 32 and 64 bit MSI + * and -1 if not supported. 
(It's actually identical to the + * PE number) + */ + int mve_number; + + /* PEs in compound case */ + struct pnv_ioda_pe *master; + struct list_head slaves; + + /* Link in list of PE#s */ + struct list_head dma_link; + struct list_head list; +}; + +#define PNV_PHB_FLAG_EEH (1 << 0) + +struct pnv_phb { + struct pci_controller *hose; + enum pnv_phb_type type; + enum pnv_phb_model model; + u64 hub_id; + u64 opal_id; + int flags; + void __iomem *regs; + int initialized; + spinlock_t lock; + +#ifdef CONFIG_DEBUG_FS + int has_dbgfs; + struct dentry *dbgfs; +#endif + +#ifdef CONFIG_PCI_MSI + unsigned int msi_base; + unsigned int msi32_support; + struct msi_bitmap msi_bmp; +#endif + int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev, + unsigned int hwirq, unsigned int virq, + unsigned int is_64, struct msi_msg *msg); + void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); + int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev, + u64 dma_mask); + u64 (*dma_get_required_mask)(struct pnv_phb *phb, + struct pci_dev *pdev); + void (*fixup_phb)(struct pci_controller *hose); + u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); + void (*shutdown)(struct pnv_phb *phb); + int (*init_m64)(struct pnv_phb *phb); + void (*reserve_m64_pe)(struct pnv_phb *phb); + int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); + int (*get_pe_state)(struct pnv_phb *phb, int pe_no); + void (*freeze_pe)(struct pnv_phb *phb, int pe_no); + int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); + + union { + struct { + struct iommu_table iommu_table; + } p5ioc2; + + struct { + /* Global bridge info */ + unsigned int total_pe; + unsigned int reserved_pe; + + /* 32-bit MMIO window */ + unsigned int m32_size; + unsigned int m32_segsize; + unsigned int m32_pci_base; + + /* 64-bit MMIO window */ + unsigned int m64_bar_idx; + unsigned long m64_size; + unsigned long m64_segsize; + unsigned long m64_base; + unsigned long m64_bar_alloc; + + /* IO ports */ + unsigned int io_size; + unsigned int io_segsize; + unsigned int io_pci_base; + + /* PE allocation bitmap */ + unsigned long *pe_alloc; + /* PE allocation mutex */ + struct mutex pe_alloc_mutex; + + /* M32 & IO segment maps */ + unsigned int *m32_segmap; + unsigned int *io_segmap; + struct pnv_ioda_pe *pe_array; + + /* IRQ chip */ + int irq_chip_init; + struct irq_chip irq_chip; + + /* Sorted list of used PE's based + * on the sequence of creation + */ + struct list_head pe_list; + struct mutex pe_list_mutex; + + /* Reverse map of PEs, will have to extend if + * we are to support more than 256 PEs, indexed + * bus { bus, devfn } + */ + unsigned char pe_rmap[0x10000]; + + /* 32-bit TCE tables allocation */ + unsigned long tce32_count; + + /* Total "weight" for the sake of DMA resources + * allocation + */ + unsigned int dma_weight; + unsigned int dma_pe_count; + + /* Sorted list of used PE's, sorted at + * boot for resource allocation purposes + */ + struct list_head pe_dma_list; + } ioda; + }; + + /* PHB and hub status structure */ + union { + unsigned char blob[PNV_PCI_DIAG_BUF_SIZE]; + struct OpalIoP7IOCPhbErrorData p7ioc; + struct OpalIoPhb3ErrorData phb3; + struct OpalIoP7IOCErrorData hub_diag; + } diag; + +}; + +extern struct pci_ops pnv_pci_ops; + +void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, + unsigned char *log_buff); +int pnv_pci_cfg_read(struct pci_dn *pdn, + int where, int size, u32 *val); +int pnv_pci_cfg_write(struct pci_dn *pdn, + int where, int size, u32 val); +extern void 
pnv_pci_setup_iommu_table(struct iommu_table *tbl, + void *tce_mem, u64 tce_size, + u64 dma_offset, unsigned page_shift); +extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); +extern void pnv_pci_init_ioda_hub(struct device_node *np); +extern void pnv_pci_init_ioda2_phb(struct device_node *np); +extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, + __be64 *startp, __be64 *endp, bool rm); +extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); +extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); + +#endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h new file mode 100644 index 000000000..826d2c9be --- /dev/null +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -0,0 +1,40 @@ +#ifndef _POWERNV_H +#define _POWERNV_H + +#ifdef CONFIG_SMP +extern void pnv_smp_init(void); +#else +static inline void pnv_smp_init(void) { } +#endif + +struct pci_dev; + +#ifdef CONFIG_PCI +extern void pnv_pci_init(void); +extern void pnv_pci_shutdown(void); +extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask); +extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev); +#else +static inline void pnv_pci_init(void) { } +static inline void pnv_pci_shutdown(void) { } + +static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) +{ + return -ENODEV; +} + +static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) +{ + return 0; +} +#endif + +extern struct pci_controller_ops pnv_pci_controller_ops; + +extern u32 pnv_get_supported_cpuidle_states(void); + +extern void pnv_lpc_init(void); + +bool cpu_core_split_required(void); + +#endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c new file mode 100644 index 000000000..6eb808ff6 --- /dev/null +++ b/arch/powerpc/platforms/powernv/rng.c @@ -0,0 +1,155 @@ +/* + * Copyright 2013, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#define pr_fmt(fmt) "powernv-rng: " fmt + +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <asm/archrandom.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/smp.h> + + +struct powernv_rng { + void __iomem *regs; + void __iomem *regs_real; + unsigned long mask; +}; + +static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); + + +int powernv_hwrng_present(void) +{ + struct powernv_rng *rng; + + rng = get_cpu_var(powernv_rng); + put_cpu_var(rng); + return rng != NULL; +} + +static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) +{ + unsigned long parity; + + /* Calculate the parity of the value */ + asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + + /* xor our value with the previous mask */ + val ^= rng->mask; + + /* update the mask based on the parity of this value */ + rng->mask = (rng->mask << 1) | (parity & 1); + + return val; +} + +int powernv_get_random_real_mode(unsigned long *v) +{ + struct powernv_rng *rng; + + rng = raw_cpu_read(powernv_rng); + + *v = rng_whiten(rng, in_rm64(rng->regs_real)); + + return 1; +} + +int powernv_get_random_long(unsigned long *v) +{ + struct powernv_rng *rng; + + rng = get_cpu_var(powernv_rng); + + *v = rng_whiten(rng, in_be64(rng->regs)); + + put_cpu_var(rng); + + return 1; +} +EXPORT_SYMBOL_GPL(powernv_get_random_long); + +static __init void rng_init_per_cpu(struct powernv_rng *rng, + struct device_node *dn) +{ + int chip_id, cpu; + + chip_id = of_get_ibm_chip_id(dn); + if (chip_id == -1) + pr_warn("No ibm,chip-id found for %s.\n", dn->full_name); + + for_each_possible_cpu(cpu) { + if (per_cpu(powernv_rng, cpu) == NULL || + cpu_to_chip_id(cpu) == chip_id) { + per_cpu(powernv_rng, cpu) = rng; + } + } +} + +static __init int rng_create(struct device_node *dn) +{ + struct powernv_rng *rng; + struct resource res; + unsigned long val; + + rng = kzalloc(sizeof(*rng), GFP_KERNEL); + if (!rng) + return -ENOMEM; + + if (of_address_to_resource(dn, 0, &res)) { + kfree(rng); + return -ENXIO; + } + + rng->regs_real = (void __iomem *)res.start; + + rng->regs = of_iomap(dn, 0); + if (!rng->regs) { + kfree(rng); + return -ENXIO; + } + + val = in_be64(rng->regs); + rng->mask = val; + + rng_init_per_cpu(rng, dn); + + pr_info_once("Registering arch random hook.\n"); + + ppc_md.get_random_long = powernv_get_random_long; + + return 0; +} + +static __init int rng_init(void) +{ + struct device_node *dn; + int rc; + + for_each_compatible_node(dn, NULL, "ibm,power-rng") { + rc = rng_create(dn); + if (rc) { + pr_err("Failed creating rng for %s (%d).\n", + dn->full_name, rc); + continue; + } + + /* Create devices for hwrng driver */ + of_platform_device_create(dn, NULL, NULL); + } + + return 0; +} +machine_subsys_initcall(powernv, rng_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c new file mode 100644 index 000000000..16fdcb23f --- /dev/null +++ b/arch/powerpc/platforms/powernv/setup.c @@ -0,0 +1,503 @@ +/* + * PowerNV setup code. + * + * Copyright 2011 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#undef DEBUG
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/tty.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/interrupt.h>
+#include <linux/bug.h>
+#include <linux/pci.h>
+#include <linux/cpufreq.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/xics.h>
+#include <asm/opal.h>
+#include <asm/kexec.h>
+#include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
+
+#include "powernv.h"
+#include "subcore.h"
+
+static void __init pnv_setup_arch(void)
+{
+ set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+
+ /* Initialize SMP */
+ pnv_smp_init();
+
+ /* Setup PCI */
+ pnv_pci_init();
+
+ /* Setup RTC and NVRAM callbacks */
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ opal_nvram_init();
+
+ /* Enable NAP mode */
+ powersave_nap = 1;
+
+ /* XXX PMCS */
+}
+
+static void __init pnv_init_early(void)
+{
+ /*
+ * Initialize the LPC bus now so that legacy serial
+ * ports can be found on it
+ */
+ opal_lpc_init();
+
+#ifdef CONFIG_HVC_OPAL
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ hvc_opal_init_early();
+ else
+#endif
+ add_preferred_console("hvc", 0, NULL);
+}
+
+static void __init pnv_init_IRQ(void)
+{
+ xics_init();
+
+ WARN_ON(!ppc_md.get_irq);
+}
+
+static void pnv_show_cpuinfo(struct seq_file *m)
+{
+ struct device_node *root;
+ const char *model = "";
+
+ root = of_find_node_by_path("/");
+ if (root)
+ model = of_get_property(root, "model", NULL);
+ seq_printf(m, "machine\t\t: PowerNV %s\n", model);
+ if (firmware_has_feature(FW_FEATURE_OPALv3))
+ seq_printf(m, "firmware\t: OPAL v3\n");
+ else if (firmware_has_feature(FW_FEATURE_OPALv2))
+ seq_printf(m, "firmware\t: OPAL v2\n");
+ else if (firmware_has_feature(FW_FEATURE_OPAL))
+ seq_printf(m, "firmware\t: OPAL v1\n");
+ else
+ seq_printf(m, "firmware\t: BML\n");
+ of_node_put(root);
+}
+
+static void pnv_prepare_going_down(void)
+{
+ /*
+ * Disable all notifiers from OPAL; we can't
+ * service interrupts anymore anyway
+ */
+ opal_notifier_disable();
+
+ /* Soft disable interrupts */
+ local_irq_disable();
+
+ /*
+ * Return secondary CPUs to firmware if a flash update
+ * is pending, otherwise we will get all sorts of error
+ * messages about CPUs being stuck etc.. This will also
+ * have the side effect of hard disabling interrupts so
+ * past this point, the kernel is effectively dead.
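+ *
+ * (Editor's note: the reboot and power-off paths below both follow the
+ * same OPAL retry convention. A minimal sketch of that pattern, where
+ * opal_do_op() is a hypothetical stand-in for any OPAL call that can
+ * return OPAL_BUSY / OPAL_BUSY_EVENT:
+ *
+ *	long rc = OPAL_BUSY;
+ *
+ *	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+ *		rc = opal_do_op();
+ *		if (rc == OPAL_BUSY_EVENT)
+ *			opal_poll_events(NULL);
+ *		else
+ *			mdelay(10);
+ *	}
+ *
+ * opal_poll_events() lets OPAL make progress on the pending event and
+ * the mdelay() is a plain back-off; opal_cec_reboot() and
+ * opal_cec_power_down() below are the two real instances of this loop.)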
+ */ + opal_flash_term_callback(); +} + +static void __noreturn pnv_restart(char *cmd) +{ + long rc = OPAL_BUSY; + + pnv_prepare_going_down(); + + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_cec_reboot(); + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + else + mdelay(10); + } + for (;;) + opal_poll_events(NULL); +} + +static void __noreturn pnv_power_off(void) +{ + long rc = OPAL_BUSY; + + pnv_prepare_going_down(); + + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_cec_power_down(0); + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + else + mdelay(10); + } + for (;;) + opal_poll_events(NULL); +} + +static void __noreturn pnv_halt(void) +{ + pnv_power_off(); +} + +static void pnv_progress(char *s, unsigned short hex) +{ +} + +static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (dev_is_pci(dev)) + return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask); + return __dma_set_mask(dev, dma_mask); +} + +static u64 pnv_dma_get_required_mask(struct device *dev) +{ + if (dev_is_pci(dev)) + return pnv_pci_dma_get_required_mask(to_pci_dev(dev)); + + return __dma_get_required_mask(dev); +} + +static void pnv_shutdown(void) +{ + /* Let the PCI code clear up IODA tables */ + pnv_pci_shutdown(); + + /* + * Stop OPAL activity: Unregister all OPAL interrupts so they + * don't fire up while we kexec and make sure all potentially + * DMA'ing ops are complete (such as dump retrieval). + */ + opal_shutdown(); +} + +#ifdef CONFIG_KEXEC +static void pnv_kexec_wait_secondaries_down(void) +{ + int my_cpu, i, notified = -1; + + my_cpu = get_cpu(); + + for_each_online_cpu(i) { + uint8_t status; + int64_t rc; + + if (i == my_cpu) + continue; + + for (;;) { + rc = opal_query_cpu_status(get_hard_smp_processor_id(i), + &status); + if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED) + break; + barrier(); + if (i != notified) { + printk(KERN_INFO "kexec: waiting for cpu %d " + "(physical %d) to enter OPAL\n", + i, paca[i].hw_cpu_id); + notified = i; + } + } + } +} + +static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) +{ + xics_kexec_teardown_cpu(secondary); + + /* On OPAL v3, we return all CPUs to firmware */ + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) + return; + + if (secondary) { + /* Return secondary CPUs to firmware on OPAL v3 */ + mb(); + get_paca()->kexec_state = KEXEC_STATE_REAL_MODE; + mb(); + + /* Return the CPU to OPAL */ + opal_return_cpu(); + } else if (crash_shutdown) { + /* + * On crash, we don't wait for secondaries to go + * down as they might be unreachable or hung, so + * instead we just wait a bit and move on. 
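+ *
+ * (Editor's note, referring back to the DMA hooks earlier in this
+ * file: a driver never calls pnv_dma_set_mask() directly. The usual
+ * path, sketched for a hypothetical PCI driver probe():
+ *
+ *	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
+ *		dev_warn(&pdev->dev, "no 64-bit DMA available\n");
+ *
+ * On this kernel, dma_set_mask() dispatches through
+ * ppc_md.dma_set_mask, i.e. pnv_dma_set_mask() above, which routes
+ * PCI devices to pnv_pci_dma_set_mask() and everything else to the
+ * generic __dma_set_mask().)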
+ */
+ mdelay(1);
+ } else {
+ /* Primary waits for the secondaries to have reached OPAL */
+ pnv_kexec_wait_secondaries_down();
+ }
+}
+#endif /* CONFIG_KEXEC */
+
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+static unsigned long pnv_memory_block_size(void)
+{
+ return 256UL * 1024 * 1024;
+}
+#endif
+
+static void __init pnv_setup_machdep_opal(void)
+{
+ ppc_md.get_boot_time = opal_get_boot_time;
+ ppc_md.restart = pnv_restart;
+ pm_power_off = pnv_power_off;
+ ppc_md.halt = pnv_halt;
+ ppc_md.machine_check_exception = opal_machine_check;
+ ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
+ ppc_md.hmi_exception_early = opal_hmi_exception_early;
+ ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
+}
+
+static u32 supported_cpuidle_states;
+
+int pnv_save_sprs_for_winkle(void)
+{
+ int cpu;
+ int rc;
+
+ /*
+ * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
+ * all cpus at boot. Get these reg values of the current cpu and use
+ * the same across all cpus.
+ */
+ uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+ uint64_t hid0_val = mfspr(SPRN_HID0);
+ uint64_t hid1_val = mfspr(SPRN_HID1);
+ uint64_t hid4_val = mfspr(SPRN_HID4);
+ uint64_t hid5_val = mfspr(SPRN_HID5);
+ uint64_t hmeer_val = mfspr(SPRN_HMEER);
+
+ for_each_possible_cpu(cpu) {
+ uint64_t pir = get_hard_smp_processor_id(cpu);
+ uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+
+ /*
+ * HSPRG0 is used to store the cpu's pointer to paca. Hence last
+ * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
+ * with 63rd bit set, so that when a thread wakes up at 0x100 we
+ * can use this bit to distinguish between fastsleep and
+ * deep winkle.
+ */
+ hsprg0_val |= 1;
+
+ rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+ if (rc != 0)
+ return rc;
+
+ /* HIDs are per core registers */
+ if (cpu_thread_in_core(cpu) == 0) {
+
+ rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+ if (rc != 0)
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static void pnv_alloc_idle_core_states(void)
+{
+ int i, j;
+ int nr_cores = cpu_nr_cores();
+ u32 *core_idle_state;
+
+ /*
+ * core_idle_state - First 8 bits track the idle state of each thread
+ * of the core. The 8th bit is the lock bit. Initially all thread bits
+ * are set. They are cleared when the thread enters deep idle state
+ * like sleep and winkle. Initially the lock bit is cleared.
+ * The lock bit has 2 purposes
+ * a. While the first thread is restoring core state, it prevents
+ * other threads in the core from switching to process context.
+ * b. While the last thread in the core is saving the core state, it
+ * prevents a different thread from waking up.
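+ *
+ * (Editor's sketch: the actual manipulation happens in the assembly
+ * idle entry/exit code, but rendered in C the protocol described above
+ * looks roughly like this, using PNV_CORE_IDLE_LOCK_BIT and
+ * PNV_CORE_IDLE_THREAD_BITS from asm/cpuidle.h; thread_mask and
+ * save_core_state() are hypothetical names for this sketch:
+ *
+ *	u32 old, new;
+ *
+ *	do {
+ *		old = READ_ONCE(*core_idle_state) &
+ *				~PNV_CORE_IDLE_LOCK_BIT;
+ *		new = (old & ~thread_mask) | PNV_CORE_IDLE_LOCK_BIT;
+ *	} while (cmpxchg(core_idle_state, old, new) != old);
+ *
+ *	if (!(new & PNV_CORE_IDLE_THREAD_BITS))
+ *		save_core_state();
+ *	*core_idle_state = new & ~PNV_CORE_IDLE_LOCK_BIT;
+ *
+ * i.e. take the lock bit, clear this thread's bit, and only the last
+ * thread going down (no thread bits left) saves the per-core state
+ * before dropping the lock.)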
+ */ + for (i = 0; i < nr_cores; i++) { + int first_cpu = i * threads_per_core; + int node = cpu_to_node(first_cpu); + + core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); + *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; + + for (j = 0; j < threads_per_core; j++) { + int cpu = first_cpu + j; + + paca[cpu].core_idle_state_ptr = core_idle_state; + paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; + paca[cpu].thread_mask = 1 << j; + } + } + + update_subcore_sibling_mask(); + + if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) + pnv_save_sprs_for_winkle(); +} + +u32 pnv_get_supported_cpuidle_states(void) +{ + return supported_cpuidle_states; +} +EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); + +static int __init pnv_init_idle_states(void) +{ + struct device_node *power_mgt; + int dt_idle_states; + u32 *flags; + int i; + + supported_cpuidle_states = 0; + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + goto out; + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) + goto out; + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + goto out; + } + dt_idle_states = of_property_count_u32_elems(power_mgt, + "ibm,cpu-idle-state-flags"); + if (dt_idle_states < 0) { + pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + goto out; + } + + flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); + if (of_property_read_u32_array(power_mgt, + "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); + goto out_free; + } + + for (i = 0; i < dt_idle_states; i++) + supported_cpuidle_states |= flags[i]; + + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_entry, + PPC_INST_NOP); + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_exit, + PPC_INST_NOP); + } + pnv_alloc_idle_core_states(); +out_free: + kfree(flags); +out: + return 0; +} + +subsys_initcall(pnv_init_idle_states); + +static int __init pnv_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "ibm,powernv")) + return 0; + + hpte_init_native(); + + if (firmware_has_feature(FW_FEATURE_OPAL)) + pnv_setup_machdep_opal(); + + pr_debug("PowerNV detected !\n"); + + return 1; +} + +/* + * Returns the cpu frequency for 'cpu' in Hz. This is used by + * /proc/cpuinfo + */ +static unsigned long pnv_get_proc_freq(unsigned int cpu) +{ + unsigned long ret_freq; + + ret_freq = cpufreq_quick_get(cpu) * 1000ul; + + /* + * If the backend cpufreq driver does not exist, + * then fallback to old way of reporting the clockrate. 
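+ *
+ * (Editor's note: cpufreq_quick_get() reports kHz, hence the
+ * "* 1000ul" above to convert to Hz. For example, a nominal 3.5 GHz
+ * core reads back as 3500000 kHz and is reported as 3500000000 Hz;
+ * the ppc_proc_freq fallback is already in Hz.)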
+ */ + if (!ret_freq) + ret_freq = ppc_proc_freq; + return ret_freq; +} + +define_machine(powernv) { + .name = "PowerNV", + .probe = pnv_probe, + .init_early = pnv_init_early, + .setup_arch = pnv_setup_arch, + .init_IRQ = pnv_init_IRQ, + .show_cpuinfo = pnv_show_cpuinfo, + .get_proc_freq = pnv_get_proc_freq, + .progress = pnv_progress, + .machine_shutdown = pnv_shutdown, + .power_save = power7_idle, + .calibrate_decr = generic_calibrate_decr, + .dma_set_mask = pnv_dma_set_mask, + .dma_get_required_mask = pnv_dma_get_required_mask, +#ifdef CONFIG_KEXEC + .kexec_cpu_down = pnv_kexec_cpu_down, +#endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE + .memory_block_size = pnv_memory_block_size, +#endif +}; diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c new file mode 100644 index 000000000..8f70ba681 --- /dev/null +++ b/arch/powerpc/platforms/powernv/smp.c @@ -0,0 +1,256 @@ +/* + * SMP support for PowerNV machines. + * + * Copyright 2011 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/cpu.h> + +#include <asm/irq.h> +#include <asm/smp.h> +#include <asm/paca.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/firmware.h> +#include <asm/vdso_datapage.h> +#include <asm/cputhreads.h> +#include <asm/xics.h> +#include <asm/opal.h> +#include <asm/runlatch.h> +#include <asm/code-patching.h> +#include <asm/dbell.h> +#include <asm/kvm_ppc.h> +#include <asm/ppc-opcode.h> + +#include "powernv.h" + +#ifdef DEBUG +#include <asm/udbg.h> +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +static void pnv_smp_setup_cpu(int cpu) +{ + if (cpu != boot_cpuid) + xics_setup_cpu(); + +#ifdef CONFIG_PPC_DOORBELL + if (cpu_has_feature(CPU_FTR_DBELL)) + doorbell_setup_this_cpu(); +#endif +} + +static int pnv_smp_kick_cpu(int nr) +{ + unsigned int pcpu = get_hard_smp_processor_id(nr); + unsigned long start_here = + __pa(ppc_function_entry(generic_secondary_smp_init)); + long rc; + + BUG_ON(nr < 0 || nr >= NR_CPUS); + + /* + * If we already started or OPALv2 is not supported, we just + * kick the CPU via the PACA + */ + if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv2)) + goto kick; + + /* + * At this point, the CPU can either be spinning on the way in + * from kexec or be inside OPAL waiting to be started for the + * first time. 
OPAL v3 allows us to query OPAL to know if it + * has the CPUs, so we do that + */ + if (firmware_has_feature(FW_FEATURE_OPALv3)) { + uint8_t status; + + rc = opal_query_cpu_status(pcpu, &status); + if (rc != OPAL_SUCCESS) { + pr_warn("OPAL Error %ld querying CPU %d state\n", + rc, nr); + return -ENODEV; + } + + /* + * Already started, just kick it, probably coming from + * kexec and spinning + */ + if (status == OPAL_THREAD_STARTED) + goto kick; + + /* + * Available/inactive, let's kick it + */ + if (status == OPAL_THREAD_INACTIVE) { + pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", + nr, pcpu); + rc = opal_start_cpu(pcpu, start_here); + if (rc != OPAL_SUCCESS) { + pr_warn("OPAL Error %ld starting CPU %d\n", + rc, nr); + return -ENODEV; + } + } else { + /* + * An unavailable CPU (or any other unknown status) + * shouldn't be started. It should also + * not be in the possible map but currently it can + * happen + */ + pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable" + " (status %d)...\n", nr, pcpu, status); + return -ENODEV; + } + } else { + /* + * On OPAL v2, we just kick it and hope for the best, + * we must not test the error from opal_start_cpu() or + * we would fail to get CPUs from kexec. + */ + opal_start_cpu(pcpu, start_here); + } + kick: + return smp_generic_kick_cpu(nr); +} + +#ifdef CONFIG_HOTPLUG_CPU + +static int pnv_smp_cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* This is identical to pSeries... might consolidate by + * moving migrate_irqs_away to a ppc_md with default to + * the generic fixup_irqs. --BenH. + */ + set_cpu_online(cpu, false); + vdso_data->processorCount--; + if (cpu == boot_cpuid) + boot_cpuid = cpumask_any(cpu_online_mask); + xics_migrate_irqs_away(); + return 0; +} + +static void pnv_smp_cpu_kill_self(void) +{ + unsigned int cpu; + unsigned long srr1, wmask; + u32 idle_states; + + /* Standard hot unplug procedure */ + local_irq_disable(); + idle_task_exit(); + current->active_mm = NULL; /* for sanity */ + cpu = smp_processor_id(); + DBG("CPU%d offline\n", cpu); + generic_set_cpu_dead(cpu); + smp_wmb(); + + wmask = SRR1_WAKEMASK; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + wmask = SRR1_WAKEMASK_P8; + + idle_states = pnv_get_supported_cpuidle_states(); + /* We don't want to take decrementer interrupts while we are offline, + * so clear LPCR:PECE1. We keep PECE2 enabled. + */ + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); + while (!generic_check_cpu_restart(cpu)) { + + ppc64_runlatch_off(); + + if (idle_states & OPAL_PM_WINKLE_ENABLED) + srr1 = power7_winkle(); + else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || + (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) + srr1 = power7_sleep(); + else + srr1 = power7_nap(1); + + ppc64_runlatch_on(); + + /* + * If the SRR1 value indicates that we woke up due to + * an external interrupt, then clear the interrupt. + * We clear the interrupt before checking for the + * reason, so as to avoid a race where we wake up for + * some other reason, find nothing and clear the interrupt + * just as some other cpu is sending us an interrupt. + * If we returned from power7_nap as a result of + * having finished executing in a KVM guest, then srr1 + * contains 0. 
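+ *
+ * (Editor's sketch of the decode that follows, with wmask being
+ * SRR1_WAKEMASK_P8 on POWER8:
+ *
+ *	switch (srr1 & wmask) {
+ *	case SRR1_WAKEEE:
+ *		icp_native_flush_interrupt();
+ *		break;
+ *	case SRR1_WAKEHDBELL:
+ *		ppc_msgclr(PPC_DBELL_SERVER);
+ *		break;
+ *	default:
+ *		break;
+ *	}
+ *
+ * ppc_msgclr() is a hypothetical helper standing in for the inline
+ * PPC_MSGCLR asm used below; decrementer wakeups and other reasons
+ * need nothing cleared.)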
+ */ + if ((srr1 & wmask) == SRR1_WAKEEE) { + icp_native_flush_interrupt(); + local_paca->irq_happened &= PACA_IRQ_HARD_DIS; + smp_mb(); + } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); + asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); + kvmppc_set_host_ipi(cpu, 0); + } + + if (cpu_core_split_required()) + continue; + + if (!generic_check_cpu_restart(cpu)) + DBG("CPU%d Unexpected exit while offline !\n", cpu); + } + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1); + DBG("CPU%d coming online...\n", cpu); +} + +#endif /* CONFIG_HOTPLUG_CPU */ + +static int pnv_cpu_bootable(unsigned int nr) +{ + /* + * Starting with POWER8, the subcore logic relies on all threads of a + * core being booted so that they can participate in split mode + * switches. So on those machines we ignore the smt_enabled_at_boot + * setting (smt-enabled on the kernel command line). + */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return 1; + + return smp_generic_cpu_bootable(nr); +} + +static struct smp_ops_t pnv_smp_ops = { + .message_pass = smp_muxed_ipi_message_pass, + .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */ + .probe = xics_smp_probe, + .kick_cpu = pnv_smp_kick_cpu, + .setup_cpu = pnv_smp_setup_cpu, + .cpu_bootable = pnv_cpu_bootable, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = pnv_smp_cpu_disable, + .cpu_die = generic_cpu_die, +#endif /* CONFIG_HOTPLUG_CPU */ +}; + +/* This is called very early during platform setup_arch */ +void __init pnv_smp_init(void) +{ + smp_ops = &pnv_smp_ops; + +#ifdef CONFIG_HOTPLUG_CPU + ppc_md.cpu_die = pnv_smp_cpu_kill_self; +#endif +} diff --git a/arch/powerpc/platforms/powernv/subcore-asm.S b/arch/powerpc/platforms/powernv/subcore-asm.S new file mode 100644 index 000000000..39bb24aa8 --- /dev/null +++ b/arch/powerpc/platforms/powernv/subcore-asm.S @@ -0,0 +1,95 @@ +/* + * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/asm-offsets.h> +#include <asm/ppc_asm.h> +#include <asm/reg.h> + +#include "subcore.h" + + +_GLOBAL(split_core_secondary_loop) + /* + * r3 = u8 *state, used throughout the routine + * r4 = temp + * r5 = temp + * .. + * r12 = MSR + */ + mfmsr r12 + + /* Disable interrupts so SRR0/1 don't get trashed */ + li r4,0 + ori r4,r4,MSR_EE|MSR_SE|MSR_BE|MSR_RI + andc r4,r12,r4 + sync + mtmsrd r4 + + /* Switch to real mode and leave interrupts off */ + li r5, MSR_IR|MSR_DR + andc r5, r4, r5 + + LOAD_REG_ADDR(r4, real_mode) + + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + rfid + b . /* prevent speculative execution */ + +real_mode: + /* Grab values from unsplit SPRs */ + mfspr r6, SPRN_LDBAR + mfspr r7, SPRN_PMMAR + mfspr r8, SPRN_PMCR + mfspr r9, SPRN_RPR + mfspr r10, SPRN_SDR1 + + /* Order reading the SPRs vs telling the primary we are ready to split */ + sync + + /* Tell thread 0 we are in real mode */ + li r4, SYNC_STEP_REAL_MODE + stb r4, 0(r3) + + li r5, (HID0_POWER8_4LPARMODE | HID0_POWER8_2LPARMODE)@highest + sldi r5, r5, 48 + + /* Loop until we see the split happen in HID0 */ +1: mfspr r4, SPRN_HID0 + and. r4, r4, r5 + beq 1b + + /* + * We only need to initialise the below regs once for each subcore, + * but it's simpler and harmless to do it on each thread. 
+ */ + + /* Make sure various SPRS have sane values */ + li r4, 0 + mtspr SPRN_LPID, r4 + mtspr SPRN_PCR, r4 + mtspr SPRN_HDEC, r4 + + /* Restore SPR values now we are split */ + mtspr SPRN_LDBAR, r6 + mtspr SPRN_PMMAR, r7 + mtspr SPRN_PMCR, r8 + mtspr SPRN_RPR, r9 + mtspr SPRN_SDR1, r10 + + LOAD_REG_ADDR(r5, virtual_mode) + + /* Get out of real mode */ + mtspr SPRN_SRR0,r5 + mtspr SPRN_SRR1,r12 + rfid + b . /* prevent speculative execution */ + +virtual_mode: + blr diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c new file mode 100644 index 000000000..f60f80ada --- /dev/null +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -0,0 +1,427 @@ +/* + * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "powernv: " fmt + +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/device.h> +#include <linux/gfp.h> +#include <linux/smp.h> +#include <linux/stop_machine.h> + +#include <asm/cputhreads.h> +#include <asm/kvm_ppc.h> +#include <asm/machdep.h> +#include <asm/opal.h> +#include <asm/smp.h> + +#include "subcore.h" +#include "powernv.h" + + +/* + * Split/unsplit procedure: + * + * A core can be in one of three states, unsplit, 2-way split, and 4-way split. + * + * The mapping to subcores_per_core is simple: + * + * State | subcores_per_core + * ------------|------------------ + * Unsplit | 1 + * 2-way split | 2 + * 4-way split | 4 + * + * The core is split along thread boundaries, the mapping between subcores and + * threads is as follows: + * + * Unsplit: + * ---------------------------- + * Subcore | 0 | + * ---------------------------- + * Thread | 0 1 2 3 4 5 6 7 | + * ---------------------------- + * + * 2-way split: + * ------------------------------------- + * Subcore | 0 | 1 | + * ------------------------------------- + * Thread | 0 1 2 3 | 4 5 6 7 | + * ------------------------------------- + * + * 4-way split: + * ----------------------------------------- + * Subcore | 0 | 1 | 2 | 3 | + * ----------------------------------------- + * Thread | 0 1 | 2 3 | 4 5 | 6 7 | + * ----------------------------------------- + * + * + * Transitions + * ----------- + * + * It is not possible to transition between either of the split states, the + * core must first be unsplit. The legal transitions are: + * + * ----------- --------------- + * | | <----> | 2-way split | + * | | --------------- + * | Unsplit | + * | | --------------- + * | | <----> | 4-way split | + * ----------- --------------- + * + * Unsplitting + * ----------- + * + * Unsplitting is the simpler procedure. It requires thread 0 to request the + * unsplit while all other threads NAP. + * + * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells + * the hardware that if all threads except 0 are napping, the hardware should + * unsplit the core. + * + * Non-zero threads are sent to a NAP loop, they don't exit the loop until they + * see the core unsplit. + * + * Core 0 spins waiting for the hardware to see all the other threads napping + * and perform the unsplit. + * + * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them + * out of NAP. They will then see the core unsplit and exit the NAP loop. 
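+ *
+ * As a concrete timeline (editor's illustration, restating
+ * unsplit_core() further down) for one 2-way split core:
+ *
+ *   thread 0: clears HID0_POWER8_DYNLPARDIS, then spins while
+ *             (HID0 & HID0_POWER8_2LPARMODE) is still set
+ *   threads 1-7: power7_nap(0) in a loop, re-checking the HID0 mode
+ *             bits on every wakeup
+ *   hardware: sees threads 1-7 napping, performs the unsplit and
+ *             drops the mode bits
+ *   thread 0: exits its spin and IPIs threads 1-7 out of NAP; each
+ *             secondary then sees the core unsplit and marks
+ *             SYNC_STEP_UNSPLIT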
+ *
+ * Splitting
+ * ---------
+ *
+ * The basic splitting procedure is fairly straightforward. However it is
+ * complicated by the fact that after the split occurs, the newly created
+ * subcores are not in a fully initialised state.
+ *
+ * Most notably the subcores do not have the correct value for SDR1, which
+ * means they must not be running in virtual mode when the split occurs. The
+ * subcores have separate timebase SPRs but these are pre-synchronised by
+ * opal.
+ *
+ * To begin with, secondary threads are sent to an assembly routine. There they
+ * switch to real mode, so they are immune to the uninitialised SDR1 value.
+ * Once in real mode they indicate that they are in real mode, and spin waiting
+ * to see the core split.
+ *
+ * Thread 0 waits to see that all secondaries are in real mode, and then begins
+ * the splitting procedure. It first sets HID0_POWER8_DYNLPARDIS, which
+ * prevents the hardware from unsplitting. Then it sets the appropriate HID bit
+ * to request the split, and spins waiting to see that the split has happened.
+ *
+ * Concurrently the secondaries will notice the split. When they do they set up
+ * their SPRs, notably SDR1, and then they can return to virtual mode and exit
+ * the procedure.
+ */
+
+/* Initialised at boot by subcore_init() */
+static int subcores_per_core;
+
+/*
+ * Used to communicate to offline cpus that we want them to pop out of the
+ * offline loop and do a split or unsplit.
+ *
+ * 0 - no split happening
+ * 1 - unsplit in progress
+ * 2 - split to 2 in progress
+ * 4 - split to 4 in progress
+ */
+static int new_split_mode;
+
+static cpumask_var_t cpu_offline_mask;
+
+struct split_state {
+ u8 step;
+ u8 master;
+};
+
+static DEFINE_PER_CPU(struct split_state, split_state);
+
+static void wait_for_sync_step(int step)
+{
+ int i, cpu = smp_processor_id();
+
+ for (i = cpu + 1; i < cpu + threads_per_core; i++)
+ while (per_cpu(split_state, i).step < step)
+ barrier();
+
+ /* Order the wait loop vs any subsequent loads/stores.
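+ *
+ * (Editor's note: this mb() pairs with the barrier on the writer's
+ * side; cpu_do_split() below does
+ *
+ *	mb();
+ *	per_cpu(split_state, smp_processor_id()).step =
+ *					SYNC_STEP_FINISHED;
+ *
+ * so once the spin above observes the step, this mb() guarantees the
+ * waiter's subsequent loads see everything the stepping cpu did
+ * before publishing.)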
*/ + mb(); +} + +static void update_hid_in_slw(u64 hid0) +{ + u64 idle_states = pnv_get_supported_cpuidle_states(); + + if (idle_states & OPAL_PM_WINKLE_ENABLED) { + /* OPAL call to patch slw with the new HID0 value */ + u64 cpu_pir = hard_smp_processor_id(); + + opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0); + } +} + +static void unsplit_core(void) +{ + u64 hid0, mask; + int i, cpu; + + mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE; + + cpu = smp_processor_id(); + if (cpu_thread_in_core(cpu) != 0) { + while (mfspr(SPRN_HID0) & mask) + power7_nap(0); + + per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT; + return; + } + + hid0 = mfspr(SPRN_HID0); + hid0 &= ~HID0_POWER8_DYNLPARDIS; + mtspr(SPRN_HID0, hid0); + update_hid_in_slw(hid0); + + while (mfspr(SPRN_HID0) & mask) + cpu_relax(); + + /* Wake secondaries out of NAP */ + for (i = cpu + 1; i < cpu + threads_per_core; i++) + smp_send_reschedule(i); + + wait_for_sync_step(SYNC_STEP_UNSPLIT); +} + +static void split_core(int new_mode) +{ + struct { u64 value; u64 mask; } split_parms[2] = { + { HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE }, + { HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE } + }; + int i, cpu; + u64 hid0; + + /* Convert new_mode (2 or 4) into an index into our parms array */ + i = (new_mode >> 1) - 1; + BUG_ON(i < 0 || i > 1); + + cpu = smp_processor_id(); + if (cpu_thread_in_core(cpu) != 0) { + split_core_secondary_loop(&per_cpu(split_state, cpu).step); + return; + } + + wait_for_sync_step(SYNC_STEP_REAL_MODE); + + /* Write new mode */ + hid0 = mfspr(SPRN_HID0); + hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value; + mtspr(SPRN_HID0, hid0); + update_hid_in_slw(hid0); + + /* Wait for it to happen */ + while (!(mfspr(SPRN_HID0) & split_parms[i].mask)) + cpu_relax(); +} + +static void cpu_do_split(int new_mode) +{ + /* + * At boot subcores_per_core will be 0, so we will always unsplit at + * boot. In the usual case where the core is already unsplit it's a + * nop, and this just ensures the kernel's notion of the mode is + * consistent with the hardware. + */ + if (subcores_per_core != 1) + unsplit_core(); + + if (new_mode != 1) + split_core(new_mode); + + mb(); + per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED; +} + +bool cpu_core_split_required(void) +{ + smp_rmb(); + + if (!new_split_mode) + return false; + + cpu_do_split(new_split_mode); + + return true; +} + +void update_subcore_sibling_mask(void) +{ + int cpu; + /* + * sibling mask for the first cpu. Left shift this by required bits + * to get sibling mask for the rest of the cpus. 
+ */ + int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1; + + for_each_possible_cpu(cpu) { + int tid = cpu_thread_in_core(cpu); + int offset = (tid / threads_per_subcore) * threads_per_subcore; + int mask = sibling_mask_first_cpu << offset; + + paca[cpu].subcore_sibling_mask = mask; + + } +} + +static int cpu_update_split_mode(void *data) +{ + int cpu, new_mode = *(int *)data; + + if (this_cpu_ptr(&split_state)->master) { + new_split_mode = new_mode; + smp_wmb(); + + cpumask_andnot(cpu_offline_mask, cpu_present_mask, + cpu_online_mask); + + /* This should work even though the cpu is offline */ + for_each_cpu(cpu, cpu_offline_mask) + smp_send_reschedule(cpu); + } + + cpu_do_split(new_mode); + + if (this_cpu_ptr(&split_state)->master) { + /* Wait for all cpus to finish before we touch subcores_per_core */ + for_each_present_cpu(cpu) { + if (cpu >= setup_max_cpus) + break; + + while(per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED) + barrier(); + } + + new_split_mode = 0; + + /* Make the new mode public */ + subcores_per_core = new_mode; + threads_per_subcore = threads_per_core / subcores_per_core; + update_subcore_sibling_mask(); + + /* Make sure the new mode is written before we exit */ + mb(); + } + + return 0; +} + +static int set_subcores_per_core(int new_mode) +{ + struct split_state *state; + int cpu; + + if (kvm_hv_mode_active()) { + pr_err("Unable to change split core mode while KVM active.\n"); + return -EBUSY; + } + + /* + * We are only called at boot, or from the sysfs write. If that ever + * changes we'll need a lock here. + */ + BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3); + + for_each_present_cpu(cpu) { + state = &per_cpu(split_state, cpu); + state->step = SYNC_STEP_INITIAL; + state->master = 0; + } + + get_online_cpus(); + + /* This cpu will update the globals before exiting stop machine */ + this_cpu_ptr(&split_state)->master = 1; + + /* Ensure state is consistent before we call the other cpus */ + mb(); + + stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask); + + put_online_cpus(); + + return 0; +} + +static ssize_t __used store_subcores_per_core(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val; + int rc; + + /* We are serialised by the attribute lock */ + + rc = sscanf(buf, "%lx", &val); + if (rc != 1) + return -EINVAL; + + switch (val) { + case 1: + case 2: + case 4: + if (subcores_per_core == val) + /* Nothing to do */ + goto out; + break; + default: + return -EINVAL; + } + + rc = set_subcores_per_core(val); + if (rc) + return rc; + +out: + return count; +} + +static ssize_t show_subcores_per_core(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%x\n", subcores_per_core); +} + +static DEVICE_ATTR(subcores_per_core, 0644, + show_subcores_per_core, store_subcores_per_core); + +static int subcore_init(void) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return 0; + + /* + * We need all threads in a core to be present to split/unsplit so + * continue only if max_cpus are aligned to threads_per_core. 
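+ *
+ * (Editor's note: e.g. with threads_per_core = 8, booting with
+ * maxcpus=12 makes the check below fail (12 % 8 != 0) and the sysfs
+ * knob is never created. When it is created, split mode is driven by
+ * writes such as
+ *
+ *	echo 2 > /sys/devices/system/cpu/subcores_per_core
+ *
+ * which arrive in store_subcores_per_core() above.)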
+ */ + if (setup_max_cpus % threads_per_core) + return 0; + + BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL)); + + set_subcores_per_core(1); + + return device_create_file(cpu_subsys.dev_root, + &dev_attr_subcores_per_core); +} +machine_device_initcall(powernv, subcore_init); diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h new file mode 100644 index 000000000..84e02ae52 --- /dev/null +++ b/arch/powerpc/platforms/powernv/subcore.h @@ -0,0 +1,25 @@ +/* + * Copyright 2013, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* These are ordered and tested with <= */ +#define SYNC_STEP_INITIAL 0 +#define SYNC_STEP_UNSPLIT 1 /* Set by secondary when it sees unsplit */ +#define SYNC_STEP_REAL_MODE 2 /* Set by secondary when in real mode */ +#define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */ + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_SMP +void split_core_secondary_loop(u8 *state); +extern void update_subcore_sibling_mask(void); +#else +static inline void update_subcore_sibling_mask(void) { }; +#endif /* CONFIG_SMP */ + +#endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig new file mode 100644 index 000000000..56f274064 --- /dev/null +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -0,0 +1,173 @@ +config PPC_PS3 + bool "Sony PS3" + depends on PPC64 && PPC_BOOK3S + select PPC_CELL + select USB_OHCI_LITTLE_ENDIAN + select USB_OHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_MMIO + select PPC_PCI_CHOICE + help + This option enables support for the Sony PS3 game console + and other platforms using the PS3 hypervisor. Enabling this + option will allow building otheros.bld, a kernel image suitable + for programming into flash memory, and vmlinux, a kernel image + suitable for loading via kexec. + +menu "PS3 Platform Options" + depends on PPC_PS3 + +config PS3_ADVANCED + depends on PPC_PS3 + bool "PS3 Advanced configuration options" + help + This gives you access to some advanced options for the PS3. The + defaults should be fine for most users, but these options may make + it possible to better control the kernel configuration if you know + what you are doing. + + Note that the answer to this question won't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about these options. + + Most users should say N to this question. + +config PS3_HTAB_SIZE + depends on PPC_PS3 + int "PS3 Platform pagetable size" if PS3_ADVANCED + range 18 20 + default 20 + help + This option is only for experts who may have the desire to fine + tune the pagetable size on their system. The value here is + expressed as the log2 of the page table size. Valid values are + 18, 19, and 20, corresponding to 256KB, 512KB and 1MB respectively. + + If unsure, choose the default (20) with the confidence that your + system will have optimal runtime performance. + +config PS3_DYNAMIC_DMA + depends on PPC_PS3 + bool "PS3 Platform dynamic DMA page table management" + default n + help + This option will enable kernel support to take advantage of the + per device dynamic DMA page table management provided by the Cell + processor's IO Controller. 
This support incurs some runtime + overhead and also slightly increases kernel memory usage. The + current implementation should be considered experimental. + + This support is mainly for Linux kernel development. If unsure, + say N. + +config PS3_VUART + depends on PPC_PS3 + tristate + +config PS3_PS3AV + depends on PPC_PS3 + tristate "PS3 AV settings driver" if PS3_ADVANCED + select PS3_VUART + default y + help + Include support for the PS3 AV Settings driver. + + This support is required for PS3 graphics and sound. In + general, all users will say Y or M. + +config PS3_SYS_MANAGER + depends on PPC_PS3 + tristate "PS3 System Manager driver" if PS3_ADVANCED + select PS3_VUART + default y + help + Include support for the PS3 System Manager. + + This support is required for PS3 system control. In + general, all users will say Y or M. + +config PS3_REPOSITORY_WRITE + bool "PS3 Repository write support" if PS3_ADVANCED + depends on PPC_PS3 + default n + help + Enables support for writing to the PS3 System Repository. + + This support is intended for bootloaders that need to store data + in the repository for later boot stages. + + If in doubt, say N here and reduce the size of the kernel by a + small amount. + +config PS3_STORAGE + depends on PPC_PS3 + tristate + +config PS3_DISK + tristate "PS3 Disk Storage Driver" + depends on PPC_PS3 && BLOCK + select PS3_STORAGE + help + Include support for the PS3 Disk Storage. + + This support is required to access the PS3 hard disk. + In general, all users will say Y or M. + +config PS3_ROM + tristate "PS3 BD/DVD/CD-ROM Storage Driver" + depends on PPC_PS3 && SCSI + select PS3_STORAGE + help + Include support for the PS3 ROM Storage. + + This support is required to access the PS3 BD/DVD/CD-ROM drive. + In general, all users will say Y or M. + Also make sure to say Y or M to "SCSI CDROM support" later. + +config PS3_FLASH + tristate "PS3 FLASH ROM Storage Driver" + depends on PPC_PS3 + select PS3_STORAGE + help + Include support for the PS3 FLASH ROM Storage. + + This support is required to access the PS3 FLASH ROM, which + contains the boot loader and some boot options. + In general, PS3 OtherOS users will say Y or M. + + As this driver needs a fixed buffer of 256 KiB of memory, it can + be disabled on the kernel command line using "ps3flash=off", to + not allocate this fixed buffer. + +config PS3_VRAM + tristate "PS3 Video RAM Storage Driver" + depends on FB_PS3=y && BLOCK && m + help + This driver allows you to use excess PS3 video RAM as volatile + storage or system swap. + +config PS3_LPM + tristate "PS3 Logical Performance Monitor support" + depends on PPC_PS3 + help + Include support for the PS3 Logical Performance Monitor. + + This support is required to use the logical performance monitor + of the PS3's LV1 hypervisor. + + If you intend to use the advanced performance monitoring and + profiling support of the Cell processor with programs like + oprofile and perfmon2, then say Y or M, otherwise say N. + +config PS3GELIC_UDBG + bool "PS3 udbg output via UDP broadcasts on Ethernet" + depends on PPC_PS3 + help + Enables udbg early debugging output by sending broadcast UDP + via the Ethernet port (UDP port number 18194). + + This driver uses a trivial implementation and is independent + from the main PS3 gelic network driver. + + If in doubt, say N here. 
+ +endmenu diff --git a/arch/powerpc/platforms/ps3/Makefile b/arch/powerpc/platforms/ps3/Makefile new file mode 100644 index 000000000..02b9e636d --- /dev/null +++ b/arch/powerpc/platforms/ps3/Makefile @@ -0,0 +1,8 @@ +obj-y += setup.o mm.o time.o hvcall.o htab.o repository.o +obj-y += interrupt.o exports.o os-area.o +obj-y += system-bus.o + +obj-$(CONFIG_PS3GELIC_UDBG) += gelic_udbg.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SPU_BASE) += spu.o +obj-y += device-init.o diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c new file mode 100644 index 000000000..3f175e8ae --- /dev/null +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -0,0 +1,991 @@ +/* + * PS3 device registration routines. + * + * Copyright (C) 2007 Sony Computer Entertainment Inc. + * Copyright 2007 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/delay.h> +#include <linux/freezer.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/reboot.h> + +#include <asm/firmware.h> +#include <asm/lv1call.h> +#include <asm/ps3stor.h> + +#include "platform.h" + +static int __init ps3_register_lpm_devices(void) +{ + int result; + u64 tmp1; + u64 tmp2; + struct ps3_system_bus_device *dev; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->match_id = PS3_MATCH_ID_LPM; + dev->dev_type = PS3_DEVICE_TYPE_LPM; + + /* The current lpm driver only supports a single BE processor. 
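+ *
+ * (Editor's note: this function, like the other registration helpers
+ * in device-init.c, relies on the kernel's goto-unwind idiom for
+ * error handling; in miniature, with do_step() as a hypothetical
+ * stand-in:
+ *
+ *	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ *	if (!dev)
+ *		return -ENOMEM;
+ *
+ *	result = do_step(dev);
+ *	if (result)
+ *		goto fail;
+ *
+ *	return 0;
+ * fail:
+ *	kfree(dev);
+ *	return result;
+ *
+ * Every failure label unwinds exactly what was set up before it, so
+ * the fall-through label ordering at the bottom of each function
+ * mirrors the setup order at the top.)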
*/ + + result = ps3_repository_read_be_node_id(0, &dev->lpm.node_id); + + if (result) { + pr_debug("%s:%d: ps3_repository_read_be_node_id failed \n", + __func__, __LINE__); + goto fail_read_repo; + } + + result = ps3_repository_read_lpm_privileges(dev->lpm.node_id, &tmp1, + &dev->lpm.rights); + + if (result) { + pr_debug("%s:%d: ps3_repository_read_lpm_privleges failed \n", + __func__, __LINE__); + goto fail_read_repo; + } + + lv1_get_logical_partition_id(&tmp2); + + if (tmp1 != tmp2) { + pr_debug("%s:%d: wrong lpar\n", + __func__, __LINE__); + result = -ENODEV; + goto fail_rights; + } + + if (!(dev->lpm.rights & PS3_LPM_RIGHTS_USE_LPM)) { + pr_debug("%s:%d: don't have rights to use lpm\n", + __func__, __LINE__); + result = -EPERM; + goto fail_rights; + } + + pr_debug("%s:%d: pu_id %llu, rights %llu(%llxh)\n", + __func__, __LINE__, dev->lpm.pu_id, dev->lpm.rights, + dev->lpm.rights); + + result = ps3_repository_read_pu_id(0, &dev->lpm.pu_id); + + if (result) { + pr_debug("%s:%d: ps3_repository_read_pu_id failed \n", + __func__, __LINE__); + goto fail_read_repo; + } + + result = ps3_system_bus_device_register(dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_register; + } + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; + + +fail_register: +fail_rights: +fail_read_repo: + kfree(dev); + pr_debug(" <- %s:%d: failed\n", __func__, __LINE__); + return result; +} + +/** + * ps3_setup_gelic_device - Setup and register a gelic device instance. + * + * Allocates memory for a struct ps3_system_bus_device instance, initialises the + * structure members, and registers the device instance with the system bus. + */ + +static int __init ps3_setup_gelic_device( + const struct ps3_repository_device *repo) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + struct ps3_dma_region d_region; + } *p; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + BUG_ON(repo->bus_type != PS3_BUS_TYPE_SB); + BUG_ON(repo->dev_type != PS3_DEV_TYPE_SB_GELIC); + + p = kzalloc(sizeof(struct layout), GFP_KERNEL); + + if (!p) { + result = -ENOMEM; + goto fail_malloc; + } + + p->dev.match_id = PS3_MATCH_ID_GELIC; + p->dev.dev_type = PS3_DEVICE_TYPE_SB; + p->dev.bus_id = repo->bus_id; + p->dev.dev_id = repo->dev_id; + p->dev.d_region = &p->d_region; + + result = ps3_repository_find_interrupt(repo, + PS3_INTERRUPT_TYPE_EVENT_PORT, &p->dev.interrupt_id); + + if (result) { + pr_debug("%s:%d ps3_repository_find_interrupt failed\n", + __func__, __LINE__); + goto fail_find_interrupt; + } + + BUG_ON(p->dev.interrupt_id != 0); + + result = ps3_dma_region_init(&p->dev, p->dev.d_region, PS3_DMA_64K, + PS3_DMA_OTHER, NULL, 0); + + if (result) { + pr_debug("%s:%d ps3_dma_region_init failed\n", + __func__, __LINE__); + goto fail_dma_init; + } + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return result; + +fail_device_register: +fail_dma_init: +fail_find_interrupt: + kfree(p); +fail_malloc: + pr_debug(" <- %s:%d: fail.\n", __func__, __LINE__); + return result; +} + +static int __init_refok ps3_setup_uhc_device( + const struct ps3_repository_device *repo, enum ps3_match_id match_id, + enum ps3_interrupt_type interrupt_type, enum ps3_reg_type reg_type) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + struct ps3_dma_region d_region; + 
struct ps3_mmio_region m_region; + } *p; + u64 bus_addr; + u64 len; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + BUG_ON(repo->bus_type != PS3_BUS_TYPE_SB); + BUG_ON(repo->dev_type != PS3_DEV_TYPE_SB_USB); + + p = kzalloc(sizeof(struct layout), GFP_KERNEL); + + if (!p) { + result = -ENOMEM; + goto fail_malloc; + } + + p->dev.match_id = match_id; + p->dev.dev_type = PS3_DEVICE_TYPE_SB; + p->dev.bus_id = repo->bus_id; + p->dev.dev_id = repo->dev_id; + p->dev.d_region = &p->d_region; + p->dev.m_region = &p->m_region; + + result = ps3_repository_find_interrupt(repo, + interrupt_type, &p->dev.interrupt_id); + + if (result) { + pr_debug("%s:%d ps3_repository_find_interrupt failed\n", + __func__, __LINE__); + goto fail_find_interrupt; + } + + result = ps3_repository_find_reg(repo, reg_type, + &bus_addr, &len); + + if (result) { + pr_debug("%s:%d ps3_repository_find_reg failed\n", + __func__, __LINE__); + goto fail_find_reg; + } + + result = ps3_dma_region_init(&p->dev, p->dev.d_region, PS3_DMA_64K, + PS3_DMA_INTERNAL, NULL, 0); + + if (result) { + pr_debug("%s:%d ps3_dma_region_init failed\n", + __func__, __LINE__); + goto fail_dma_init; + } + + result = ps3_mmio_region_init(&p->dev, p->dev.m_region, bus_addr, len, + PS3_MMIO_4K); + + if (result) { + pr_debug("%s:%d ps3_mmio_region_init failed\n", + __func__, __LINE__); + goto fail_mmio_init; + } + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return result; + +fail_device_register: +fail_mmio_init: +fail_dma_init: +fail_find_reg: +fail_find_interrupt: + kfree(p); +fail_malloc: + pr_debug(" <- %s:%d: fail.\n", __func__, __LINE__); + return result; +} + +static int __init ps3_setup_ehci_device( + const struct ps3_repository_device *repo) +{ + return ps3_setup_uhc_device(repo, PS3_MATCH_ID_EHCI, + PS3_INTERRUPT_TYPE_SB_EHCI, PS3_REG_TYPE_SB_EHCI); +} + +static int __init ps3_setup_ohci_device( + const struct ps3_repository_device *repo) +{ + return ps3_setup_uhc_device(repo, PS3_MATCH_ID_OHCI, + PS3_INTERRUPT_TYPE_SB_OHCI, PS3_REG_TYPE_SB_OHCI); +} + +static int __init ps3_setup_vuart_device(enum ps3_match_id match_id, + unsigned int port_number) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + } *p; + + pr_debug(" -> %s:%d: match_id %u, port %u\n", __func__, __LINE__, + match_id, port_number); + + p = kzalloc(sizeof(struct layout), GFP_KERNEL); + + if (!p) + return -ENOMEM; + + p->dev.match_id = match_id; + p->dev.dev_type = PS3_DEVICE_TYPE_VUART; + p->dev.port_number = port_number; + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; + +fail_device_register: + kfree(p); + pr_debug(" <- %s:%d fail\n", __func__, __LINE__); + return result; +} + +static int ps3_setup_storage_dev(const struct ps3_repository_device *repo, + enum ps3_match_id match_id) +{ + int result; + struct ps3_storage_device *p; + u64 port, blk_size, num_blocks; + unsigned int num_regions, i; + + pr_debug(" -> %s:%u: match_id %u\n", __func__, __LINE__, match_id); + + result = ps3_repository_read_stor_dev_info(repo->bus_index, + repo->dev_index, &port, + &blk_size, &num_blocks, + &num_regions); + if (result) { + printk(KERN_ERR "%s:%u: _read_stor_dev_info failed %d\n", + 
__func__, __LINE__, result); + return -ENODEV; + } + + pr_debug("%s:%u: (%u:%u:%u): port %llu blk_size %llu num_blocks %llu " + "num_regions %u\n", __func__, __LINE__, repo->bus_index, + repo->dev_index, repo->dev_type, port, blk_size, num_blocks, + num_regions); + + p = kzalloc(sizeof(struct ps3_storage_device) + + num_regions * sizeof(struct ps3_storage_region), + GFP_KERNEL); + if (!p) { + result = -ENOMEM; + goto fail_malloc; + } + + p->sbd.match_id = match_id; + p->sbd.dev_type = PS3_DEVICE_TYPE_SB; + p->sbd.bus_id = repo->bus_id; + p->sbd.dev_id = repo->dev_id; + p->sbd.d_region = &p->dma_region; + p->blk_size = blk_size; + p->num_regions = num_regions; + + result = ps3_repository_find_interrupt(repo, + PS3_INTERRUPT_TYPE_EVENT_PORT, + &p->sbd.interrupt_id); + if (result) { + printk(KERN_ERR "%s:%u: find_interrupt failed %d\n", __func__, + __LINE__, result); + result = -ENODEV; + goto fail_find_interrupt; + } + + for (i = 0; i < num_regions; i++) { + unsigned int id; + u64 start, size; + + result = ps3_repository_read_stor_dev_region(repo->bus_index, + repo->dev_index, + i, &id, &start, + &size); + if (result) { + printk(KERN_ERR + "%s:%u: read_stor_dev_region failed %d\n", + __func__, __LINE__, result); + result = -ENODEV; + goto fail_read_region; + } + pr_debug("%s:%u: region %u: id %u start %llu size %llu\n", + __func__, __LINE__, i, id, start, size); + + p->regions[i].id = id; + p->regions[i].start = start; + p->regions[i].size = size; + } + + result = ps3_system_bus_device_register(&p->sbd); + if (result) { + pr_debug("%s:%u ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + + pr_debug(" <- %s:%u\n", __func__, __LINE__); + return 0; + +fail_device_register: +fail_read_region: +fail_find_interrupt: + kfree(p); +fail_malloc: + pr_debug(" <- %s:%u: fail.\n", __func__, __LINE__); + return result; +} + +static int __init ps3_register_vuart_devices(void) +{ + int result; + unsigned int port_number; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + result = ps3_repository_read_vuart_av_port(&port_number); + if (result) + port_number = 0; /* av default */ + + result = ps3_setup_vuart_device(PS3_MATCH_ID_AV_SETTINGS, port_number); + WARN_ON(result); + + result = ps3_repository_read_vuart_sysmgr_port(&port_number); + if (result) + port_number = 2; /* sysmgr default */ + + result = ps3_setup_vuart_device(PS3_MATCH_ID_SYSTEM_MANAGER, + port_number); + WARN_ON(result); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +static int __init ps3_register_sound_devices(void) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + struct ps3_dma_region d_region; + struct ps3_mmio_region m_region; + } *p; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + p->dev.match_id = PS3_MATCH_ID_SOUND; + p->dev.dev_type = PS3_DEVICE_TYPE_IOC0; + p->dev.d_region = &p->d_region; + p->dev.m_region = &p->m_region; + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; + +fail_device_register: + kfree(p); + pr_debug(" <- %s:%d failed\n", __func__, __LINE__); + return result; +} + +static int __init ps3_register_graphics_devices(void) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + } *p; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + p = 
kzalloc(sizeof(struct layout), GFP_KERNEL); + + if (!p) + return -ENOMEM; + + p->dev.match_id = PS3_MATCH_ID_GPU; + p->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_FB; + p->dev.dev_type = PS3_DEVICE_TYPE_IOC0; + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; + +fail_device_register: + kfree(p); + pr_debug(" <- %s:%d failed\n", __func__, __LINE__); + return result; +} + +static int __init ps3_register_ramdisk_device(void) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + } *p; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + p = kzalloc(sizeof(struct layout), GFP_KERNEL); + + if (!p) + return -ENOMEM; + + p->dev.match_id = PS3_MATCH_ID_GPU; + p->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK; + p->dev.dev_type = PS3_DEVICE_TYPE_IOC0; + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; + +fail_device_register: + kfree(p); + pr_debug(" <- %s:%d failed\n", __func__, __LINE__); + return result; +} + +/** + * ps3_setup_dynamic_device - Setup a dynamic device from the repository + */ + +static int ps3_setup_dynamic_device(const struct ps3_repository_device *repo) +{ + int result; + + switch (repo->dev_type) { + case PS3_DEV_TYPE_STOR_DISK: + result = ps3_setup_storage_dev(repo, PS3_MATCH_ID_STOR_DISK); + + /* Some devices are not accessible from the Other OS lpar. */ + if (result == -ENODEV) { + result = 0; + pr_debug("%s:%u: not accessible\n", __func__, + __LINE__); + } + + if (result) + pr_debug("%s:%u ps3_setup_storage_dev failed\n", + __func__, __LINE__); + break; + + case PS3_DEV_TYPE_STOR_ROM: + result = ps3_setup_storage_dev(repo, PS3_MATCH_ID_STOR_ROM); + if (result) + pr_debug("%s:%u ps3_setup_storage_dev failed\n", + __func__, __LINE__); + break; + + case PS3_DEV_TYPE_STOR_FLASH: + result = ps3_setup_storage_dev(repo, PS3_MATCH_ID_STOR_FLASH); + if (result) + pr_debug("%s:%u ps3_setup_storage_dev failed\n", + __func__, __LINE__); + break; + + default: + result = 0; + pr_debug("%s:%u: unsupported dev_type %u\n", __func__, __LINE__, + repo->dev_type); + } + + return result; +} + +/** + * ps3_setup_static_device - Setup a static device from the repository + */ + +static int __init ps3_setup_static_device(const struct ps3_repository_device *repo) +{ + int result; + + switch (repo->dev_type) { + case PS3_DEV_TYPE_SB_GELIC: + result = ps3_setup_gelic_device(repo); + if (result) { + pr_debug("%s:%d ps3_setup_gelic_device failed\n", + __func__, __LINE__); + } + break; + case PS3_DEV_TYPE_SB_USB: + + /* Each USB device has both an EHCI and an OHCI HC */ + + result = ps3_setup_ehci_device(repo); + + if (result) { + pr_debug("%s:%d ps3_setup_ehci_device failed\n", + __func__, __LINE__); + } + + result = ps3_setup_ohci_device(repo); + + if (result) { + pr_debug("%s:%d ps3_setup_ohci_device failed\n", + __func__, __LINE__); + } + break; + + default: + return ps3_setup_dynamic_device(repo); + } + + return result; +} + +static void ps3_find_and_add_device(u64 bus_id, u64 dev_id) +{ + struct ps3_repository_device repo; + int res; + unsigned int retries; + unsigned long rem; + + /* + * On some firmware versions (e.g. 
1.90), the device may not show up + * in the repository immediately + */ + for (retries = 0; retries < 10; retries++) { + res = ps3_repository_find_device_by_id(&repo, bus_id, dev_id); + if (!res) + goto found; + + rem = msleep_interruptible(100); + if (rem) + break; + } + pr_warning("%s:%u: device %llu:%llu not found\n", __func__, __LINE__, + bus_id, dev_id); + return; + +found: + if (retries) + pr_debug("%s:%u: device %llu:%llu found after %u retries\n", + __func__, __LINE__, bus_id, dev_id, retries); + + ps3_setup_dynamic_device(&repo); + return; +} + +#define PS3_NOTIFICATION_DEV_ID ULONG_MAX +#define PS3_NOTIFICATION_INTERRUPT_ID 0 + +struct ps3_notification_device { + struct ps3_system_bus_device sbd; + spinlock_t lock; + u64 tag; + u64 lv1_status; + struct completion done; +}; + +enum ps3_notify_type { + notify_device_ready = 0, + notify_region_probe = 1, + notify_region_update = 2, +}; + +struct ps3_notify_cmd { + u64 operation_code; /* must be zero */ + u64 event_mask; /* OR of 1UL << enum ps3_notify_type */ +}; + +struct ps3_notify_event { + u64 event_type; /* enum ps3_notify_type */ + u64 bus_id; + u64 dev_id; + u64 dev_type; + u64 dev_port; +}; + +static irqreturn_t ps3_notification_interrupt(int irq, void *data) +{ + struct ps3_notification_device *dev = data; + int res; + u64 tag, status; + + spin_lock(&dev->lock); + res = lv1_storage_get_async_status(PS3_NOTIFICATION_DEV_ID, &tag, + &status); + if (tag != dev->tag) + pr_err("%s:%u: tag mismatch, got %llx, expected %llx\n", + __func__, __LINE__, tag, dev->tag); + + if (res) { + pr_err("%s:%u: res %d status 0x%llx\n", __func__, __LINE__, res, + status); + } else { + pr_debug("%s:%u: completed, status 0x%llx\n", __func__, + __LINE__, status); + dev->lv1_status = status; + complete(&dev->done); + } + spin_unlock(&dev->lock); + return IRQ_HANDLED; +} + +static int ps3_notification_read_write(struct ps3_notification_device *dev, + u64 lpar, int write) +{ + const char *op = write ? "write" : "read"; + unsigned long flags; + int res; + + init_completion(&dev->done); + spin_lock_irqsave(&dev->lock, flags); + res = write ? lv1_storage_write(dev->sbd.dev_id, 0, 0, 1, 0, lpar, + &dev->tag) + : lv1_storage_read(dev->sbd.dev_id, 0, 0, 1, 0, lpar, + &dev->tag); + spin_unlock_irqrestore(&dev->lock, flags); + if (res) { + pr_err("%s:%u: %s failed %d\n", __func__, __LINE__, op, res); + return -EPERM; + } + pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); + + res = wait_event_interruptible(dev->done.wait, + dev->done.done || kthread_should_stop()); + if (kthread_should_stop()) + res = -EINTR; + if (res) { + pr_debug("%s:%u: interrupted %s\n", __func__, __LINE__, op); + return res; + } + + if (dev->lv1_status) { + pr_err("%s:%u: %s not completed, status 0x%llx\n", __func__, + __LINE__, op, dev->lv1_status); + return -EIO; + } + pr_debug("%s:%u: notification %s completed\n", __func__, __LINE__, op); + + return 0; +} + +static struct task_struct *probe_task; + +/** + * ps3_probe_thread - Background repository probing at system startup. + * + * This implementation only supports background probing on a single bus. + * It uses the hypervisor's storage device notification mechanism to wait until + * a storage device is ready. The device notification mechanism uses a + * pseudo device to asynchronously notify the guest when storage devices become + * ready. The notification device has a block size of 512 bytes. 
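+ * + * The command written to the notification device selects which events + * are reported. As an illustrative sketch (the thread below registers + * for notify_region_probe only), a guest interested in both probe and + * update events would OR the enum values into the mask: + * + * notify_cmd->event_mask = (1UL << notify_region_probe) | + * (1UL << notify_region_update);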
+ */ + +static int ps3_probe_thread(void *data) +{ + struct ps3_notification_device dev; + int res; + unsigned int irq; + u64 lpar; + void *buf; + struct ps3_notify_cmd *notify_cmd; + struct ps3_notify_event *notify_event; + + pr_debug(" -> %s:%u: kthread started\n", __func__, __LINE__); + + buf = kzalloc(512, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + lpar = ps3_mm_phys_to_lpar(__pa(buf)); + notify_cmd = buf; + notify_event = buf; + + /* dummy system bus device */ + dev.sbd.bus_id = (u64)data; + dev.sbd.dev_id = PS3_NOTIFICATION_DEV_ID; + dev.sbd.interrupt_id = PS3_NOTIFICATION_INTERRUPT_ID; + + res = lv1_open_device(dev.sbd.bus_id, dev.sbd.dev_id, 0); + if (res) { + pr_err("%s:%u: lv1_open_device failed %s\n", __func__, + __LINE__, ps3_result(res)); + goto fail_free; + } + + res = ps3_sb_event_receive_port_setup(&dev.sbd, PS3_BINDING_CPU_ANY, + &irq); + if (res) { + pr_err("%s:%u: ps3_sb_event_receive_port_setup failed %d\n", + __func__, __LINE__, res); + goto fail_close_device; + } + + spin_lock_init(&dev.lock); + + res = request_irq(irq, ps3_notification_interrupt, 0, + "ps3_notification", &dev); + if (res) { + pr_err("%s:%u: request_irq failed %d\n", __func__, __LINE__, + res); + goto fail_sb_event_receive_port_destroy; + } + + /* Setup and write the request for device notification. */ + notify_cmd->operation_code = 0; /* must be zero */ + notify_cmd->event_mask = 1UL << notify_region_probe; + + res = ps3_notification_read_write(&dev, lpar, 1); + if (res) + goto fail_free_irq; + + /* Loop here processing the requested notification events. */ + do { + try_to_freeze(); + + memset(notify_event, 0, sizeof(*notify_event)); + + res = ps3_notification_read_write(&dev, lpar, 0); + if (res) + break; + + pr_debug("%s:%u: notify event type 0x%llx bus id %llu dev id %llu" + " type %llu port %llu\n", __func__, __LINE__, + notify_event->event_type, notify_event->bus_id, + notify_event->dev_id, notify_event->dev_type, + notify_event->dev_port); + + if (notify_event->event_type != notify_region_probe || + notify_event->bus_id != dev.sbd.bus_id) { + pr_warning("%s:%u: bad notify_event: event %llu, " + "dev_id %llu, dev_type %llu\n", + __func__, __LINE__, notify_event->event_type, + notify_event->dev_id, + notify_event->dev_type); + continue; + } + + ps3_find_and_add_device(dev.sbd.bus_id, notify_event->dev_id); + + } while (!kthread_should_stop()); + +fail_free_irq: + free_irq(irq, &dev); +fail_sb_event_receive_port_destroy: + ps3_sb_event_receive_port_destroy(&dev.sbd, irq); +fail_close_device: + lv1_close_device(dev.sbd.bus_id, dev.sbd.dev_id); +fail_free: + kfree(buf); + + probe_task = NULL; + + pr_debug(" <- %s:%u: kthread finished\n", __func__, __LINE__); + + return 0; +} + +/** + * ps3_stop_probe_thread - Stops the background probe thread. + * + */ + +static int ps3_stop_probe_thread(struct notifier_block *nb, unsigned long code, + void *data) +{ + if (probe_task) + kthread_stop(probe_task); + return 0; +} + +static struct notifier_block nb = { + .notifier_call = ps3_stop_probe_thread +}; + +/** + * ps3_start_probe_thread - Starts the background probe thread. 
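+ * + * Usage sketch: ps3_register_devices() below starts the storage probe + * thread with ps3_start_probe_thread(PS3_BUS_TYPE_STORAGE). Only a + * single background thread (a single bus) is supported.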
+ * + */ + +static int __init ps3_start_probe_thread(enum ps3_bus_type bus_type) +{ + int result; + struct task_struct *task; + struct ps3_repository_device repo; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + memset(&repo, 0, sizeof(repo)); + + repo.bus_type = bus_type; + + result = ps3_repository_find_bus(repo.bus_type, 0, &repo.bus_index); + + if (result) { + printk(KERN_ERR "%s: Cannot find bus (%d)\n", __func__, result); + return -ENODEV; + } + + result = ps3_repository_read_bus_id(repo.bus_index, &repo.bus_id); + + if (result) { + printk(KERN_ERR "%s: read_bus_id failed %d\n", __func__, + result); + return -ENODEV; + } + + task = kthread_run(ps3_probe_thread, (void *)repo.bus_id, + "ps3-probe-%u", bus_type); + + if (IS_ERR(task)) { + result = PTR_ERR(task); + printk(KERN_ERR "%s: kthread_run failed %d\n", __func__, + result); + return result; + } + + probe_task = task; + register_reboot_notifier(&nb); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; +} + +/** + * ps3_register_devices - Probe the system and register devices found. + * + * A device_initcall() routine. + */ + +static int __init ps3_register_devices(void) +{ + int result; + + if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) + return -ENODEV; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + /* ps3_repository_dump_bus_info(); */ + + result = ps3_start_probe_thread(PS3_BUS_TYPE_STORAGE); + + ps3_register_vuart_devices(); + + ps3_register_graphics_devices(); + + ps3_repository_find_devices(PS3_BUS_TYPE_SB, ps3_setup_static_device); + + ps3_register_sound_devices(); + + ps3_register_lpm_devices(); + + ps3_register_ramdisk_device(); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; +} + +device_initcall(ps3_register_devices); diff --git a/arch/powerpc/platforms/ps3/exports.c b/arch/powerpc/platforms/ps3/exports.c new file mode 100644 index 000000000..7df5b7d8f --- /dev/null +++ b/arch/powerpc/platforms/ps3/exports.c @@ -0,0 +1,25 @@ +/* + * PS3 hvcall exports for modules. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define LV1_CALL(name, in, out, num) \ + extern s64 _lv1_##name(LV1_##in##_IN_##out##_OUT_ARG_DECL); \ + EXPORT_SYMBOL(_lv1_##name); + +#include <asm/lv1call.h> diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c new file mode 100644 index 000000000..20b46a19a --- /dev/null +++ b/arch/powerpc/platforms/ps3/gelic_udbg.c @@ -0,0 +1,273 @@ +/* + * udbg debug output routine via GELIC UDP broadcasts + * + * Copyright (C) 2007 Sony Computer Entertainment Inc. 
+ * Copyright 2006, 2007 Sony Corporation + * Copyright (C) 2010 Hector Martin <hector@marcansoft.com> + * Copyright (C) 2011 Andre Heider <a.heider@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + */ + +#include <asm/io.h> +#include <asm/udbg.h> +#include <asm/lv1call.h> + +#define GELIC_BUS_ID 1 +#define GELIC_DEVICE_ID 0 +#define GELIC_DEBUG_PORT 18194 +#define GELIC_MAX_MESSAGE_SIZE 1000 + +#define GELIC_LV1_GET_MAC_ADDRESS 1 +#define GELIC_LV1_GET_VLAN_ID 4 +#define GELIC_LV1_VLAN_TX_ETHERNET_0 2 + +#define GELIC_DESCR_DMA_STAT_MASK 0xf0000000 +#define GELIC_DESCR_DMA_CARDOWNED 0xa0000000 + +#define GELIC_DESCR_TX_DMA_IKE 0x00080000 +#define GELIC_DESCR_TX_DMA_NO_CHKSUM 0x00000000 +#define GELIC_DESCR_TX_DMA_FRAME_TAIL 0x00040000 + +#define GELIC_DESCR_DMA_CMD_NO_CHKSUM (GELIC_DESCR_DMA_CARDOWNED | \ + GELIC_DESCR_TX_DMA_IKE | \ + GELIC_DESCR_TX_DMA_NO_CHKSUM) + +static u64 bus_addr; + +struct gelic_descr { + /* as defined by the hardware */ + __be32 buf_addr; + __be32 buf_size; + __be32 next_descr_addr; + __be32 dmac_cmd_status; + __be32 result_size; + __be32 valid_size; /* all zeroes for tx */ + __be32 data_status; + __be32 data_error; /* all zeroes for tx */ +} __attribute__((aligned(32))); + +struct debug_block { + struct gelic_descr descr; + u8 pkt[1520]; +} __packed; + +struct ethhdr { + u8 dest[6]; + u8 src[6]; + u16 type; +} __packed; + +struct vlantag { + u16 vlan; + u16 subtype; +} __packed; + +struct iphdr { + u8 ver_len; + u8 dscp_ecn; + u16 total_length; + u16 ident; + u16 frag_off_flags; + u8 ttl; + u8 proto; + u16 checksum; + u32 src; + u32 dest; +} __packed; + +struct udphdr { + u16 src; + u16 dest; + u16 len; + u16 checksum; +} __packed; + +static __iomem struct ethhdr *h_eth; +static __iomem struct vlantag *h_vlan; +static __iomem struct iphdr *h_ip; +static __iomem struct udphdr *h_udp; + +static __iomem char *pmsg; +static __iomem char *pmsgc; + +static __iomem struct debug_block dbg __attribute__((aligned(32))); + +static int header_size; + +static void map_dma_mem(int bus_id, int dev_id, void *start, size_t len, + u64 *real_bus_addr) +{ + s64 result; + u64 real_addr = ((u64)start) & 0x0fffffffffffffffUL; + u64 real_end = real_addr + len; + u64 map_start = real_addr & ~0xfff; + u64 map_end = (real_end + 0xfff) & ~0xfff; + u64 bus_addr = 0; + + u64 flags = 0xf800000000000000UL; + + result = lv1_allocate_device_dma_region(bus_id, dev_id, + map_end - map_start, 12, 0, + &bus_addr); + if (result) + lv1_panic(0); + + result = lv1_map_device_dma_region(bus_id, dev_id, map_start, + bus_addr, map_end - map_start, + flags); + if (result) + lv1_panic(0); + + *real_bus_addr = bus_addr + real_addr - map_start; +} + +static int unmap_dma_mem(int bus_id, int dev_id, u64 bus_addr, size_t len) +{ + s64 result; + u64 real_bus_addr; + + real_bus_addr = bus_addr & ~0xfff; + len += bus_addr - real_bus_addr; + len = (len + 0xfff) & ~0xfff; + + result = lv1_unmap_device_dma_region(bus_id, dev_id, real_bus_addr, + len); + if (result) + return result; + + return lv1_free_device_dma_region(bus_id, dev_id, real_bus_addr); +} + +static void gelic_debug_init(void) +{ + s64 result; + u64 v2; + u64 mac; + u64 vlan_id; + + result = lv1_open_device(GELIC_BUS_ID, GELIC_DEVICE_ID, 0); + if (result) + lv1_panic(0); + + map_dma_mem(GELIC_BUS_ID, GELIC_DEVICE_ID, &dbg, 
sizeof(dbg), + &bus_addr); + + memset(&dbg, 0, sizeof(dbg)); + + dbg.descr.buf_addr = bus_addr + offsetof(struct debug_block, pkt); + + wmb(); + + result = lv1_net_control(GELIC_BUS_ID, GELIC_DEVICE_ID, + GELIC_LV1_GET_MAC_ADDRESS, 0, 0, 0, + &mac, &v2); + if (result) + lv1_panic(0); + + mac <<= 16; + + h_eth = (struct ethhdr *)dbg.pkt; + + memset(&h_eth->dest, 0xff, 6); + memcpy(&h_eth->src, &mac, 6); + + header_size = sizeof(struct ethhdr); + + result = lv1_net_control(GELIC_BUS_ID, GELIC_DEVICE_ID, + GELIC_LV1_GET_VLAN_ID, + GELIC_LV1_VLAN_TX_ETHERNET_0, 0, 0, + &vlan_id, &v2); + if (!result) { + h_eth->type = 0x8100; + + header_size += sizeof(struct vlantag); + h_vlan = (struct vlantag *)(h_eth + 1); + h_vlan->vlan = vlan_id; + h_vlan->subtype = 0x0800; + h_ip = (struct iphdr *)(h_vlan + 1); + } else { + h_eth->type = 0x0800; + h_ip = (struct iphdr *)(h_eth + 1); + } + + header_size += sizeof(struct iphdr); + h_ip->ver_len = 0x45; + h_ip->ttl = 10; + h_ip->proto = 0x11; + h_ip->src = 0x00000000; + h_ip->dest = 0xffffffff; + + header_size += sizeof(struct udphdr); + h_udp = (struct udphdr *)(h_ip + 1); + h_udp->src = GELIC_DEBUG_PORT; + h_udp->dest = GELIC_DEBUG_PORT; + + pmsgc = pmsg = (char *)(h_udp + 1); +} + +static void gelic_debug_shutdown(void) +{ + if (bus_addr) + unmap_dma_mem(GELIC_BUS_ID, GELIC_DEVICE_ID, + bus_addr, sizeof(dbg)); + lv1_close_device(GELIC_BUS_ID, GELIC_DEVICE_ID); +} + +static void gelic_sendbuf(int msgsize) +{ + u16 *p; + u32 sum; + int i; + + dbg.descr.buf_size = header_size + msgsize; + h_ip->total_length = msgsize + sizeof(struct udphdr) + + sizeof(struct iphdr); + h_udp->len = msgsize + sizeof(struct udphdr); + + h_ip->checksum = 0; + sum = 0; + p = (u16 *)h_ip; + for (i = 0; i < 10; i++) + sum += *p++; + h_ip->checksum = ~(sum + (sum >> 16)); + + dbg.descr.dmac_cmd_status = GELIC_DESCR_DMA_CMD_NO_CHKSUM | + GELIC_DESCR_TX_DMA_FRAME_TAIL; + dbg.descr.result_size = 0; + dbg.descr.data_status = 0; + + wmb(); + + lv1_net_start_tx_dma(GELIC_BUS_ID, GELIC_DEVICE_ID, bus_addr, 0); + + while ((dbg.descr.dmac_cmd_status & GELIC_DESCR_DMA_STAT_MASK) == + GELIC_DESCR_DMA_CARDOWNED) + cpu_relax(); +} + +static void ps3gelic_udbg_putc(char ch) +{ + *pmsgc++ = ch; + if (ch == '\n' || (pmsgc-pmsg) >= GELIC_MAX_MESSAGE_SIZE) { + gelic_sendbuf(pmsgc-pmsg); + pmsgc = pmsg; + } +} + +void __init udbg_init_ps3gelic(void) +{ + gelic_debug_init(); + udbg_putc = ps3gelic_udbg_putc; +} + +void udbg_shutdown_ps3gelic(void) +{ + udbg_putc = NULL; + gelic_debug_shutdown(); +} +EXPORT_SYMBOL(udbg_shutdown_ps3gelic); diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c new file mode 100644 index 000000000..2f95d33cf --- /dev/null +++ b/arch/powerpc/platforms/ps3/htab.c @@ -0,0 +1,207 @@ +/* + * PS3 pagetable management routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006, 2007 Sony Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/memblock.h> + +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/lv1call.h> +#include <asm/ps3fb.h> + +#define PS3_VERBOSE_RESULT +#include "platform.h" + +/** + * enum lpar_vas_id - id of LPAR virtual address space. + * @lpar_vas_id_current: Current selected virtual address space + * + * Identify the target LPAR address space. + */ + +enum ps3_lpar_vas_id { + PS3_LPAR_VAS_ID_CURRENT = 0, +}; + + +static DEFINE_SPINLOCK(ps3_htab_lock); + +static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, + unsigned long pa, unsigned long rflags, unsigned long vflags, + int psize, int apsize, int ssize) +{ + int result; + u64 hpte_v, hpte_r; + u64 inserted_index; + u64 evicted_v, evicted_r; + u64 hpte_v_array[4], hpte_rs; + unsigned long flags; + long ret = -1; + + /* + * lv1_insert_htab_entry() will search for victim + * entry in both primary and secondary pte group + */ + vflags &= ~HPTE_V_SECONDARY; + + hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; + hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags; + + spin_lock_irqsave(&ps3_htab_lock, flags); + + /* talk hvc to replace entries BOLTED == 0 */ + result = lv1_insert_htab_entry(PS3_LPAR_VAS_ID_CURRENT, hpte_group, + hpte_v, hpte_r, + HPTE_V_BOLTED, 0, + &inserted_index, + &evicted_v, &evicted_r); + + if (result) { + /* all entries bolted !*/ + pr_info("%s:result=%s vpn=%lx pa=%lx ix=%lx v=%llx r=%llx\n", + __func__, ps3_result(result), vpn, pa, hpte_group, + hpte_v, hpte_r); + BUG(); + } + + /* + * see if the entry is inserted into secondary pteg + */ + result = lv1_read_htab_entries(PS3_LPAR_VAS_ID_CURRENT, + inserted_index & ~0x3UL, + &hpte_v_array[0], &hpte_v_array[1], + &hpte_v_array[2], &hpte_v_array[3], + &hpte_rs); + BUG_ON(result); + + if (hpte_v_array[inserted_index % 4] & HPTE_V_SECONDARY) + ret = (inserted_index & 7) | (1 << 3); + else + ret = inserted_index & 7; + + spin_unlock_irqrestore(&ps3_htab_lock, flags); + + return ret; +} + +static long ps3_hpte_remove(unsigned long hpte_group) +{ + panic("ps3_hpte_remove() not implemented"); + return 0; +} + +static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long vpn, int psize, int apsize, + int ssize, unsigned long inv_flags) +{ + int result; + u64 hpte_v, want_v, hpte_rs; + u64 hpte_v_array[4]; + unsigned long flags; + long ret; + + want_v = hpte_encode_avpn(vpn, psize, ssize); + + spin_lock_irqsave(&ps3_htab_lock, flags); + + result = lv1_read_htab_entries(PS3_LPAR_VAS_ID_CURRENT, slot & ~0x3UL, + &hpte_v_array[0], &hpte_v_array[1], + &hpte_v_array[2], &hpte_v_array[3], + &hpte_rs); + + if (result) { + pr_info("%s: result=%s read vpn=%lx slot=%lx psize=%d\n", + __func__, ps3_result(result), vpn, slot, psize); + BUG(); + } + + hpte_v = hpte_v_array[slot % 4]; + + /* + * As lv1_read_htab_entries() does not give us the RPN, we can + * not synthesize the new hpte_r value here, and therefore can + * not update the hpte with lv1_insert_htab_entry(), so we + * instead invalidate it and ask the caller to update it via + * ps3_hpte_insert() by returning a -1 value. 
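+ * + * (The invalidation below works because writing a zeroed hpte_v/hpte_r + * pair with lv1_write_htab_entry() clears HPTE_V_VALID; + * ps3_hpte_invalidate() and ps3_hpte_clear() rely on the same effect.)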
+ */ + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { + /* not found */ + ret = -1; + } else { + /* entry found, just invalidate it */ + result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, + slot, 0, 0); + ret = -1; + } + + spin_unlock_irqrestore(&ps3_htab_lock, flags); + return ret; +} + +static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, + int psize, int ssize) +{ + panic("ps3_hpte_updateboltedpp() not implemented"); +} + +static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, + int psize, int apsize, int ssize, int local) +{ + unsigned long flags; + int result; + + spin_lock_irqsave(&ps3_htab_lock, flags); + + result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0); + + if (result) { + pr_info("%s: result=%s vpn=%lx slot=%lx psize=%d\n", + __func__, ps3_result(result), vpn, slot, psize); + BUG(); + } + + spin_unlock_irqrestore(&ps3_htab_lock, flags); +} + +static void ps3_hpte_clear(void) +{ + unsigned long hpte_count = (1UL << ppc64_pft_size) >> 4; + u64 i; + + for (i = 0; i < hpte_count; i++) + lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, i, 0, 0); + + ps3_mm_shutdown(); + ps3_mm_vas_destroy(); +} + +void __init ps3_hpte_init(unsigned long htab_size) +{ + ppc_md.hpte_invalidate = ps3_hpte_invalidate; + ppc_md.hpte_updatepp = ps3_hpte_updatepp; + ppc_md.hpte_updateboltedpp = ps3_hpte_updateboltedpp; + ppc_md.hpte_insert = ps3_hpte_insert; + ppc_md.hpte_remove = ps3_hpte_remove; + ppc_md.hpte_clear_all = ps3_hpte_clear; + + ppc64_pft_size = __ilog2(htab_size); +} + diff --git a/arch/powerpc/platforms/ps3/hvcall.S b/arch/powerpc/platforms/ps3/hvcall.S new file mode 100644 index 000000000..54be6523a --- /dev/null +++ b/arch/powerpc/platforms/ps3/hvcall.S @@ -0,0 +1,804 @@ +/* + * PS3 hvcall interface. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * Copyright 2003, 2004 (c) MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <asm/processor.h> +#include <asm/ppc_asm.h> + +#define lv1call .long 0x44000022; extsw r3, r3 + +#define LV1_N_IN_0_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_0_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_1_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_2_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_3_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_4_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_5_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_6_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_7_IN_0_OUT LV1_N_IN_0_OUT + +#define LV1_0_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r3, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_0_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r3, -8(r1); \ + stdu r4, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_0_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r3, -8(r1); \ + std r4, -16(r1); \ + stdu r5, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_0_IN_7_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r3, -8(r1); \ + std r4, -16(r1); \ + std r5, -24(r1); \ + std r6, -32(r1); \ + std r7, -40(r1); \ + std r8, -48(r1); \ + stdu r9, -56(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 56; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + ld r11, -40(r1); \ + std r8, 0(r11); \ + ld r11, -48(r1); \ + std r9, 0(r11); \ + ld r11, -56(r1); \ + std r10, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r4, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + stdu r5, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + std r5, -16(r1); \ + stdu r6, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr 
r0; \ + blr + +#define LV1_1_IN_4_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + std r5, -16(r1); \ + std r6, -24(r1); \ + stdu r7, -32(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 32; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_5_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + std r5, -16(r1); \ + std r6, -24(r1); \ + std r7, -32(r1); \ + stdu r8, -40(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 40; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + ld r11, -40(r1); \ + std r8, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_6_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + std r5, -16(r1); \ + std r6, -24(r1); \ + std r7, -32(r1); \ + std r8, -40(r1); \ + stdu r9, -48(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 48; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + ld r11, -40(r1); \ + std r8, 0(r11); \ + ld r11, -48(r1); \ + std r9, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_7_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + std r5, -16(r1); \ + std r6, -24(r1); \ + std r7, -32(r1); \ + std r8, -40(r1); \ + std r9, -48(r1); \ + stdu r10, -56(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 56; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + ld r11, -40(r1); \ + std r8, 0(r11); \ + ld r11, -48(r1); \ + std r9, 0(r11); \ + ld r11, -56(r1); \ + std r10, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_2_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r5, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_2_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r5, -8(r1); \ + stdu r6, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_2_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r5, -8(r1); \ + std r6, -16(r1); \ + stdu r7, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_2_IN_4_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r5, -8(r1); \ + std r6, -16(r1); \ + std r7, -24(r1); \ + stdu r8, -32(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi 
r1, r1, 32; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_2_IN_5_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r5, -8(r1); \ + std r6, -16(r1); \ + std r7, -24(r1); \ + std r8, -32(r1); \ + stdu r9, -40(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 40; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + ld r11, -40(r1); \ + std r8, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_3_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r6, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_3_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r6, -8(r1); \ + stdu r7, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_3_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r6, -8(r1); \ + std r7, -16(r1); \ + stdu r8, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_4_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r7, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_4_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r7, -8(r1); \ + stdu r8, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_4_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r7, -8(r1); \ + std r8, -16(r1); \ + stdu r9, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_5_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r8, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_5_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r8, -8(r1); \ + stdu r9, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_5_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 
16(r1); \ + \ + std r8, -8(r1); \ + std r9, -16(r1); \ + stdu r10, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_6_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r9, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_6_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r9, -8(r1); \ + stdu r10, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_6_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r9, -8(r1); \ + stdu r10, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, 48+8*8(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_7_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r10, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_7_IN_6_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r10, 48+8*7(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + ld r11, 48+8*7(r1); \ + std r4, 0(r11); \ + ld r11, 48+8*8(r1); \ + std r5, 0(r11); \ + ld r11, 48+8*9(r1); \ + std r6, 0(r11); \ + ld r11, 48+8*10(r1); \ + std r7, 0(r11); \ + ld r11, 48+8*11(r1); \ + std r8, 0(r11); \ + ld r11, 48+8*12(r1); \ + std r9, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_8_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + ld r11, 48+8*8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + + .text + +/* the lv1 underscored call definitions expand here */ + +#define LV1_CALL(name, in, out, num) LV1_##in##_IN_##out##_OUT(lv1_##name, num) +#include <asm/lv1call.h> diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c new file mode 100644 index 000000000..a6c42f343 --- /dev/null +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -0,0 +1,793 @@ +/* + * PS3 interrupt routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/irq.h> + +#include <asm/machdep.h> +#include <asm/udbg.h> +#include <asm/lv1call.h> +#include <asm/smp.h> + +#include "platform.h" + +#if defined(DEBUG) +#define DBG udbg_printf +#define FAIL udbg_printf +#else +#define DBG pr_devel +#define FAIL pr_debug +#endif + +/** + * struct ps3_bmp - a per cpu irq status and mask bitmap structure + * @status: 256 bit status bitmap indexed by plug + * @unused_1: Alignment + * @mask: 256 bit mask bitmap indexed by plug + * @unused_2: Alignment + * + * The HV maintains per SMT thread mappings of HV outlet to HV plug on + * behalf of the guest. These mappings are implemented as 256 bit guest + * supplied bitmaps indexed by plug number. The addresses of the bitmaps + * are registered with the HV through lv1_configure_irq_state_bitmap(). + * The HV requires that the 512 bits of status + mask not cross a page + * boundary. PS3_BMP_MINALIGN is used to define this minimal 64 byte + * alignment. + * + * The HV supports 256 plugs per thread, assigned as {0..255}, for a total + * of 512 plugs supported on a processor. To simplify the logic this + * implementation equates HV plug value to Linux virq value, constrains each + * interrupt to have a system wide unique plug number, and limits the range + * of the plug values to map into the first dword of the bitmaps. This + * gives a usable range of plug values of {NUM_ISA_INTERRUPTS..63}. Note + * that there is no constraint on how many in this set an individual thread + * can acquire. + * + * The mask is declared as unsigned long so we can use set/clear_bit on it. + */ + +#define PS3_BMP_MINALIGN 64 + +struct ps3_bmp { + struct { + u64 status; + u64 unused_1[3]; + unsigned long mask; + u64 unused_2[3]; + }; +}; + +/** + * struct ps3_private - a per cpu data structure + * @bmp: ps3_bmp structure + * @bmp_lock: Synchronize access to bmp. + * @ipi_debug_brk_mask: Mask for debug break IPIs + * @ppe_id: HV logical_ppe_id + * @thread_id: HV thread_id + * @ipi_mask: Mask of IPI virqs + */ + +struct ps3_private { + struct ps3_bmp bmp __attribute__ ((aligned (PS3_BMP_MINALIGN))); + spinlock_t bmp_lock; + u64 ppe_id; + u64 thread_id; + unsigned long ipi_debug_brk_mask; + unsigned long ipi_mask; +}; + +static DEFINE_PER_CPU(struct ps3_private, ps3_private); + +/** + * ps3_chip_mask - Mask an interrupt by clearing its enable bit in ps3_bmp. + * @virq: The assigned Linux virq. + * + * Clears the plug's enable bit in ps3_bmp.mask and calls lv1_did_update_interrupt_mask(). + */ + +static void ps3_chip_mask(struct irq_data *d) +{ + struct ps3_private *pd = irq_data_get_irq_chip_data(d); + unsigned long flags; + + DBG("%s:%d: thread_id %llu, virq %d\n", __func__, __LINE__, + pd->thread_id, d->irq); + + local_irq_save(flags); + clear_bit(63 - d->irq, &pd->bmp.mask); + lv1_did_update_interrupt_mask(pd->ppe_id, pd->thread_id); + local_irq_restore(flags); +} + +/** + * ps3_chip_unmask - Unmask an interrupt by setting its enable bit in ps3_bmp. + * @virq: The assigned Linux virq. + * + * Sets the plug's enable bit in ps3_bmp.mask and calls lv1_did_update_interrupt_mask(). 
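+ * + * As a worked example of the layout described for struct ps3_bmp above: + * plug (virq) n is tracked by bit 63 - n of the first 64-bit word of the + * bitmap, so plug 18 maps to bit 45. This is the arithmetic behind the + * set_bit()/clear_bit() calls here and in ps3_chip_mask().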
+ */ + +static void ps3_chip_unmask(struct irq_data *d) +{ + struct ps3_private *pd = irq_data_get_irq_chip_data(d); + unsigned long flags; + + DBG("%s:%d: thread_id %llu, virq %d\n", __func__, __LINE__, + pd->thread_id, d->irq); + + local_irq_save(flags); + set_bit(63 - d->irq, &pd->bmp.mask); + lv1_did_update_interrupt_mask(pd->ppe_id, pd->thread_id); + local_irq_restore(flags); +} + +/** + * ps3_chip_eoi - HV end-of-interrupt. + * @virq: The assigned Linux virq. + * + * Calls lv1_end_of_interrupt_ext(). + */ + +static void ps3_chip_eoi(struct irq_data *d) +{ + const struct ps3_private *pd = irq_data_get_irq_chip_data(d); + + /* non-IPIs are EOIed here. */ + + if (!test_bit(63 - d->irq, &pd->ipi_mask)) + lv1_end_of_interrupt_ext(pd->ppe_id, pd->thread_id, d->irq); +} + +/** + * ps3_irq_chip - Represents the ps3_bmp as a Linux struct irq_chip. + */ + +static struct irq_chip ps3_irq_chip = { + .name = "ps3", + .irq_mask = ps3_chip_mask, + .irq_unmask = ps3_chip_unmask, + .irq_eoi = ps3_chip_eoi, +}; + +/** + * ps3_virq_setup - virq related setup. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @outlet: The HV outlet from the various create outlet routines. + * @virq: The assigned Linux virq. + * + * Calls irq_create_mapping() to get a virq and sets the chip data to + * ps3_private data. + */ + +static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet, + unsigned int *virq) +{ + int result; + struct ps3_private *pd; + + /* This defines the default interrupt distribution policy. */ + + if (cpu == PS3_BINDING_CPU_ANY) + cpu = 0; + + pd = &per_cpu(ps3_private, cpu); + + *virq = irq_create_mapping(NULL, outlet); + + if (*virq == NO_IRQ) { + FAIL("%s:%d: irq_create_mapping failed: outlet %lu\n", + __func__, __LINE__, outlet); + result = -ENOMEM; + goto fail_create; + } + + DBG("%s:%d: outlet %lu => cpu %u, virq %u\n", __func__, __LINE__, + outlet, cpu, *virq); + + result = irq_set_chip_data(*virq, pd); + + if (result) { + FAIL("%s:%d: irq_set_chip_data failed\n", + __func__, __LINE__); + goto fail_set; + } + + ps3_chip_mask(irq_get_irq_data(*virq)); + + return result; + +fail_set: + irq_dispose_mapping(*virq); +fail_create: + return result; +} + +/** + * ps3_virq_destroy - virq related teardown. + * @virq: The assigned Linux virq. + * + * Clears chip data and calls irq_dispose_mapping() for the virq. + */ + +static int ps3_virq_destroy(unsigned int virq) +{ + const struct ps3_private *pd = irq_get_chip_data(virq); + + DBG("%s:%d: ppe_id %llu, thread_id %llu, virq %u\n", __func__, + __LINE__, pd->ppe_id, pd->thread_id, virq); + + irq_set_chip_data(virq, NULL); + irq_dispose_mapping(virq); + + DBG("%s:%d <-\n", __func__, __LINE__); + return 0; +} + +/** + * ps3_irq_plug_setup - Generic outlet and virq related setup. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @outlet: The HV outlet from the various create outlet routines. + * @virq: The assigned Linux virq. + * + * Sets up virq and connects the irq plug. + */ + +int ps3_irq_plug_setup(enum ps3_cpu_binding cpu, unsigned long outlet, + unsigned int *virq) +{ + int result; + struct ps3_private *pd; + + result = ps3_virq_setup(cpu, outlet, virq); + + if (result) { + FAIL("%s:%d: ps3_virq_setup failed\n", __func__, __LINE__); + goto fail_setup; + } + + pd = irq_get_chip_data(*virq); + + /* Binds outlet to cpu + virq. 
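+ * The plug number passed to lv1_connect_irq_plug_ext() below is the + * virq itself, since this implementation equates HV plug values and + * Linux virq values (see the struct ps3_bmp comment).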
*/ + + result = lv1_connect_irq_plug_ext(pd->ppe_id, pd->thread_id, *virq, + outlet, 0); + + if (result) { + FAIL("%s:%d: lv1_connect_irq_plug_ext failed: %s\n", + __func__, __LINE__, ps3_result(result)); + result = -EPERM; + goto fail_connect; + } + + return result; + +fail_connect: + ps3_virq_destroy(*virq); +fail_setup: + return result; +} +EXPORT_SYMBOL_GPL(ps3_irq_plug_setup); + +/** + * ps3_irq_plug_destroy - Generic outlet and virq related teardown. + * @virq: The assigned Linux virq. + * + * Disconnects the irq plug and tears down virq. + * Do not call for system bus event interrupts set up with + * ps3_sb_event_receive_port_setup(). + */ + +int ps3_irq_plug_destroy(unsigned int virq) +{ + int result; + const struct ps3_private *pd = irq_get_chip_data(virq); + + DBG("%s:%d: ppe_id %llu, thread_id %llu, virq %u\n", __func__, + __LINE__, pd->ppe_id, pd->thread_id, virq); + + ps3_chip_mask(irq_get_irq_data(virq)); + + result = lv1_disconnect_irq_plug_ext(pd->ppe_id, pd->thread_id, virq); + + if (result) + FAIL("%s:%d: lv1_disconnect_irq_plug_ext failed: %s\n", + __func__, __LINE__, ps3_result(result)); + + ps3_virq_destroy(virq); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_irq_plug_destroy); + +/** + * ps3_event_receive_port_setup - Setup an event receive port. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @virq: The assigned Linux virq. + * + * The virq can be used with lv1_connect_interrupt_event_receive_port() to + * arrange to receive interrupts from system-bus devices, or with + * ps3_send_event_locally() to signal events. + */ + +int ps3_event_receive_port_setup(enum ps3_cpu_binding cpu, unsigned int *virq) +{ + int result; + u64 outlet; + + result = lv1_construct_event_receive_port(&outlet); + + if (result) { + FAIL("%s:%d: lv1_construct_event_receive_port failed: %s\n", + __func__, __LINE__, ps3_result(result)); + *virq = NO_IRQ; + return result; + } + + result = ps3_irq_plug_setup(cpu, outlet, virq); + BUG_ON(result); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_event_receive_port_setup); + +/** + * ps3_event_receive_port_destroy - Destroy an event receive port. + * @virq: The assigned Linux virq. + * + * Since ps3_event_receive_port_destroy destroys the receive port outlet, + * SB devices need to call disconnect_interrupt_event_receive_port() before + * this. + */ + +int ps3_event_receive_port_destroy(unsigned int virq) +{ + int result; + + DBG(" -> %s:%d virq %u\n", __func__, __LINE__, virq); + + ps3_chip_mask(irq_get_irq_data(virq)); + + result = lv1_destruct_event_receive_port(virq_to_hw(virq)); + + if (result) + FAIL("%s:%d: lv1_destruct_event_receive_port failed: %s\n", + __func__, __LINE__, ps3_result(result)); + + /* + * Don't call ps3_virq_destroy() here since ps3_smp_cleanup_cpu() + * calls from interrupt context (smp_call_function) when kexecing. + */ + + DBG(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +int ps3_send_event_locally(unsigned int virq) +{ + return lv1_send_event_locally(virq_to_hw(virq)); +} + +/** + * ps3_sb_event_receive_port_setup - Setup a system bus event receive port. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @dev: The system bus device instance. + * @virq: The assigned Linux virq. + * + * An event irq represents a virtual device interrupt. The interrupt_id + * corresponds to the software interrupt number. 
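+ * + * A minimal usage sketch; my_dev and my_handler are illustrative names + * only, and the storage-notification probe thread (ps3_probe_thread()) + * follows this same pattern: + * + * result = ps3_sb_event_receive_port_setup(&my_dev->sbd, + * PS3_BINDING_CPU_ANY, &virq); + * if (!result) + * result = request_irq(virq, my_handler, 0, "my-dev", my_dev);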
+ */ + +int ps3_sb_event_receive_port_setup(struct ps3_system_bus_device *dev, + enum ps3_cpu_binding cpu, unsigned int *virq) +{ + /* this should go in system-bus.c */ + + int result; + + result = ps3_event_receive_port_setup(cpu, virq); + + if (result) + return result; + + result = lv1_connect_interrupt_event_receive_port(dev->bus_id, + dev->dev_id, virq_to_hw(*virq), dev->interrupt_id); + + if (result) { + FAIL("%s:%d: lv1_connect_interrupt_event_receive_port" + " failed: %s\n", __func__, __LINE__, + ps3_result(result)); + ps3_event_receive_port_destroy(*virq); + *virq = NO_IRQ; + return result; + } + + DBG("%s:%d: interrupt_id %u, virq %u\n", __func__, __LINE__, + dev->interrupt_id, *virq); + + return 0; +} +EXPORT_SYMBOL(ps3_sb_event_receive_port_setup); + +int ps3_sb_event_receive_port_destroy(struct ps3_system_bus_device *dev, + unsigned int virq) +{ + /* this should go in system-bus.c */ + + int result; + + DBG(" -> %s:%d: interrupt_id %u, virq %u\n", __func__, __LINE__, + dev->interrupt_id, virq); + + result = lv1_disconnect_interrupt_event_receive_port(dev->bus_id, + dev->dev_id, virq_to_hw(virq), dev->interrupt_id); + + if (result) + FAIL("%s:%d: lv1_disconnect_interrupt_event_receive_port" + " failed: %s\n", __func__, __LINE__, + ps3_result(result)); + + result = ps3_event_receive_port_destroy(virq); + BUG_ON(result); + + /* + * ps3_event_receive_port_destroy() destroys the IRQ plug, + * so don't call ps3_irq_plug_destroy() here. + */ + + result = ps3_virq_destroy(virq); + BUG_ON(result); + + DBG(" <- %s:%d\n", __func__, __LINE__); + return result; +} +EXPORT_SYMBOL(ps3_sb_event_receive_port_destroy); + +/** + * ps3_io_irq_setup - Setup a system bus io irq. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @interrupt_id: The device interrupt id read from the system repository. + * @virq: The assigned Linux virq. + * + * An io irq represents a non-virtualized device interrupt. interrupt_id + * corresponds to the interrupt number of the interrupt controller. + */ + +int ps3_io_irq_setup(enum ps3_cpu_binding cpu, unsigned int interrupt_id, + unsigned int *virq) +{ + int result; + u64 outlet; + + result = lv1_construct_io_irq_outlet(interrupt_id, &outlet); + + if (result) { + FAIL("%s:%d: lv1_construct_io_irq_outlet failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return result; + } + + result = ps3_irq_plug_setup(cpu, outlet, virq); + BUG_ON(result); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_io_irq_setup); + +int ps3_io_irq_destroy(unsigned int virq) +{ + int result; + unsigned long outlet = virq_to_hw(virq); + + ps3_chip_mask(irq_get_irq_data(virq)); + + /* + * lv1_destruct_io_irq_outlet() will destroy the IRQ plug, + * so call ps3_irq_plug_destroy() first. + */ + + result = ps3_irq_plug_destroy(virq); + BUG_ON(result); + + result = lv1_destruct_io_irq_outlet(outlet); + + if (result) + FAIL("%s:%d: lv1_destruct_io_irq_outlet failed: %s\n", + __func__, __LINE__, ps3_result(result)); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_io_irq_destroy); + +/** + * ps3_vuart_irq_setup - Setup the system virtual uart virq. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @virt_addr_bmp: The caller supplied virtual uart interrupt bitmap. + * @virq: The assigned Linux virq. + * + * The system supports only a single virtual uart, so multiple calls without + * freeing the interrupt will return a wrong state error. 
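+ * + * Illustrative call sequence, assuming a caller-owned bitmap buffer bmp + * that lives in the kernel linear mapping (enforced by the + * is_kernel_addr() check below): + * + * result = ps3_vuart_irq_setup(PS3_BINDING_CPU_ANY, &bmp, &virq); + * ... + * result = ps3_vuart_irq_destroy(virq);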
+ */ + +int ps3_vuart_irq_setup(enum ps3_cpu_binding cpu, void* virt_addr_bmp, + unsigned int *virq) +{ + int result; + u64 outlet; + u64 lpar_addr; + + BUG_ON(!is_kernel_addr((u64)virt_addr_bmp)); + + lpar_addr = ps3_mm_phys_to_lpar(__pa(virt_addr_bmp)); + + result = lv1_configure_virtual_uart_irq(lpar_addr, &outlet); + + if (result) { + FAIL("%s:%d: lv1_configure_virtual_uart_irq failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return result; + } + + result = ps3_irq_plug_setup(cpu, outlet, virq); + BUG_ON(result); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_vuart_irq_setup); + +int ps3_vuart_irq_destroy(unsigned int virq) +{ + int result; + + ps3_chip_mask(irq_get_irq_data(virq)); + result = lv1_deconfigure_virtual_uart_irq(); + + if (result) { + FAIL("%s:%d: lv1_deconfigure_virtual_uart_irq failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return result; + } + + result = ps3_irq_plug_destroy(virq); + BUG_ON(result); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_vuart_irq_destroy); + +/** + * ps3_spe_irq_setup - Setup an spe virq. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @spe_id: The spe_id returned from lv1_construct_logical_spe(). + * @class: The spe interrupt class {0,1,2}. + * @virq: The assigned Linux virq. + * + */ + +int ps3_spe_irq_setup(enum ps3_cpu_binding cpu, unsigned long spe_id, + unsigned int class, unsigned int *virq) +{ + int result; + u64 outlet; + + BUG_ON(class > 2); + + result = lv1_get_spe_irq_outlet(spe_id, class, &outlet); + + if (result) { + FAIL("%s:%d: lv1_get_spe_irq_outlet failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return result; + } + + result = ps3_irq_plug_setup(cpu, outlet, virq); + BUG_ON(result); + + return result; +} + +int ps3_spe_irq_destroy(unsigned int virq) +{ + int result; + + ps3_chip_mask(irq_get_irq_data(virq)); + + result = ps3_irq_plug_destroy(virq); + BUG_ON(result); + + return result; +} + + +#define PS3_INVALID_OUTLET ((irq_hw_number_t)-1) +#define PS3_PLUG_MAX 63 + +#if defined(DEBUG) +static void _dump_64_bmp(const char *header, const u64 *p, unsigned cpu, + const char* func, int line) +{ + pr_debug("%s:%d: %s %u {%04llx_%04llx_%04llx_%04llx}\n", + func, line, header, cpu, + *p >> 48, (*p >> 32) & 0xffff, (*p >> 16) & 0xffff, + *p & 0xffff); +} + +static void __maybe_unused _dump_256_bmp(const char *header, + const u64 *p, unsigned cpu, const char* func, int line) +{ + pr_debug("%s:%d: %s %u {%016llx:%016llx:%016llx:%016llx}\n", + func, line, header, cpu, p[0], p[1], p[2], p[3]); +} + +#define dump_bmp(_x) _dump_bmp(_x, __func__, __LINE__) +static void _dump_bmp(struct ps3_private* pd, const char* func, int line) +{ + unsigned long flags; + + spin_lock_irqsave(&pd->bmp_lock, flags); + _dump_64_bmp("stat", &pd->bmp.status, pd->thread_id, func, line); + _dump_64_bmp("mask", (u64*)&pd->bmp.mask, pd->thread_id, func, line); + spin_unlock_irqrestore(&pd->bmp_lock, flags); +} + +#define dump_mask(_x) _dump_mask(_x, __func__, __LINE__) +static void __maybe_unused _dump_mask(struct ps3_private *pd, + const char* func, int line) +{ + unsigned long flags; + + spin_lock_irqsave(&pd->bmp_lock, flags); + _dump_64_bmp("mask", (u64*)&pd->bmp.mask, pd->thread_id, func, line); + spin_unlock_irqrestore(&pd->bmp_lock, flags); +} +#else +static void dump_bmp(struct ps3_private* pd) {}; +#endif /* defined(DEBUG) */ + +static int ps3_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hwirq) +{ + DBG("%s:%d: hwirq %lu, virq %u\n", __func__, __LINE__, 
hwirq, + virq); + + irq_set_chip_and_handler(virq, &ps3_irq_chip, handle_fasteoi_irq); + + return 0; +} + +static int ps3_host_match(struct irq_domain *h, struct device_node *np) +{ + /* Match all */ + return 1; +} + +static const struct irq_domain_ops ps3_host_ops = { + .map = ps3_host_map, + .match = ps3_host_match, +}; + +void __init ps3_register_ipi_debug_brk(unsigned int cpu, unsigned int virq) +{ + struct ps3_private *pd = &per_cpu(ps3_private, cpu); + + set_bit(63 - virq, &pd->ipi_debug_brk_mask); + + DBG("%s:%d: cpu %u, virq %u, mask %lxh\n", __func__, __LINE__, + cpu, virq, pd->ipi_debug_brk_mask); +} + +void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq) +{ + struct ps3_private *pd = &per_cpu(ps3_private, cpu); + + set_bit(63 - virq, &pd->ipi_mask); + + DBG("%s:%d: cpu %u, virq %u, ipi_mask %lxh\n", __func__, __LINE__, + cpu, virq, pd->ipi_mask); +} + +static unsigned int ps3_get_irq(void) +{ + struct ps3_private *pd = this_cpu_ptr(&ps3_private); + u64 x = (pd->bmp.status & pd->bmp.mask); + unsigned int plug; + + /* check for ipi break first to stop this cpu ASAP */ + + if (x & pd->ipi_debug_brk_mask) + x &= pd->ipi_debug_brk_mask; + + asm volatile("cntlzd %0,%1" : "=r" (plug) : "r" (x)); + plug &= 0x3f; + + if (unlikely(plug == NO_IRQ)) { + DBG("%s:%d: no plug found: thread_id %llu\n", __func__, + __LINE__, pd->thread_id); + dump_bmp(&per_cpu(ps3_private, 0)); + dump_bmp(&per_cpu(ps3_private, 1)); + return NO_IRQ; + } + +#if defined(DEBUG) + if (unlikely(plug < NUM_ISA_INTERRUPTS || plug > PS3_PLUG_MAX)) { + dump_bmp(&per_cpu(ps3_private, 0)); + dump_bmp(&per_cpu(ps3_private, 1)); + BUG(); + } +#endif + + /* IPIs are EOIed here. */ + + if (test_bit(63 - plug, &pd->ipi_mask)) + lv1_end_of_interrupt_ext(pd->ppe_id, pd->thread_id, plug); + + return plug; +} + +void __init ps3_init_IRQ(void) +{ + int result; + unsigned cpu; + struct irq_domain *host; + + host = irq_domain_add_nomap(NULL, PS3_PLUG_MAX + 1, &ps3_host_ops, NULL); + irq_set_default_host(host); + + for_each_possible_cpu(cpu) { + struct ps3_private *pd = &per_cpu(ps3_private, cpu); + + lv1_get_logical_ppe_id(&pd->ppe_id); + pd->thread_id = get_hard_smp_processor_id(cpu); + spin_lock_init(&pd->bmp_lock); + + DBG("%s:%d: ppe_id %llu, thread_id %llu, bmp %lxh\n", + __func__, __LINE__, pd->ppe_id, pd->thread_id, + ps3_mm_phys_to_lpar(__pa(&pd->bmp))); + + result = lv1_configure_irq_state_bitmap(pd->ppe_id, + pd->thread_id, ps3_mm_phys_to_lpar(__pa(&pd->bmp))); + + if (result) + FAIL("%s:%d: lv1_configure_irq_state_bitmap failed:" + " %s\n", __func__, __LINE__, + ps3_result(result)); + } + + ppc_md.get_irq = ps3_get_irq; +} + +void ps3_shutdown_IRQ(int cpu) +{ + int result; + u64 ppe_id; + u64 thread_id = get_hard_smp_processor_id(cpu); + + lv1_get_logical_ppe_id(&ppe_id); + result = lv1_configure_irq_state_bitmap(ppe_id, thread_id, 0); + + DBG("%s:%d: lv1_configure_irq_state_bitmap (%llu:%llu/%d) %s\n", __func__, + __LINE__, ppe_id, thread_id, cpu, ps3_result(result)); +} diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c new file mode 100644 index 000000000..b0f34663b --- /dev/null +++ b/arch/powerpc/platforms/ps3/mm.c @@ -0,0 +1,1251 @@ +/* + * PS3 address space management. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/memblock.h> +#include <linux/slab.h> + +#include <asm/cell-regs.h> +#include <asm/firmware.h> +#include <asm/prom.h> +#include <asm/udbg.h> +#include <asm/lv1call.h> +#include <asm/setup.h> + +#include "platform.h" + +#if defined(DEBUG) +#define DBG udbg_printf +#else +#define DBG pr_devel +#endif + +enum { +#if defined(CONFIG_PS3_DYNAMIC_DMA) + USE_DYNAMIC_DMA = 1, +#else + USE_DYNAMIC_DMA = 0, +#endif +}; + +enum { + PAGE_SHIFT_4K = 12U, + PAGE_SHIFT_64K = 16U, + PAGE_SHIFT_16M = 24U, +}; + +static unsigned long make_page_sizes(unsigned long a, unsigned long b) +{ + return (a << 56) | (b << 48); +} + +enum { + ALLOCATE_MEMORY_TRY_ALT_UNIT = 0X04, + ALLOCATE_MEMORY_ADDR_ZERO = 0X08, +}; + +/* valid htab sizes are {18,19,20} = 256K, 512K, 1M */ + +enum { + HTAB_SIZE_MAX = 20U, /* HV limit of 1MB */ + HTAB_SIZE_MIN = 18U, /* CPU limit of 256KB */ +}; + +/*============================================================================*/ +/* virtual address space routines */ +/*============================================================================*/ + +/** + * struct mem_region - memory region structure + * @base: base address + * @size: size in bytes + * @offset: difference between base and rm.size + * @destroy: flag if region should be destroyed upon shutdown + */ + +struct mem_region { + u64 base; + u64 size; + unsigned long offset; + int destroy; +}; + +/** + * struct map - address space state variables holder + * @total: total memory available as reported by HV + * @vas_id - HV virtual address space id + * @htab_size: htab size in bytes + * + * The HV virtual address space (vas) allows for hotplug memory regions. + * Memory regions can be created and destroyed in the vas at runtime. 
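+ *
+ * A worked example of the lpar translation (the numbers are illustrative
+ * only, not real HV assignments): with rm.size = 0x8000000 (128 MiB) and an
+ * r1 region created at lpar address 0x28000000, r1.offset = r1.base -
+ * rm.size = 0x20000000.  Linux physical address 0x9000000 then translates
+ * to lpar address 0x9000000 + 0x20000000 = 0x29000000, while physical
+ * addresses below rm.size translate one-to-one.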
+ * @rm: real mode (bootmem) region + * @r1: highmem region(s) + * + * ps3 addresses + * virt_addr: a cpu 'translated' effective address + * phys_addr: an address in what Linux thinks is the physical address space + * lpar_addr: an address in the HV virtual address space + * bus_addr: an io controller 'translated' address on a device bus + */ + +struct map { + u64 total; + u64 vas_id; + u64 htab_size; + struct mem_region rm; + struct mem_region r1; +}; + +#define debug_dump_map(x) _debug_dump_map(x, __func__, __LINE__) +static void __maybe_unused _debug_dump_map(const struct map *m, + const char *func, int line) +{ + DBG("%s:%d: map.total = %llxh\n", func, line, m->total); + DBG("%s:%d: map.rm.size = %llxh\n", func, line, m->rm.size); + DBG("%s:%d: map.vas_id = %llu\n", func, line, m->vas_id); + DBG("%s:%d: map.htab_size = %llxh\n", func, line, m->htab_size); + DBG("%s:%d: map.r1.base = %llxh\n", func, line, m->r1.base); + DBG("%s:%d: map.r1.offset = %lxh\n", func, line, m->r1.offset); + DBG("%s:%d: map.r1.size = %llxh\n", func, line, m->r1.size); +} + +static struct map map; + +/** + * ps3_mm_phys_to_lpar - translate a linux physical address to lpar address + * @phys_addr: linux physical address + */ + +unsigned long ps3_mm_phys_to_lpar(unsigned long phys_addr) +{ + BUG_ON(is_kernel_addr(phys_addr)); + return (phys_addr < map.rm.size || phys_addr >= map.total) + ? phys_addr : phys_addr + map.r1.offset; +} + +EXPORT_SYMBOL(ps3_mm_phys_to_lpar); + +/** + * ps3_mm_vas_create - create the virtual address space + */ + +void __init ps3_mm_vas_create(unsigned long* htab_size) +{ + int result; + u64 start_address; + u64 size; + u64 access_right; + u64 max_page_size; + u64 flags; + + result = lv1_query_logical_partition_address_region_info(0, + &start_address, &size, &access_right, &max_page_size, + &flags); + + if (result) { + DBG("%s:%d: lv1_query_logical_partition_address_region_info " + "failed: %s\n", __func__, __LINE__, + ps3_result(result)); + goto fail; + } + + if (max_page_size < PAGE_SHIFT_16M) { + DBG("%s:%d: bad max_page_size %llxh\n", __func__, __LINE__, + max_page_size); + goto fail; + } + + BUILD_BUG_ON(CONFIG_PS3_HTAB_SIZE > HTAB_SIZE_MAX); + BUILD_BUG_ON(CONFIG_PS3_HTAB_SIZE < HTAB_SIZE_MIN); + + result = lv1_construct_virtual_address_space(CONFIG_PS3_HTAB_SIZE, + 2, make_page_sizes(PAGE_SHIFT_16M, PAGE_SHIFT_64K), + &map.vas_id, &map.htab_size); + + if (result) { + DBG("%s:%d: lv1_construct_virtual_address_space failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto fail; + } + + result = lv1_select_virtual_address_space(map.vas_id); + + if (result) { + DBG("%s:%d: lv1_select_virtual_address_space failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto fail; + } + + *htab_size = map.htab_size; + + debug_dump_map(&map); + + return; + +fail: + panic("ps3_mm_vas_create failed"); +} + +/** + * ps3_mm_vas_destroy - + */ + +void ps3_mm_vas_destroy(void) +{ + int result; + + DBG("%s:%d: map.vas_id = %llu\n", __func__, __LINE__, map.vas_id); + + if (map.vas_id) { + result = lv1_select_virtual_address_space(0); + BUG_ON(result); + result = lv1_destruct_virtual_address_space(map.vas_id); + BUG_ON(result); + map.vas_id = 0; + } +} + +static int ps3_mm_get_repository_highmem(struct mem_region *r) +{ + int result; + + /* Assume a single highmem region. 
*/ + + result = ps3_repository_read_highmem_info(0, &r->base, &r->size); + + if (result) + goto zero_region; + + if (!r->base || !r->size) { + result = -1; + goto zero_region; + } + + r->offset = r->base - map.rm.size; + + DBG("%s:%d: Found high region in repository: %llxh %llxh\n", + __func__, __LINE__, r->base, r->size); + + return 0; + +zero_region: + DBG("%s:%d: No high region in repository.\n", __func__, __LINE__); + + r->size = r->base = r->offset = 0; + return result; +} + +static int ps3_mm_set_repository_highmem(const struct mem_region *r) +{ + /* Assume a single highmem region. */ + + return r ? ps3_repository_write_highmem_info(0, r->base, r->size) : + ps3_repository_write_highmem_info(0, 0, 0); +} + +/** + * ps3_mm_region_create - create a memory region in the vas + * @r: pointer to a struct mem_region to accept initialized values + * @size: requested region size + * + * This implementation creates the region with the vas large page size. + * @size is rounded down to a multiple of the vas large page size. + */ + +static int ps3_mm_region_create(struct mem_region *r, unsigned long size) +{ + int result; + u64 muid; + + r->size = _ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M); + + DBG("%s:%d requested %lxh\n", __func__, __LINE__, size); + DBG("%s:%d actual %llxh\n", __func__, __LINE__, r->size); + DBG("%s:%d difference %llxh (%lluMB)\n", __func__, __LINE__, + size - r->size, (size - r->size) / 1024 / 1024); + + if (r->size == 0) { + DBG("%s:%d: size == 0\n", __func__, __LINE__); + result = -1; + goto zero_region; + } + + result = lv1_allocate_memory(r->size, PAGE_SHIFT_16M, 0, + ALLOCATE_MEMORY_TRY_ALT_UNIT, &r->base, &muid); + + if (result || r->base < map.rm.size) { + DBG("%s:%d: lv1_allocate_memory failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto zero_region; + } + + r->destroy = 1; + r->offset = r->base - map.rm.size; + return result; + +zero_region: + r->size = r->base = r->offset = 0; + return result; +} + +/** + * ps3_mm_region_destroy - destroy a memory region + * @r: pointer to struct mem_region + */ + +static void ps3_mm_region_destroy(struct mem_region *r) +{ + int result; + + if (!r->destroy) { + pr_info("%s:%d: Not destroying high region: %llxh %llxh\n", + __func__, __LINE__, r->base, r->size); + return; + } + + DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base); + + if (r->base) { + result = lv1_release_memory(r->base); + BUG_ON(result); + r->size = r->base = r->offset = 0; + map.total = map.rm.size; + } + ps3_mm_set_repository_highmem(NULL); +} + +/*============================================================================*/ +/* dma routines */ +/*============================================================================*/ + +/** + * dma_sb_lpar_to_bus - Translate an lpar address to ioc mapped bus address. 
+ * @r: pointer to dma region structure
+ * @lpar_addr: HV lpar address
+ */
+
+static unsigned long dma_sb_lpar_to_bus(struct ps3_dma_region *r,
+	unsigned long lpar_addr)
+{
+	if (lpar_addr >= map.rm.size)
+		lpar_addr -= map.r1.offset;
+	BUG_ON(lpar_addr < r->offset);
+	BUG_ON(lpar_addr >= r->offset + r->len);
+	return r->bus_addr + lpar_addr - r->offset;
+}
+
+#define dma_dump_region(_a) _dma_dump_region(_a, __func__, __LINE__)
+static void __maybe_unused _dma_dump_region(const struct ps3_dma_region *r,
+	const char *func, int line)
+{
+	DBG("%s:%d: dev %llu:%llu\n", func, line, r->dev->bus_id,
+		r->dev->dev_id);
+	DBG("%s:%d: page_size %u\n", func, line, r->page_size);
+	DBG("%s:%d: bus_addr %lxh\n", func, line, r->bus_addr);
+	DBG("%s:%d: len %lxh\n", func, line, r->len);
+	DBG("%s:%d: offset %lxh\n", func, line, r->offset);
+}
+
+/**
+ * struct dma_chunk - A chunk of dma pages mapped by the io controller.
+ * @region: The dma region that owns this chunk.
+ * @lpar_addr: Starting lpar address of the area to map.
+ * @bus_addr: Starting ioc bus address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @link: A struct list_head used with struct ps3_dma_region.chunk_list, the
+ * list of all chunks owned by the region.
+ * @usage_count: Number of currently active mappings sharing this chunk.
+ *
+ * This implementation uses a very simple dma page manager
+ * based on the dma_chunk structure.  This scheme assumes
+ * that all drivers use very well behaved dma ops.
+ */
+
+struct dma_chunk {
+	struct ps3_dma_region *region;
+	unsigned long lpar_addr;
+	unsigned long bus_addr;
+	unsigned long len;
+	struct list_head link;
+	unsigned int usage_count;
+};
+
+#define dma_dump_chunk(_a) _dma_dump_chunk(_a, __func__, __LINE__)
+static void _dma_dump_chunk(const struct dma_chunk *c, const char *func,
+	int line)
+{
+	DBG("%s:%d: r.dev %llu:%llu\n", func, line,
+		c->region->dev->bus_id, c->region->dev->dev_id);
+	DBG("%s:%d: r.bus_addr %lxh\n", func, line, c->region->bus_addr);
+	DBG("%s:%d: r.page_size %u\n", func, line, c->region->page_size);
+	DBG("%s:%d: r.len %lxh\n", func, line, c->region->len);
+	DBG("%s:%d: r.offset %lxh\n", func, line, c->region->offset);
+	DBG("%s:%d: c.lpar_addr %lxh\n", func, line, c->lpar_addr);
+	DBG("%s:%d: c.bus_addr %lxh\n", func, line, c->bus_addr);
+	DBG("%s:%d: c.len %lxh\n", func, line, c->len);
+}
+
+static struct dma_chunk *dma_find_chunk(struct ps3_dma_region *r,
+	unsigned long bus_addr, unsigned long len)
+{
+	struct dma_chunk *c;
+	unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, 1 << r->page_size);
+	unsigned long aligned_len = _ALIGN_UP(len + bus_addr - aligned_bus,
+		1 << r->page_size);
+
+	list_for_each_entry(c, &r->chunk_list.head, link) {
+		/* intersection */
+		if (aligned_bus >= c->bus_addr &&
+		    aligned_bus + aligned_len <= c->bus_addr + c->len)
+			return c;
+
+		/* below */
+		if (aligned_bus + aligned_len <= c->bus_addr)
+			continue;
+
+		/* above */
+		if (aligned_bus >= c->bus_addr + c->len)
+			continue;
+
+		/* we don't handle the multi-chunk case for now */
+		dma_dump_chunk(c);
+		BUG();
+	}
+	return NULL;
+}
+
+static struct dma_chunk *dma_find_chunk_lpar(struct ps3_dma_region *r,
+	unsigned long lpar_addr, unsigned long len)
+{
+	struct dma_chunk *c;
+	unsigned long aligned_lpar = _ALIGN_DOWN(lpar_addr, 1 << r->page_size);
+	unsigned long aligned_len = _ALIGN_UP(len + lpar_addr - aligned_lpar,
+		1 << r->page_size);
+
+	list_for_each_entry(c, &r->chunk_list.head, link) {
+		/* intersection */
+		if (c->lpar_addr <= aligned_lpar &&
+		    aligned_lpar < c->lpar_addr + c->len) {
+			if (aligned_lpar + aligned_len <=
c->lpar_addr + c->len)
+				return c;
+			else {
+				dma_dump_chunk(c);
+				BUG();
+			}
+		}
+		/* below */
+		if (aligned_lpar + aligned_len <= c->lpar_addr)
+			continue;
+
+		/* above */
+		if (c->lpar_addr + c->len <= aligned_lpar)
+			continue;
+	}
+	return NULL;
+}
+
+static int dma_sb_free_chunk(struct dma_chunk *c)
+{
+	int result = 0;
+
+	if (c->bus_addr) {
+		result = lv1_unmap_device_dma_region(c->region->dev->bus_id,
+			c->region->dev->dev_id, c->bus_addr, c->len);
+		BUG_ON(result);
+	}
+
+	kfree(c);
+	return result;
+}
+
+static int dma_ioc0_free_chunk(struct dma_chunk *c)
+{
+	int result = 0;
+	int iopage;
+	unsigned long offset;
+	struct ps3_dma_region *r = c->region;
+
+	DBG("%s:start\n", __func__);
+	for (iopage = 0; iopage < (c->len >> r->page_size); iopage++) {
+		offset = (1 << r->page_size) * iopage;
+		/* put INVALID entry */
+		result = lv1_put_iopte(0,
+			c->bus_addr + offset,
+			c->lpar_addr + offset,
+			r->ioid,
+			0);
+		DBG("%s: bus=%#lx, lpar=%#lx, ioid=%d\n", __func__,
+			c->bus_addr + offset,
+			c->lpar_addr + offset,
+			r->ioid);
+
+		if (result) {
+			DBG("%s:%d: lv1_put_iopte failed: %s\n", __func__,
+				__LINE__, ps3_result(result));
+		}
+	}
+	kfree(c);
+	DBG("%s:end\n", __func__);
+	return result;
+}
+
+/**
+ * dma_sb_map_pages - Maps dma pages into the io controller bus address space.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @phys_addr: Starting physical address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @c_out: A pointer to receive an allocated struct dma_chunk for this area.
+ * @iopte_flag: The CBE_IOPTE_* protection flags to use for the mapping.
+ *
+ * This is the lowest level dma mapping routine, and is the one that will
+ * make the HV call to add the pages into the io controller address space.
+ */
+
+static int dma_sb_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
+	unsigned long len, struct dma_chunk **c_out, u64 iopte_flag)
+{
+	int result;
+	struct dma_chunk *c;
+
+	c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC);
+
+	if (!c) {
+		result = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	c->region = r;
+	c->lpar_addr = ps3_mm_phys_to_lpar(phys_addr);
+	c->bus_addr = dma_sb_lpar_to_bus(r, c->lpar_addr);
+	c->len = len;
+
+	BUG_ON(iopte_flag != 0xf800000000000000UL);
+	result = lv1_map_device_dma_region(c->region->dev->bus_id,
+		c->region->dev->dev_id, c->lpar_addr,
+		c->bus_addr, c->len, iopte_flag);
+	if (result) {
+		DBG("%s:%d: lv1_map_device_dma_region failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		goto fail_map;
+	}
+
+	list_add(&c->link, &r->chunk_list.head);
+
+	*c_out = c;
+	return 0;
+
+fail_map:
+	kfree(c);
+fail_alloc:
+	*c_out = NULL;
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+}
+
+static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
+	unsigned long len, struct dma_chunk **c_out,
+	u64 iopte_flag)
+{
+	int result;
+	struct dma_chunk *c, *last;
+	int iopage, pages;
+	unsigned long offset;
+
+	DBG(KERN_ERR "%s: phy=%#lx, lpar=%#lx, len=%#lx\n", __func__,
+		phys_addr, ps3_mm_phys_to_lpar(phys_addr), len);
+	c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC);
+
+	if (!c) {
+		result = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	c->region = r;
+	c->len = len;
+	c->lpar_addr = ps3_mm_phys_to_lpar(phys_addr);
+	/* allocate IO address */
+	if (list_empty(&r->chunk_list.head)) {
+		/* first one */
+		c->bus_addr = r->bus_addr;
+	} else {
+		/* derive from last bus addr */
+		last = list_entry(r->chunk_list.head.next,
+			struct dma_chunk, link);
+		c->bus_addr = last->bus_addr + last->len;
+		DBG("%s: last bus=%#lx, len=%#lx\n", __func__,
+			last->bus_addr, last->len);
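+		/*
+		 * list_add() below inserts at the head of chunk_list, so
+		 * head.next is always the most recently allocated chunk and
+		 * new bus addresses simply stack on top of it; holes left in
+		 * the middle of the bus range by freed chunks are not
+		 * tracked or reused by this simple allocator.
+		 */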
+ } + + /* FIXME: check whether length exceeds region size */ + + /* build ioptes for the area */ + pages = len >> r->page_size; + DBG("%s: pgsize=%#x len=%#lx pages=%#x iopteflag=%#llx\n", __func__, + r->page_size, r->len, pages, iopte_flag); + for (iopage = 0; iopage < pages; iopage++) { + offset = (1 << r->page_size) * iopage; + result = lv1_put_iopte(0, + c->bus_addr + offset, + c->lpar_addr + offset, + r->ioid, + iopte_flag); + if (result) { + pr_warning("%s:%d: lv1_put_iopte failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto fail_map; + } + DBG("%s: pg=%d bus=%#lx, lpar=%#lx, ioid=%#x\n", __func__, + iopage, c->bus_addr + offset, c->lpar_addr + offset, + r->ioid); + } + + /* be sure that last allocated one is inserted at head */ + list_add(&c->link, &r->chunk_list.head); + + *c_out = c; + DBG("%s: end\n", __func__); + return 0; + +fail_map: + for (iopage--; 0 <= iopage; iopage--) { + lv1_put_iopte(0, + c->bus_addr + offset, + c->lpar_addr + offset, + r->ioid, + 0); + } + kfree(c); +fail_alloc: + *c_out = NULL; + return result; +} + +/** + * dma_sb_region_create - Create a device dma region. + * @r: Pointer to a struct ps3_dma_region. + * + * This is the lowest level dma region create routine, and is the one that + * will make the HV call to create the region. + */ + +static int dma_sb_region_create(struct ps3_dma_region *r) +{ + int result; + u64 bus_addr; + + DBG(" -> %s:%d:\n", __func__, __LINE__); + + BUG_ON(!r); + + if (!r->dev->bus_id) { + pr_info("%s:%d: %llu:%llu no dma\n", __func__, __LINE__, + r->dev->bus_id, r->dev->dev_id); + return 0; + } + + DBG("%s:%u: len = 0x%lx, page_size = %u, offset = 0x%lx\n", __func__, + __LINE__, r->len, r->page_size, r->offset); + + BUG_ON(!r->len); + BUG_ON(!r->page_size); + BUG_ON(!r->region_ops); + + INIT_LIST_HEAD(&r->chunk_list.head); + spin_lock_init(&r->chunk_list.lock); + + result = lv1_allocate_device_dma_region(r->dev->bus_id, r->dev->dev_id, + roundup_pow_of_two(r->len), r->page_size, r->region_type, + &bus_addr); + r->bus_addr = bus_addr; + + if (result) { + DBG("%s:%d: lv1_allocate_device_dma_region failed: %s\n", + __func__, __LINE__, ps3_result(result)); + r->len = r->bus_addr = 0; + } + + return result; +} + +static int dma_ioc0_region_create(struct ps3_dma_region *r) +{ + int result; + u64 bus_addr; + + INIT_LIST_HEAD(&r->chunk_list.head); + spin_lock_init(&r->chunk_list.lock); + + result = lv1_allocate_io_segment(0, + r->len, + r->page_size, + &bus_addr); + r->bus_addr = bus_addr; + if (result) { + DBG("%s:%d: lv1_allocate_io_segment failed: %s\n", + __func__, __LINE__, ps3_result(result)); + r->len = r->bus_addr = 0; + } + DBG("%s: len=%#lx, pg=%d, bus=%#lx\n", __func__, + r->len, r->page_size, r->bus_addr); + return result; +} + +/** + * dma_region_free - Free a device dma region. + * @r: Pointer to a struct ps3_dma_region. + * + * This is the lowest level dma region free routine, and is the one that + * will make the HV call to free the region. 
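+ *
+ * Create and free are paired through the region_ops dispatch table set up
+ * in ps3_dma_region_init(); drivers use the exported wrappers
+ * ps3_dma_region_create()/ps3_dma_region_free() near the end of this file
+ * rather than calling the sb/ioc0 variants directly.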
+ */
+
+static int dma_sb_region_free(struct ps3_dma_region *r)
+{
+	int result;
+	struct dma_chunk *c;
+	struct dma_chunk *tmp;
+
+	BUG_ON(!r);
+
+	if (!r->dev->bus_id) {
+		pr_info("%s:%d: %llu:%llu no dma\n", __func__, __LINE__,
+			r->dev->bus_id, r->dev->dev_id);
+		return 0;
+	}
+
+	list_for_each_entry_safe(c, tmp, &r->chunk_list.head, link) {
+		list_del(&c->link);
+		dma_sb_free_chunk(c);
+	}
+
+	result = lv1_free_device_dma_region(r->dev->bus_id, r->dev->dev_id,
+		r->bus_addr);
+
+	if (result)
+		DBG("%s:%d: lv1_free_device_dma_region failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+
+	r->bus_addr = 0;
+
+	return result;
+}
+
+static int dma_ioc0_region_free(struct ps3_dma_region *r)
+{
+	int result;
+	struct dma_chunk *c, *n;
+
+	DBG("%s: start\n", __func__);
+	list_for_each_entry_safe(c, n, &r->chunk_list.head, link) {
+		list_del(&c->link);
+		dma_ioc0_free_chunk(c);
+	}
+
+	result = lv1_release_io_segment(0, r->bus_addr);
+
+	if (result)
+		DBG("%s:%d: lv1_release_io_segment failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+
+	r->bus_addr = 0;
+	DBG("%s: end\n", __func__);
+
+	return result;
+}
+
+/**
+ * dma_sb_map_area - Map an area of memory into a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @virt_addr: Starting virtual address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @bus_addr: A pointer to return the starting ioc bus address of the area to
+ * map.
+ * @iopte_flag: The CBE_IOPTE_* protection flags to use for the mapping.
+ *
+ * This is the common dma mapping routine.
+ */
+
+static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr,
+	unsigned long len, dma_addr_t *bus_addr,
+	u64 iopte_flag)
+{
+	int result;
+	unsigned long flags;
+	struct dma_chunk *c;
+	unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr)
+		: virt_addr;
+	unsigned long aligned_phys = _ALIGN_DOWN(phys_addr, 1 << r->page_size);
+	unsigned long aligned_len = _ALIGN_UP(len + phys_addr - aligned_phys,
+		1 << r->page_size);
+	*bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr));
+
+	if (!USE_DYNAMIC_DMA) {
+		unsigned long lpar_addr = ps3_mm_phys_to_lpar(phys_addr);
+		DBG(" -> %s:%d\n", __func__, __LINE__);
+		DBG("%s:%d virt_addr %lxh\n", __func__, __LINE__,
+			virt_addr);
+		DBG("%s:%d phys_addr %lxh\n", __func__, __LINE__,
+			phys_addr);
+		DBG("%s:%d lpar_addr %lxh\n", __func__, __LINE__,
+			lpar_addr);
+		DBG("%s:%d len %lxh\n", __func__, __LINE__, len);
+		DBG("%s:%d bus_addr %llxh (%lxh)\n", __func__, __LINE__,
+			*bus_addr, len);
+	}
+
+	spin_lock_irqsave(&r->chunk_list.lock, flags);
+	c = dma_find_chunk(r, *bus_addr, len);
+
+	if (c) {
+		DBG("%s:%d: reusing mapped chunk\n", __func__, __LINE__);
+		dma_dump_chunk(c);
+		c->usage_count++;
+		spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+		return 0;
+	}
+
+	result = dma_sb_map_pages(r, aligned_phys, aligned_len, &c, iopte_flag);
+
+	if (result) {
+		*bus_addr = 0;
+		DBG("%s:%d: dma_sb_map_pages failed (%d)\n",
+			__func__, __LINE__, result);
+		spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+		return result;
+	}
+
+	c->usage_count = 1;
+
+	spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+	return result;
+}
+
+static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr,
+	unsigned long len, dma_addr_t *bus_addr,
+	u64 iopte_flag)
+{
+	int result;
+	unsigned long flags;
+	struct dma_chunk *c;
+	unsigned long phys_addr = is_kernel_addr(virt_addr) ?
__pa(virt_addr) + : virt_addr; + unsigned long aligned_phys = _ALIGN_DOWN(phys_addr, 1 << r->page_size); + unsigned long aligned_len = _ALIGN_UP(len + phys_addr - aligned_phys, + 1 << r->page_size); + + DBG(KERN_ERR "%s: vaddr=%#lx, len=%#lx\n", __func__, + virt_addr, len); + DBG(KERN_ERR "%s: ph=%#lx a_ph=%#lx a_l=%#lx\n", __func__, + phys_addr, aligned_phys, aligned_len); + + spin_lock_irqsave(&r->chunk_list.lock, flags); + c = dma_find_chunk_lpar(r, ps3_mm_phys_to_lpar(phys_addr), len); + + if (c) { + /* FIXME */ + BUG(); + *bus_addr = c->bus_addr + phys_addr - aligned_phys; + c->usage_count++; + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return 0; + } + + result = dma_ioc0_map_pages(r, aligned_phys, aligned_len, &c, + iopte_flag); + + if (result) { + *bus_addr = 0; + DBG("%s:%d: dma_ioc0_map_pages failed (%d)\n", + __func__, __LINE__, result); + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return result; + } + *bus_addr = c->bus_addr + phys_addr - aligned_phys; + DBG("%s: va=%#lx pa=%#lx a_pa=%#lx bus=%#llx\n", __func__, + virt_addr, phys_addr, aligned_phys, *bus_addr); + c->usage_count = 1; + + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return result; +} + +/** + * dma_sb_unmap_area - Unmap an area of memory from a device dma region. + * @r: Pointer to a struct ps3_dma_region. + * @bus_addr: The starting ioc bus address of the area to unmap. + * @len: Length in bytes of the area to unmap. + * + * This is the common dma unmap routine. + */ + +static int dma_sb_unmap_area(struct ps3_dma_region *r, dma_addr_t bus_addr, + unsigned long len) +{ + unsigned long flags; + struct dma_chunk *c; + + spin_lock_irqsave(&r->chunk_list.lock, flags); + c = dma_find_chunk(r, bus_addr, len); + + if (!c) { + unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, + 1 << r->page_size); + unsigned long aligned_len = _ALIGN_UP(len + bus_addr + - aligned_bus, 1 << r->page_size); + DBG("%s:%d: not found: bus_addr %llxh\n", + __func__, __LINE__, bus_addr); + DBG("%s:%d: not found: len %lxh\n", + __func__, __LINE__, len); + DBG("%s:%d: not found: aligned_bus %lxh\n", + __func__, __LINE__, aligned_bus); + DBG("%s:%d: not found: aligned_len %lxh\n", + __func__, __LINE__, aligned_len); + BUG(); + } + + c->usage_count--; + + if (!c->usage_count) { + list_del(&c->link); + dma_sb_free_chunk(c); + } + + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return 0; +} + +static int dma_ioc0_unmap_area(struct ps3_dma_region *r, + dma_addr_t bus_addr, unsigned long len) +{ + unsigned long flags; + struct dma_chunk *c; + + DBG("%s: start a=%#llx l=%#lx\n", __func__, bus_addr, len); + spin_lock_irqsave(&r->chunk_list.lock, flags); + c = dma_find_chunk(r, bus_addr, len); + + if (!c) { + unsigned long aligned_bus = _ALIGN_DOWN(bus_addr, + 1 << r->page_size); + unsigned long aligned_len = _ALIGN_UP(len + bus_addr + - aligned_bus, + 1 << r->page_size); + DBG("%s:%d: not found: bus_addr %llxh\n", + __func__, __LINE__, bus_addr); + DBG("%s:%d: not found: len %lxh\n", + __func__, __LINE__, len); + DBG("%s:%d: not found: aligned_bus %lxh\n", + __func__, __LINE__, aligned_bus); + DBG("%s:%d: not found: aligned_len %lxh\n", + __func__, __LINE__, aligned_len); + BUG(); + } + + c->usage_count--; + + if (!c->usage_count) { + list_del(&c->link); + dma_ioc0_free_chunk(c); + } + + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + DBG("%s: end\n", __func__); + return 0; +} + +/** + * dma_sb_region_create_linear - Setup a linear dma mapping for a device. + * @r: Pointer to a struct ps3_dma_region. 
+ *
+ * This routine creates an HV dma region for the device and maps all available
+ * ram into the io controller bus address space.
+ */
+
+static int dma_sb_region_create_linear(struct ps3_dma_region *r)
+{
+	int result;
+	unsigned long virt_addr, len;
+	dma_addr_t tmp;
+
+	if (r->len > 16*1024*1024) {	/* FIXME: need proper fix */
+		/* force 16M dma pages for linear mapping */
+		if (r->page_size != PS3_DMA_16M) {
+			pr_info("%s:%d: forcing 16M pages for linear map\n",
+				__func__, __LINE__);
+			r->page_size = PS3_DMA_16M;
+			r->len = _ALIGN_UP(r->len, 1 << r->page_size);
+		}
+	}
+
+	result = dma_sb_region_create(r);
+	BUG_ON(result);
+
+	if (r->offset < map.rm.size) {
+		/* Map (part of) 1st RAM chunk */
+		virt_addr = map.rm.base + r->offset;
+		len = map.rm.size - r->offset;
+		if (len > r->len)
+			len = r->len;
+		result = dma_sb_map_area(r, virt_addr, len, &tmp,
+			CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_SO_RW |
+			CBE_IOPTE_M);
+		BUG_ON(result);
+	}
+
+	if (r->offset + r->len > map.rm.size) {
+		/* Map (part of) 2nd RAM chunk */
+		virt_addr = map.rm.size;
+		len = r->len;
+		if (r->offset >= map.rm.size)
+			virt_addr += r->offset - map.rm.size;
+		else
+			len -= map.rm.size - r->offset;
+		result = dma_sb_map_area(r, virt_addr, len, &tmp,
+			CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_SO_RW |
+			CBE_IOPTE_M);
+		BUG_ON(result);
+	}
+
+	return result;
+}
+
+/**
+ * dma_sb_region_free_linear - Free a linear dma mapping for a device.
+ * @r: Pointer to a struct ps3_dma_region.
+ *
+ * This routine will unmap all mapped areas and free the HV dma region.
+ */
+
+static int dma_sb_region_free_linear(struct ps3_dma_region *r)
+{
+	int result;
+	dma_addr_t bus_addr;
+	unsigned long len, lpar_addr;
+
+	if (r->offset < map.rm.size) {
+		/* Unmap (part of) 1st RAM chunk */
+		lpar_addr = map.rm.base + r->offset;
+		len = map.rm.size - r->offset;
+		if (len > r->len)
+			len = r->len;
+		bus_addr = dma_sb_lpar_to_bus(r, lpar_addr);
+		result = dma_sb_unmap_area(r, bus_addr, len);
+		BUG_ON(result);
+	}
+
+	if (r->offset + r->len > map.rm.size) {
+		/* Unmap (part of) 2nd RAM chunk */
+		lpar_addr = map.r1.base;
+		len = r->len;
+		if (r->offset >= map.rm.size)
+			lpar_addr += r->offset - map.rm.size;
+		else
+			len -= map.rm.size - r->offset;
+		bus_addr = dma_sb_lpar_to_bus(r, lpar_addr);
+		result = dma_sb_unmap_area(r, bus_addr, len);
+		BUG_ON(result);
+	}
+
+	result = dma_sb_region_free(r);
+	BUG_ON(result);
+
+	return result;
+}
+
+/**
+ * dma_sb_map_area_linear - Map an area of memory into a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @virt_addr: Starting virtual address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @bus_addr: A pointer to return the starting ioc bus address of the area to
+ * map.
+ * @iopte_flag: Unused here; the whole region was mapped with fixed flags at
+ * region create time.
+ *
+ * This routine just returns the corresponding bus address.  Actual mapping
+ * occurs in dma_sb_region_create_linear().
+ */
+
+static int dma_sb_map_area_linear(struct ps3_dma_region *r,
+	unsigned long virt_addr, unsigned long len, dma_addr_t *bus_addr,
+	u64 iopte_flag)
+{
+	unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr)
+		: virt_addr;
+	*bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr));
+	return 0;
+}
+
+/**
+ * dma_sb_unmap_area_linear - Unmap an area of memory from a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @bus_addr: The starting ioc bus address of the area to unmap.
+ * @len: Length in bytes of the area to unmap.
+ *
+ * This routine does nothing.  Unmapping occurs in dma_sb_region_free_linear().
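+ *
+ * Taken together, the linear ops trade flexibility for speed: everything is
+ * mapped once at region create time, and the per-buffer map step reduces to
+ * address arithmetic (a sketch, ignoring the r1 offset adjustment done by
+ * dma_sb_lpar_to_bus()):
+ *
+ *	bus_addr = r->bus_addr + ps3_mm_phys_to_lpar(__pa(virt_addr)) - r->offset
+ *
+ * No HV call is made on this fast path.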
+ */ + +static int dma_sb_unmap_area_linear(struct ps3_dma_region *r, + dma_addr_t bus_addr, unsigned long len) +{ + return 0; +}; + +static const struct ps3_dma_region_ops ps3_dma_sb_region_ops = { + .create = dma_sb_region_create, + .free = dma_sb_region_free, + .map = dma_sb_map_area, + .unmap = dma_sb_unmap_area +}; + +static const struct ps3_dma_region_ops ps3_dma_sb_region_linear_ops = { + .create = dma_sb_region_create_linear, + .free = dma_sb_region_free_linear, + .map = dma_sb_map_area_linear, + .unmap = dma_sb_unmap_area_linear +}; + +static const struct ps3_dma_region_ops ps3_dma_ioc0_region_ops = { + .create = dma_ioc0_region_create, + .free = dma_ioc0_region_free, + .map = dma_ioc0_map_area, + .unmap = dma_ioc0_unmap_area +}; + +int ps3_dma_region_init(struct ps3_system_bus_device *dev, + struct ps3_dma_region *r, enum ps3_dma_page_size page_size, + enum ps3_dma_region_type region_type, void *addr, unsigned long len) +{ + unsigned long lpar_addr; + + lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0; + + r->dev = dev; + r->page_size = page_size; + r->region_type = region_type; + r->offset = lpar_addr; + if (r->offset >= map.rm.size) + r->offset -= map.r1.offset; + r->len = len ? len : _ALIGN_UP(map.total, 1 << r->page_size); + + switch (dev->dev_type) { + case PS3_DEVICE_TYPE_SB: + r->region_ops = (USE_DYNAMIC_DMA) + ? &ps3_dma_sb_region_ops + : &ps3_dma_sb_region_linear_ops; + break; + case PS3_DEVICE_TYPE_IOC0: + r->region_ops = &ps3_dma_ioc0_region_ops; + break; + default: + BUG(); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(ps3_dma_region_init); + +int ps3_dma_region_create(struct ps3_dma_region *r) +{ + BUG_ON(!r); + BUG_ON(!r->region_ops); + BUG_ON(!r->region_ops->create); + return r->region_ops->create(r); +} +EXPORT_SYMBOL(ps3_dma_region_create); + +int ps3_dma_region_free(struct ps3_dma_region *r) +{ + BUG_ON(!r); + BUG_ON(!r->region_ops); + BUG_ON(!r->region_ops->free); + return r->region_ops->free(r); +} +EXPORT_SYMBOL(ps3_dma_region_free); + +int ps3_dma_map(struct ps3_dma_region *r, unsigned long virt_addr, + unsigned long len, dma_addr_t *bus_addr, + u64 iopte_flag) +{ + return r->region_ops->map(r, virt_addr, len, bus_addr, iopte_flag); +} + +int ps3_dma_unmap(struct ps3_dma_region *r, dma_addr_t bus_addr, + unsigned long len) +{ + return r->region_ops->unmap(r, bus_addr, len); +} + +/*============================================================================*/ +/* system startup routines */ +/*============================================================================*/ + +/** + * ps3_mm_init - initialize the address space state variables + */ + +void __init ps3_mm_init(void) +{ + int result; + + DBG(" -> %s:%d\n", __func__, __LINE__); + + result = ps3_repository_read_mm_info(&map.rm.base, &map.rm.size, + &map.total); + + if (result) + panic("ps3_repository_read_mm_info() failed"); + + map.rm.offset = map.rm.base; + map.vas_id = map.htab_size = 0; + + /* this implementation assumes map.rm.base is zero */ + + BUG_ON(map.rm.base); + BUG_ON(!map.rm.size); + + /* Check if we got the highmem region from an earlier boot step */ + + if (ps3_mm_get_repository_highmem(&map.r1)) { + result = ps3_mm_region_create(&map.r1, map.total - map.rm.size); + + if (!result) + ps3_mm_set_repository_highmem(&map.r1); + } + + /* correct map.total for the real total amount of memory we use */ + map.total = map.rm.size + map.r1.size; + + if (!map.r1.size) { + DBG("%s:%d: No highmem region found\n", __func__, __LINE__); + } else { + DBG("%s:%d: Adding highmem region: 
%llxh %llxh\n",
+			__func__, __LINE__, map.rm.size,
+			map.total - map.rm.size);
+		memblock_add(map.rm.size, map.total - map.rm.size);
+	}
+
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+}
+
+/**
+ * ps3_mm_shutdown - final cleanup of address space
+ */
+
+void ps3_mm_shutdown(void)
+{
+	ps3_mm_region_destroy(&map.r1);
+}
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
new file mode 100644
index 000000000..097871398
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -0,0 +1,851 @@
+/*
+ * PS3 flash memory os area.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/workqueue.h>
+#include <linux/fs.h>
+#include <linux/syscalls.h>
+#include <linux/export.h>
+#include <linux/ctype.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include <asm/prom.h>
+
+#include "platform.h"
+
+enum {
+	OS_AREA_SEGMENT_SIZE = 0x200,
+};
+
+enum os_area_ldr_format {
+	HEADER_LDR_FORMAT_RAW = 0,
+	HEADER_LDR_FORMAT_GZIP = 1,
+};
+
+#define OS_AREA_HEADER_MAGIC_NUM "cell_ext_os_area"
+
+/**
+ * struct os_area_header - os area header segment.
+ * @magic_num: Always 'cell_ext_os_area'.
+ * @hdr_version: Header format version number.
+ * @db_area_offset: Starting segment number of other os database area.
+ * @ldr_area_offset: Starting segment number of bootloader image area.
+ * @ldr_format: HEADER_LDR_FORMAT flag.
+ * @ldr_size: Size of bootloader image in bytes.
+ *
+ * Note that the docs refer to area offsets.  These are offsets in units of
+ * segments from the start of the os area (top of the header).  These are
+ * better thought of as segment numbers.  The ldr area of the os area is
+ * reserved for the boot loader (os) image.
+ */
+
+struct os_area_header {
+	u8 magic_num[16];
+	u32 hdr_version;
+	u32 db_area_offset;
+	u32 ldr_area_offset;
+	u32 _reserved_1;
+	u32 ldr_format;
+	u32 ldr_size;
+	u32 _reserved_2[6];
+};
+
+enum os_area_boot_flag {
+	PARAM_BOOT_FLAG_GAME_OS = 0,
+	PARAM_BOOT_FLAG_OTHER_OS = 1,
+};
+
+enum os_area_ctrl_button {
+	PARAM_CTRL_BUTTON_O_IS_YES = 0,
+	PARAM_CTRL_BUTTON_X_IS_YES = 1,
+};
+
+/**
+ * struct os_area_params - os area params segment.
+ * @boot_flag: User preference of operating system, PARAM_BOOT_FLAG flag.
+ * @num_params: Number of params in this (params) segment.
+ * @rtc_diff: Difference in seconds between 1970 and the ps3 rtc value.
+ * @av_multi_out: User preference of AV output, PARAM_AV_MULTI_OUT flag.
+ * @ctrl_button: User preference of controller button config, PARAM_CTRL_BUTTON
+ * flag.
+ * @static_ip_addr: User preference of static IP address.
+ * @network_mask: User preference of static network mask.
+ * @default_gateway: User preference of static default gateway.
+ * @dns_primary: User preference of static primary dns server. + * @dns_secondary: User preference of static secondary dns server. + * + * The ps3 rtc maintains a read-only value that approximates seconds since + * 2000-01-01 00:00:00 UTC. + * + * User preference of zero for static_ip_addr means use dhcp. + */ + +struct os_area_params { + u32 boot_flag; + u32 _reserved_1[3]; + u32 num_params; + u32 _reserved_2[3]; + /* param 0 */ + s64 rtc_diff; + u8 av_multi_out; + u8 ctrl_button; + u8 _reserved_3[6]; + /* param 1 */ + u8 static_ip_addr[4]; + u8 network_mask[4]; + u8 default_gateway[4]; + u8 _reserved_4[4]; + /* param 2 */ + u8 dns_primary[4]; + u8 dns_secondary[4]; + u8 _reserved_5[8]; +}; + +#define OS_AREA_DB_MAGIC_NUM "-db-" + +/** + * struct os_area_db - Shared flash memory database. + * @magic_num: Always '-db-'. + * @version: os_area_db format version number. + * @index_64: byte offset of the database id index for 64 bit variables. + * @count_64: number of usable 64 bit index entries + * @index_32: byte offset of the database id index for 32 bit variables. + * @count_32: number of usable 32 bit index entries + * @index_16: byte offset of the database id index for 16 bit variables. + * @count_16: number of usable 16 bit index entries + * + * Flash rom storage for exclusive use by guests running in the other os lpar. + * The current system configuration allocates 1K (two segments) for other os + * use. + */ + +struct os_area_db { + u8 magic_num[4]; + u16 version; + u16 _reserved_1; + u16 index_64; + u16 count_64; + u16 index_32; + u16 count_32; + u16 index_16; + u16 count_16; + u32 _reserved_2; + u8 _db_data[1000]; +}; + +/** + * enum os_area_db_owner - Data owners. + */ + +enum os_area_db_owner { + OS_AREA_DB_OWNER_ANY = -1, + OS_AREA_DB_OWNER_NONE = 0, + OS_AREA_DB_OWNER_PROTOTYPE = 1, + OS_AREA_DB_OWNER_LINUX = 2, + OS_AREA_DB_OWNER_PETITBOOT = 3, + OS_AREA_DB_OWNER_MAX = 32, +}; + +enum os_area_db_key { + OS_AREA_DB_KEY_ANY = -1, + OS_AREA_DB_KEY_NONE = 0, + OS_AREA_DB_KEY_RTC_DIFF = 1, + OS_AREA_DB_KEY_VIDEO_MODE = 2, + OS_AREA_DB_KEY_MAX = 8, +}; + +struct os_area_db_id { + int owner; + int key; +}; + +static const struct os_area_db_id os_area_db_id_empty = { + .owner = OS_AREA_DB_OWNER_NONE, + .key = OS_AREA_DB_KEY_NONE +}; + +static const struct os_area_db_id os_area_db_id_any = { + .owner = OS_AREA_DB_OWNER_ANY, + .key = OS_AREA_DB_KEY_ANY +}; + +static const struct os_area_db_id os_area_db_id_rtc_diff = { + .owner = OS_AREA_DB_OWNER_LINUX, + .key = OS_AREA_DB_KEY_RTC_DIFF +}; + +static const struct os_area_db_id os_area_db_id_video_mode = { + .owner = OS_AREA_DB_OWNER_LINUX, + .key = OS_AREA_DB_KEY_VIDEO_MODE +}; + +#define SECONDS_FROM_1970_TO_2000 946684800LL + +/** + * struct saved_params - Static working copies of data from the PS3 'os area'. + * + * The order of preference we use for the rtc_diff source: + * 1) The database value. + * 2) The game os value. + * 3) The number of seconds from 1970 to 2000. 
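+ *
+ * Whatever the source, the relation is the same (a sketch, using the names
+ * above):
+ *
+ *	linux_time = ps3_rtc_value + rtc_diff
+ *
+ * so the fallback of SECONDS_FROM_1970_TO_2000 (946684800) simply treats
+ * the rtc, which counts from 2000-01-01, as a 1970-based epoch clock.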
+ */ + +struct saved_params { + unsigned int valid; + s64 rtc_diff; + unsigned int av_multi_out; +} static saved_params; + +static struct property property_rtc_diff = { + .name = "linux,rtc_diff", + .length = sizeof(saved_params.rtc_diff), + .value = &saved_params.rtc_diff, +}; + +static struct property property_av_multi_out = { + .name = "linux,av_multi_out", + .length = sizeof(saved_params.av_multi_out), + .value = &saved_params.av_multi_out, +}; + + +static DEFINE_MUTEX(os_area_flash_mutex); + +static const struct ps3_os_area_flash_ops *os_area_flash_ops; + +void ps3_os_area_flash_register(const struct ps3_os_area_flash_ops *ops) +{ + mutex_lock(&os_area_flash_mutex); + os_area_flash_ops = ops; + mutex_unlock(&os_area_flash_mutex); +} +EXPORT_SYMBOL_GPL(ps3_os_area_flash_register); + +static ssize_t os_area_flash_read(void *buf, size_t count, loff_t pos) +{ + ssize_t res = -ENODEV; + + mutex_lock(&os_area_flash_mutex); + if (os_area_flash_ops) + res = os_area_flash_ops->read(buf, count, pos); + mutex_unlock(&os_area_flash_mutex); + + return res; +} + +static ssize_t os_area_flash_write(const void *buf, size_t count, loff_t pos) +{ + ssize_t res = -ENODEV; + + mutex_lock(&os_area_flash_mutex); + if (os_area_flash_ops) + res = os_area_flash_ops->write(buf, count, pos); + mutex_unlock(&os_area_flash_mutex); + + return res; +} + + +/** + * os_area_set_property - Add or overwrite a saved_params value to the device tree. + * + * Overwrites an existing property. + */ + +static void os_area_set_property(struct device_node *node, + struct property *prop) +{ + int result; + struct property *tmp = of_find_property(node, prop->name, NULL); + + if (tmp) { + pr_debug("%s:%d found %s\n", __func__, __LINE__, prop->name); + of_remove_property(node, tmp); + } + + result = of_add_property(node, prop); + + if (result) + pr_debug("%s:%d of_set_property failed\n", __func__, + __LINE__); +} + +/** + * os_area_get_property - Get a saved_params value from the device tree. + * + */ + +static void __init os_area_get_property(struct device_node *node, + struct property *prop) +{ + const struct property *tmp = of_find_property(node, prop->name, NULL); + + if (tmp) { + BUG_ON(prop->length != tmp->length); + memcpy(prop->value, tmp->value, prop->length); + } else + pr_debug("%s:%d not found %s\n", __func__, __LINE__, + prop->name); +} + +static void dump_field(char *s, const u8 *field, int size_of_field) +{ +#if defined(DEBUG) + int i; + + for (i = 0; i < size_of_field; i++) + s[i] = isprint(field[i]) ? 
field[i] : '.'; + s[i] = 0; +#endif +} + +#define dump_header(_a) _dump_header(_a, __func__, __LINE__) +static void _dump_header(const struct os_area_header *h, const char *func, + int line) +{ + char str[sizeof(h->magic_num) + 1]; + + dump_field(str, h->magic_num, sizeof(h->magic_num)); + pr_debug("%s:%d: h.magic_num: '%s'\n", func, line, + str); + pr_debug("%s:%d: h.hdr_version: %u\n", func, line, + h->hdr_version); + pr_debug("%s:%d: h.db_area_offset: %u\n", func, line, + h->db_area_offset); + pr_debug("%s:%d: h.ldr_area_offset: %u\n", func, line, + h->ldr_area_offset); + pr_debug("%s:%d: h.ldr_format: %u\n", func, line, + h->ldr_format); + pr_debug("%s:%d: h.ldr_size: %xh\n", func, line, + h->ldr_size); +} + +#define dump_params(_a) _dump_params(_a, __func__, __LINE__) +static void _dump_params(const struct os_area_params *p, const char *func, + int line) +{ + pr_debug("%s:%d: p.boot_flag: %u\n", func, line, p->boot_flag); + pr_debug("%s:%d: p.num_params: %u\n", func, line, p->num_params); + pr_debug("%s:%d: p.rtc_diff %lld\n", func, line, p->rtc_diff); + pr_debug("%s:%d: p.av_multi_out %u\n", func, line, p->av_multi_out); + pr_debug("%s:%d: p.ctrl_button: %u\n", func, line, p->ctrl_button); + pr_debug("%s:%d: p.static_ip_addr: %u.%u.%u.%u\n", func, line, + p->static_ip_addr[0], p->static_ip_addr[1], + p->static_ip_addr[2], p->static_ip_addr[3]); + pr_debug("%s:%d: p.network_mask: %u.%u.%u.%u\n", func, line, + p->network_mask[0], p->network_mask[1], + p->network_mask[2], p->network_mask[3]); + pr_debug("%s:%d: p.default_gateway: %u.%u.%u.%u\n", func, line, + p->default_gateway[0], p->default_gateway[1], + p->default_gateway[2], p->default_gateway[3]); + pr_debug("%s:%d: p.dns_primary: %u.%u.%u.%u\n", func, line, + p->dns_primary[0], p->dns_primary[1], + p->dns_primary[2], p->dns_primary[3]); + pr_debug("%s:%d: p.dns_secondary: %u.%u.%u.%u\n", func, line, + p->dns_secondary[0], p->dns_secondary[1], + p->dns_secondary[2], p->dns_secondary[3]); +} + +static int verify_header(const struct os_area_header *header) +{ + if (memcmp(header->magic_num, OS_AREA_HEADER_MAGIC_NUM, + sizeof(header->magic_num))) { + pr_debug("%s:%d magic_num failed\n", __func__, __LINE__); + return -1; + } + + if (header->hdr_version < 1) { + pr_debug("%s:%d hdr_version failed\n", __func__, __LINE__); + return -1; + } + + if (header->db_area_offset > header->ldr_area_offset) { + pr_debug("%s:%d offsets failed\n", __func__, __LINE__); + return -1; + } + + return 0; +} + +static int db_verify(const struct os_area_db *db) +{ + if (memcmp(db->magic_num, OS_AREA_DB_MAGIC_NUM, + sizeof(db->magic_num))) { + pr_debug("%s:%d magic_num failed\n", __func__, __LINE__); + return -EINVAL; + } + + if (db->version != 1) { + pr_debug("%s:%d version failed\n", __func__, __LINE__); + return -EINVAL; + } + + return 0; +} + +struct db_index { + uint8_t owner:5; + uint8_t key:3; +}; + +struct db_iterator { + const struct os_area_db *db; + struct os_area_db_id match_id; + struct db_index *idx; + struct db_index *last_idx; + union { + uint64_t *value_64; + uint32_t *value_32; + uint16_t *value_16; + }; +}; + +static unsigned int db_align_up(unsigned int val, unsigned int size) +{ + return (val + (size - 1)) & (~(size - 1)); +} + +/** + * db_for_each_64 - Iterator for 64 bit entries. + * + * A NULL value for id can be used to match all entries. + * OS_AREA_DB_OWNER_ANY and OS_AREA_DB_KEY_ANY can be used to match all. 
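+ *
+ * Typical iteration pattern (a sketch; do_something() is a stand-in, and
+ * db_delete_64() below is a real caller):
+ *
+ *	struct db_iterator i;
+ *
+ *	for (i.db = NULL; db_for_each_64(db, id, &i); )
+ *		do_something(i.idx->owner, i.idx->key, *i.value_64);
+ *
+ * The iterator must be primed by setting i.db to NULL before the first call.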
+ */ + +static int db_for_each_64(const struct os_area_db *db, + const struct os_area_db_id *match_id, struct db_iterator *i) +{ +next: + if (!i->db) { + i->db = db; + i->match_id = match_id ? *match_id : os_area_db_id_any; + i->idx = (void *)db + db->index_64; + i->last_idx = i->idx + db->count_64; + i->value_64 = (void *)db + db->index_64 + + db_align_up(db->count_64, 8); + } else { + i->idx++; + i->value_64++; + } + + if (i->idx >= i->last_idx) { + pr_debug("%s:%d: reached end\n", __func__, __LINE__); + return 0; + } + + if (i->match_id.owner != OS_AREA_DB_OWNER_ANY + && i->match_id.owner != (int)i->idx->owner) + goto next; + if (i->match_id.key != OS_AREA_DB_KEY_ANY + && i->match_id.key != (int)i->idx->key) + goto next; + + return 1; +} + +static int db_delete_64(struct os_area_db *db, const struct os_area_db_id *id) +{ + struct db_iterator i; + + for (i.db = NULL; db_for_each_64(db, id, &i); ) { + + pr_debug("%s:%d: got (%d:%d) %llxh\n", __func__, __LINE__, + i.idx->owner, i.idx->key, + (unsigned long long)*i.value_64); + + i.idx->owner = 0; + i.idx->key = 0; + *i.value_64 = 0; + } + return 0; +} + +static int db_set_64(struct os_area_db *db, const struct os_area_db_id *id, + uint64_t value) +{ + struct db_iterator i; + + pr_debug("%s:%d: (%d:%d) <= %llxh\n", __func__, __LINE__, + id->owner, id->key, (unsigned long long)value); + + if (!id->owner || id->owner == OS_AREA_DB_OWNER_ANY + || id->key == OS_AREA_DB_KEY_ANY) { + pr_debug("%s:%d: bad id: (%d:%d)\n", __func__, + __LINE__, id->owner, id->key); + return -1; + } + + db_delete_64(db, id); + + i.db = NULL; + if (db_for_each_64(db, &os_area_db_id_empty, &i)) { + + pr_debug("%s:%d: got (%d:%d) %llxh\n", __func__, __LINE__, + i.idx->owner, i.idx->key, + (unsigned long long)*i.value_64); + + i.idx->owner = id->owner; + i.idx->key = id->key; + *i.value_64 = value; + + pr_debug("%s:%d: set (%d:%d) <= %llxh\n", __func__, __LINE__, + i.idx->owner, i.idx->key, + (unsigned long long)*i.value_64); + return 0; + } + pr_debug("%s:%d: database full.\n", + __func__, __LINE__); + return -1; +} + +static int db_get_64(const struct os_area_db *db, + const struct os_area_db_id *id, uint64_t *value) +{ + struct db_iterator i; + + i.db = NULL; + if (db_for_each_64(db, id, &i)) { + *value = *i.value_64; + pr_debug("%s:%d: found %lld\n", __func__, __LINE__, + (long long int)*i.value_64); + return 0; + } + pr_debug("%s:%d: not found\n", __func__, __LINE__); + return -1; +} + +static int db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff) +{ + return db_get_64(db, &os_area_db_id_rtc_diff, (uint64_t*)rtc_diff); +} + +#define dump_db(a) _dump_db(a, __func__, __LINE__) +static void _dump_db(const struct os_area_db *db, const char *func, + int line) +{ + char str[sizeof(db->magic_num) + 1]; + + dump_field(str, db->magic_num, sizeof(db->magic_num)); + pr_debug("%s:%d: db.magic_num: '%s'\n", func, line, + str); + pr_debug("%s:%d: db.version: %u\n", func, line, + db->version); + pr_debug("%s:%d: db.index_64: %u\n", func, line, + db->index_64); + pr_debug("%s:%d: db.count_64: %u\n", func, line, + db->count_64); + pr_debug("%s:%d: db.index_32: %u\n", func, line, + db->index_32); + pr_debug("%s:%d: db.count_32: %u\n", func, line, + db->count_32); + pr_debug("%s:%d: db.index_16: %u\n", func, line, + db->index_16); + pr_debug("%s:%d: db.count_16: %u\n", func, line, + db->count_16); +} + +static void os_area_db_init(struct os_area_db *db) +{ + enum { + HEADER_SIZE = offsetof(struct os_area_db, _db_data), + INDEX_64_COUNT = 64, + VALUES_64_COUNT = 57, + 
INDEX_32_COUNT = 64, + VALUES_32_COUNT = 57, + INDEX_16_COUNT = 64, + VALUES_16_COUNT = 57, + }; + + memset(db, 0, sizeof(struct os_area_db)); + + memcpy(db->magic_num, OS_AREA_DB_MAGIC_NUM, sizeof(db->magic_num)); + db->version = 1; + db->index_64 = HEADER_SIZE; + db->count_64 = VALUES_64_COUNT; + db->index_32 = HEADER_SIZE + + INDEX_64_COUNT * sizeof(struct db_index) + + VALUES_64_COUNT * sizeof(u64); + db->count_32 = VALUES_32_COUNT; + db->index_16 = HEADER_SIZE + + INDEX_64_COUNT * sizeof(struct db_index) + + VALUES_64_COUNT * sizeof(u64) + + INDEX_32_COUNT * sizeof(struct db_index) + + VALUES_32_COUNT * sizeof(u32); + db->count_16 = VALUES_16_COUNT; + + /* Rules to check db layout. */ + + BUILD_BUG_ON(sizeof(struct db_index) != 1); + BUILD_BUG_ON(sizeof(struct os_area_db) != 2 * OS_AREA_SEGMENT_SIZE); + BUILD_BUG_ON(INDEX_64_COUNT & 0x7); + BUILD_BUG_ON(VALUES_64_COUNT > INDEX_64_COUNT); + BUILD_BUG_ON(INDEX_32_COUNT & 0x7); + BUILD_BUG_ON(VALUES_32_COUNT > INDEX_32_COUNT); + BUILD_BUG_ON(INDEX_16_COUNT & 0x7); + BUILD_BUG_ON(VALUES_16_COUNT > INDEX_16_COUNT); + BUILD_BUG_ON(HEADER_SIZE + + INDEX_64_COUNT * sizeof(struct db_index) + + VALUES_64_COUNT * sizeof(u64) + + INDEX_32_COUNT * sizeof(struct db_index) + + VALUES_32_COUNT * sizeof(u32) + + INDEX_16_COUNT * sizeof(struct db_index) + + VALUES_16_COUNT * sizeof(u16) + > sizeof(struct os_area_db)); +} + +/** + * update_flash_db - Helper for os_area_queue_work_handler. + * + */ + +static int update_flash_db(void) +{ + const unsigned int buf_len = 8 * OS_AREA_SEGMENT_SIZE; + struct os_area_header *header; + ssize_t count; + int error; + loff_t pos; + struct os_area_db* db; + + /* Read in header and db from flash. */ + + header = kmalloc(buf_len, GFP_KERNEL); + if (!header) { + pr_debug("%s: kmalloc failed\n", __func__); + return -ENOMEM; + } + + count = os_area_flash_read(header, buf_len, 0); + if (count < 0) { + pr_debug("%s: os_area_flash_read failed %zd\n", __func__, + count); + error = count; + goto fail; + } + + pos = header->db_area_offset * OS_AREA_SEGMENT_SIZE; + if (count < OS_AREA_SEGMENT_SIZE || verify_header(header) || + count < pos) { + pr_debug("%s: verify_header failed\n", __func__); + dump_header(header); + error = -EINVAL; + goto fail; + } + + /* Now got a good db offset and some maybe good db data. */ + + db = (void *)header + pos; + + error = db_verify(db); + if (error) { + pr_notice("%s: Verify of flash database failed, formatting.\n", + __func__); + dump_db(db); + os_area_db_init(db); + } + + /* Now got good db data. */ + + db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff); + + count = os_area_flash_write(db, sizeof(struct os_area_db), pos); + if (count < sizeof(struct os_area_db)) { + pr_debug("%s: os_area_flash_write failed %zd\n", __func__, + count); + error = count < 0 ? count : -EIO; + } + +fail: + kfree(header); + return error; +} + +/** + * os_area_queue_work_handler - Asynchronous write handler. + * + * An asynchronous write for flash memory and the device tree. Do not + * call directly, use os_area_queue_work(). 
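+ *
+ * For example, ps3_os_area_set_rtc_diff() below may run in timer interrupt
+ * context, so it only updates the cached saved_params value and calls
+ * os_area_queue_work(); this handler later writes the device tree and the
+ * flash database from process context.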
+ */ + +static void os_area_queue_work_handler(struct work_struct *work) +{ + struct device_node *node; + int error; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + node = of_find_node_by_path("/"); + if (node) { + os_area_set_property(node, &property_rtc_diff); + of_node_put(node); + } else + pr_debug("%s:%d of_find_node_by_path failed\n", + __func__, __LINE__); + + error = update_flash_db(); + if (error) + pr_warning("%s: Could not update FLASH ROM\n", __func__); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); +} + +static void os_area_queue_work(void) +{ + static DECLARE_WORK(q, os_area_queue_work_handler); + + wmb(); + schedule_work(&q); +} + +/** + * ps3_os_area_save_params - Copy data from os area mirror to @saved_params. + * + * For the convenience of the guest the HV makes a copy of the os area in + * flash to a high address in the boot memory region and then puts that RAM + * address and the byte count into the repository for retrieval by the guest. + * We copy the data we want into a static variable and allow the memory setup + * by the HV to be claimed by the memblock manager. + * + * The os area mirror will not be available to a second stage kernel, and + * the header verify will fail. In this case, the saved_params values will + * be set from flash memory or the passed in device tree in ps3_os_area_init(). + */ + +void __init ps3_os_area_save_params(void) +{ + int result; + u64 lpar_addr; + unsigned int size; + struct os_area_header *header; + struct os_area_params *params; + struct os_area_db *db; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + result = ps3_repository_read_boot_dat_info(&lpar_addr, &size); + + if (result) { + pr_debug("%s:%d ps3_repository_read_boot_dat_info failed\n", + __func__, __LINE__); + return; + } + + header = (struct os_area_header *)__va(lpar_addr); + params = (struct os_area_params *)__va(lpar_addr + + OS_AREA_SEGMENT_SIZE); + + result = verify_header(header); + + if (result) { + /* Second stage kernels exit here. */ + pr_debug("%s:%d verify_header failed\n", __func__, __LINE__); + dump_header(header); + return; + } + + db = (struct os_area_db *)__va(lpar_addr + + header->db_area_offset * OS_AREA_SEGMENT_SIZE); + + dump_header(header); + dump_params(params); + dump_db(db); + + result = db_verify(db) || db_get_rtc_diff(db, &saved_params.rtc_diff); + if (result) + saved_params.rtc_diff = params->rtc_diff ? params->rtc_diff + : SECONDS_FROM_1970_TO_2000; + saved_params.av_multi_out = params->av_multi_out; + saved_params.valid = 1; + + memset(header, 0, sizeof(*header)); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); +} + +/** + * ps3_os_area_init - Setup os area device tree properties as needed. + */ + +void __init ps3_os_area_init(void) +{ + struct device_node *node; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + node = of_find_node_by_path("/"); + + if (!saved_params.valid && node) { + /* Second stage kernels should have a dt entry. */ + os_area_get_property(node, &property_rtc_diff); + os_area_get_property(node, &property_av_multi_out); + } + + if(!saved_params.rtc_diff) + saved_params.rtc_diff = SECONDS_FROM_1970_TO_2000; + + if (node) { + os_area_set_property(node, &property_rtc_diff); + os_area_set_property(node, &property_av_multi_out); + of_node_put(node); + } else + pr_debug("%s:%d of_find_node_by_path failed\n", + __func__, __LINE__); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); +} + +/** + * ps3_os_area_get_rtc_diff - Returns the rtc diff value. 
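+ *
+ * A sketch of the intended use (ps3_rtc_value stands for a raw rtc read and
+ * is hypothetical here; the platform time code is the real caller):
+ *
+ *	seconds_since_1970 = ps3_rtc_value + ps3_os_area_get_rtc_diff();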
+ */ + +u64 ps3_os_area_get_rtc_diff(void) +{ + return saved_params.rtc_diff; +} +EXPORT_SYMBOL_GPL(ps3_os_area_get_rtc_diff); + +/** + * ps3_os_area_set_rtc_diff - Set the rtc diff value. + * + * An asynchronous write is needed to support writing updates from + * the timer interrupt context. + */ + +void ps3_os_area_set_rtc_diff(u64 rtc_diff) +{ + if (saved_params.rtc_diff != rtc_diff) { + saved_params.rtc_diff = rtc_diff; + os_area_queue_work(); + } +} +EXPORT_SYMBOL_GPL(ps3_os_area_set_rtc_diff); + +/** + * ps3_os_area_get_av_multi_out - Returns the default video mode. + */ + +enum ps3_param_av_multi_out ps3_os_area_get_av_multi_out(void) +{ + return saved_params.av_multi_out; +} +EXPORT_SYMBOL_GPL(ps3_os_area_get_av_multi_out); diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h new file mode 100644 index 000000000..1809cfc56 --- /dev/null +++ b/arch/powerpc/platforms/ps3/platform.h @@ -0,0 +1,265 @@ +/* + * PS3 platform declarations. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#if !defined(_PS3_PLATFORM_H) +#define _PS3_PLATFORM_H + +#include <linux/rtc.h> +#include <scsi/scsi.h> + +#include <asm/ps3.h> + +/* htab */ + +void __init ps3_hpte_init(unsigned long htab_size); +void __init ps3_map_htab(void); + +/* mm */ + +void __init ps3_mm_init(void); +void __init ps3_mm_vas_create(unsigned long* htab_size); +void ps3_mm_vas_destroy(void); +void ps3_mm_shutdown(void); + +/* irq */ + +void ps3_init_IRQ(void); +void ps3_shutdown_IRQ(int cpu); +void __init ps3_register_ipi_debug_brk(unsigned int cpu, unsigned int virq); +void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq); + +/* smp */ + +void smp_init_ps3(void); +#ifdef CONFIG_SMP +void ps3_smp_cleanup_cpu(int cpu); +#else +static inline void ps3_smp_cleanup_cpu(int cpu) { } +#endif + +/* time */ + +void __init ps3_calibrate_decr(void); +unsigned long __init ps3_get_boot_time(void); +void ps3_get_rtc_time(struct rtc_time *time); +int ps3_set_rtc_time(struct rtc_time *time); + +/* os area */ + +void __init ps3_os_area_save_params(void); +void __init ps3_os_area_init(void); + +/* spu */ + +#if defined(CONFIG_SPU_BASE) +void ps3_spu_set_platform (void); +#else +static inline void ps3_spu_set_platform (void) {} +#endif + +/* repository bus info */ + +enum ps3_bus_type { + PS3_BUS_TYPE_SB = 4, + PS3_BUS_TYPE_STORAGE = 5, +}; + +enum ps3_dev_type { + PS3_DEV_TYPE_STOR_DISK = TYPE_DISK, /* 0 */ + PS3_DEV_TYPE_SB_GELIC = 3, + PS3_DEV_TYPE_SB_USB = 4, + PS3_DEV_TYPE_STOR_ROM = TYPE_ROM, /* 5 */ + PS3_DEV_TYPE_SB_GPIO = 6, + PS3_DEV_TYPE_STOR_FLASH = TYPE_RBC, /* 14 */ +}; + +int ps3_repository_read_bus_str(unsigned int bus_index, const char *bus_str, + u64 *value); +int ps3_repository_read_bus_id(unsigned int bus_index, u64 *bus_id); +int ps3_repository_read_bus_type(unsigned 
int bus_index, + enum ps3_bus_type *bus_type); +int ps3_repository_read_bus_num_dev(unsigned int bus_index, + unsigned int *num_dev); + +/* repository bus device info */ + +enum ps3_interrupt_type { + PS3_INTERRUPT_TYPE_EVENT_PORT = 2, + PS3_INTERRUPT_TYPE_SB_OHCI = 3, + PS3_INTERRUPT_TYPE_SB_EHCI = 4, + PS3_INTERRUPT_TYPE_OTHER = 5, +}; + +enum ps3_reg_type { + PS3_REG_TYPE_SB_OHCI = 3, + PS3_REG_TYPE_SB_EHCI = 4, + PS3_REG_TYPE_SB_GPIO = 5, +}; + +int ps3_repository_read_dev_str(unsigned int bus_index, + unsigned int dev_index, const char *dev_str, u64 *value); +int ps3_repository_read_dev_id(unsigned int bus_index, unsigned int dev_index, + u64 *dev_id); +int ps3_repository_read_dev_type(unsigned int bus_index, + unsigned int dev_index, enum ps3_dev_type *dev_type); +int ps3_repository_read_dev_intr(unsigned int bus_index, + unsigned int dev_index, unsigned int intr_index, + enum ps3_interrupt_type *intr_type, unsigned int *interrupt_id); +int ps3_repository_read_dev_reg_type(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, + enum ps3_reg_type *reg_type); +int ps3_repository_read_dev_reg_addr(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, u64 *bus_addr, + u64 *len); +int ps3_repository_read_dev_reg(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, + enum ps3_reg_type *reg_type, u64 *bus_addr, u64 *len); + +/* repository bus enumerators */ + +struct ps3_repository_device { + unsigned int bus_index; + unsigned int dev_index; + enum ps3_bus_type bus_type; + enum ps3_dev_type dev_type; + u64 bus_id; + u64 dev_id; +}; + +int ps3_repository_find_device(struct ps3_repository_device *repo); +int ps3_repository_find_device_by_id(struct ps3_repository_device *repo, + u64 bus_id, u64 dev_id); +int ps3_repository_find_devices(enum ps3_bus_type bus_type, + int (*callback)(const struct ps3_repository_device *repo)); +int ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, + unsigned int *bus_index); +int ps3_repository_find_interrupt(const struct ps3_repository_device *repo, + enum ps3_interrupt_type intr_type, unsigned int *interrupt_id); +int ps3_repository_find_reg(const struct ps3_repository_device *repo, + enum ps3_reg_type reg_type, u64 *bus_addr, u64 *len); + +/* repository block device info */ + +int ps3_repository_read_stor_dev_port(unsigned int bus_index, + unsigned int dev_index, u64 *port); +int ps3_repository_read_stor_dev_blk_size(unsigned int bus_index, + unsigned int dev_index, u64 *blk_size); +int ps3_repository_read_stor_dev_num_blocks(unsigned int bus_index, + unsigned int dev_index, u64 *num_blocks); +int ps3_repository_read_stor_dev_num_regions(unsigned int bus_index, + unsigned int dev_index, unsigned int *num_regions); +int ps3_repository_read_stor_dev_region_id(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, + unsigned int *region_id); +int ps3_repository_read_stor_dev_region_size(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, u64 *region_size); +int ps3_repository_read_stor_dev_region_start(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, u64 *region_start); +int ps3_repository_read_stor_dev_info(unsigned int bus_index, + unsigned int dev_index, u64 *port, u64 *blk_size, + u64 *num_blocks, unsigned int *num_regions); +int ps3_repository_read_stor_dev_region(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, + unsigned int *region_id, u64 *region_start, u64 *region_size); 
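+/*
+ * Illustrative use of the storage accessors above (error handling
+ * omitted): read the device geometry first, then walk its regions.
+ *
+ *	u64 port, blk_size, num_blocks;
+ *	unsigned int i, num_regions;
+ *
+ *	ps3_repository_read_stor_dev_info(bus_index, dev_index, &port,
+ *		&blk_size, &num_blocks, &num_regions);
+ *	for (i = 0; i < num_regions; i++) {
+ *		unsigned int id;
+ *		u64 start, size;
+ *
+ *		ps3_repository_read_stor_dev_region(bus_index, dev_index,
+ *			i, &id, &start, &size);
+ *	}
+ */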
+
+/* repository logical pu and memory info */
+
+int ps3_repository_read_num_pu(u64 *num_pu);
+int ps3_repository_read_pu_id(unsigned int pu_index, u64 *pu_id);
+int ps3_repository_read_rm_base(unsigned int ppe_id, u64 *rm_base);
+int ps3_repository_read_rm_size(unsigned int ppe_id, u64 *rm_size);
+int ps3_repository_read_region_total(u64 *region_total);
+int ps3_repository_read_mm_info(u64 *rm_base, u64 *rm_size,
+	u64 *region_total);
+int ps3_repository_read_highmem_region_count(unsigned int *region_count);
+int ps3_repository_read_highmem_base(unsigned int region_index,
+	u64 *highmem_base);
+int ps3_repository_read_highmem_size(unsigned int region_index,
+	u64 *highmem_size);
+int ps3_repository_read_highmem_info(unsigned int region_index,
+	u64 *highmem_base, u64 *highmem_size);
+
+#if defined(CONFIG_PS3_REPOSITORY_WRITE)
+int ps3_repository_write_highmem_region_count(unsigned int region_count);
+int ps3_repository_write_highmem_base(unsigned int region_index,
+	u64 highmem_base);
+int ps3_repository_write_highmem_size(unsigned int region_index,
+	u64 highmem_size);
+int ps3_repository_write_highmem_info(unsigned int region_index,
+	u64 highmem_base, u64 highmem_size);
+int ps3_repository_delete_highmem_info(unsigned int region_index);
+#else
+static inline int ps3_repository_write_highmem_region_count(
+	unsigned int region_count) {return 0;}
+static inline int ps3_repository_write_highmem_base(unsigned int region_index,
+	u64 highmem_base) {return 0;}
+static inline int ps3_repository_write_highmem_size(unsigned int region_index,
+	u64 highmem_size) {return 0;}
+static inline int ps3_repository_write_highmem_info(unsigned int region_index,
+	u64 highmem_base, u64 highmem_size) {return 0;}
+static inline int ps3_repository_delete_highmem_info(unsigned int region_index)
+	{return 0;}
+#endif
+
+/* repository pme info */
+
+int ps3_repository_read_num_be(unsigned int *num_be);
+int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id);
+int ps3_repository_read_be_id(u64 node_id, u64 *be_id);
+int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq);
+int ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq);
+
+/* repository performance monitor info */
+
+int ps3_repository_read_lpm_privileges(unsigned int be_index, u64 *lpar,
+	u64 *rights);
+
+/* repository 'Other OS' area */
+
+int ps3_repository_read_boot_dat_addr(u64 *lpar_addr);
+int ps3_repository_read_boot_dat_size(unsigned int *size);
+int ps3_repository_read_boot_dat_info(u64 *lpar_addr, unsigned int *size);
+
+/* repository spu info */
+
+/**
+ * enum ps3_spu_resource_type - Type of spu resource.
+ * @PS3_SPU_RESOURCE_TYPE_SHARED: Logical spu is shared with other partitions.
+ * @PS3_SPU_RESOURCE_TYPE_EXCLUSIVE: Logical spu is not shared with other
+ * partitions.
+ *
+ * Returned by ps3_repository_read_spu_resource_id().
+ */ + +enum ps3_spu_resource_type { + PS3_SPU_RESOURCE_TYPE_SHARED = 0, + PS3_SPU_RESOURCE_TYPE_EXCLUSIVE = 0x8000000000000000UL, +}; + +int ps3_repository_read_num_spu_reserved(unsigned int *num_spu_reserved); +int ps3_repository_read_num_spu_resource_id(unsigned int *num_resource_id); +int ps3_repository_read_spu_resource_id(unsigned int res_index, + enum ps3_spu_resource_type* resource_type, unsigned int *resource_id); + +/* repository vuart info */ + +int ps3_repository_read_vuart_av_port(unsigned int *port); +int ps3_repository_read_vuart_sysmgr_port(unsigned int *port); + +#endif diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c new file mode 100644 index 000000000..bfccdc7cb --- /dev/null +++ b/arch/powerpc/platforms/ps3/repository.c @@ -0,0 +1,1404 @@ +/* + * PS3 repository routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <asm/lv1call.h> + +#include "platform.h" + +enum ps3_vendor_id { + PS3_VENDOR_ID_NONE = 0, + PS3_VENDOR_ID_SONY = 0x8000000000000000UL, +}; + +enum ps3_lpar_id { + PS3_LPAR_ID_CURRENT = 0, + PS3_LPAR_ID_PME = 1, +}; + +#define dump_field(_a, _b) _dump_field(_a, _b, __func__, __LINE__) +static void _dump_field(const char *hdr, u64 n, const char *func, int line) +{ +#if defined(DEBUG) + char s[16]; + const char *const in = (const char *)&n; + unsigned int i; + + for (i = 0; i < 8; i++) + s[i] = (in[i] <= 126 && in[i] >= 32) ? in[i] : '.'; + s[i] = 0; + + pr_devel("%s:%d: %s%016llx : %s\n", func, line, hdr, n, s); +#endif +} + +#define dump_node_name(_a, _b, _c, _d, _e) \ + _dump_node_name(_a, _b, _c, _d, _e, __func__, __LINE__) +static void _dump_node_name(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, + u64 n4, const char *func, int line) +{ + pr_devel("%s:%d: lpar: %u\n", func, line, lpar_id); + _dump_field("n1: ", n1, func, line); + _dump_field("n2: ", n2, func, line); + _dump_field("n3: ", n3, func, line); + _dump_field("n4: ", n4, func, line); +} + +#define dump_node(_a, _b, _c, _d, _e, _f, _g) \ + _dump_node(_a, _b, _c, _d, _e, _f, _g, __func__, __LINE__) +static void _dump_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4, + u64 v1, u64 v2, const char *func, int line) +{ + pr_devel("%s:%d: lpar: %u\n", func, line, lpar_id); + _dump_field("n1: ", n1, func, line); + _dump_field("n2: ", n2, func, line); + _dump_field("n3: ", n3, func, line); + _dump_field("n4: ", n4, func, line); + pr_devel("%s:%d: v1: %016llx\n", func, line, v1); + pr_devel("%s:%d: v2: %016llx\n", func, line, v2); +} + +/** + * make_first_field - Make the first field of a repository node name. + * @text: Text portion of the field. + * @index: Numeric index portion of the field. Use zero for 'don't care'. + * + * This routine sets the vendor id to zero (non-vendor specific). + * Returns field value. 
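+ * Note that only the first four characters of @text are significant:
+ * the eight copied bytes are shifted right by 32 bits before the
+ * vendor id and @index are added in.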
+ */ + +static u64 make_first_field(const char *text, u64 index) +{ + u64 n; + + strncpy((char *)&n, text, 8); + return PS3_VENDOR_ID_NONE + (n >> 32) + index; +} + +/** + * make_field - Make subsequent fields of a repository node name. + * @text: Text portion of the field. Use "" for 'don't care'. + * @index: Numeric index portion of the field. Use zero for 'don't care'. + * + * Returns field value. + */ + +static u64 make_field(const char *text, u64 index) +{ + u64 n; + + strncpy((char *)&n, text, 8); + return n + index; +} + +/** + * read_node - Read a repository node from raw fields. + * @n1: First field of node name. + * @n2: Second field of node name. Use zero for 'don't care'. + * @n3: Third field of node name. Use zero for 'don't care'. + * @n4: Fourth field of node name. Use zero for 'don't care'. + * @v1: First repository value (high word). + * @v2: Second repository value (low word). Optional parameter, use zero + * for 'don't care'. + */ + +static int read_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4, + u64 *_v1, u64 *_v2) +{ + int result; + u64 v1; + u64 v2; + + if (lpar_id == PS3_LPAR_ID_CURRENT) { + u64 id; + lv1_get_logical_partition_id(&id); + lpar_id = id; + } + + result = lv1_read_repository_node(lpar_id, n1, n2, n3, n4, &v1, + &v2); + + if (result) { + pr_warn("%s:%d: lv1_read_repository_node failed: %s\n", + __func__, __LINE__, ps3_result(result)); + dump_node_name(lpar_id, n1, n2, n3, n4); + return -ENOENT; + } + + dump_node(lpar_id, n1, n2, n3, n4, v1, v2); + + if (_v1) + *_v1 = v1; + if (_v2) + *_v2 = v2; + + if (v1 && !_v1) + pr_devel("%s:%d: warning: discarding non-zero v1: %016llx\n", + __func__, __LINE__, v1); + if (v2 && !_v2) + pr_devel("%s:%d: warning: discarding non-zero v2: %016llx\n", + __func__, __LINE__, v2); + + return 0; +} + +int ps3_repository_read_bus_str(unsigned int bus_index, const char *bus_str, + u64 *value) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field(bus_str, 0), + 0, 0, + value, NULL); +} + +int ps3_repository_read_bus_id(unsigned int bus_index, u64 *bus_id) +{ + int result; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("id", 0), + 0, 0, + bus_id, NULL); + return result; +} + +int ps3_repository_read_bus_type(unsigned int bus_index, + enum ps3_bus_type *bus_type) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("type", 0), + 0, 0, + &v1, NULL); + *bus_type = v1; + return result; +} + +int ps3_repository_read_bus_num_dev(unsigned int bus_index, + unsigned int *num_dev) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("num_dev", 0), + 0, 0, + &v1, NULL); + *num_dev = v1; + return result; +} + +int ps3_repository_read_dev_str(unsigned int bus_index, + unsigned int dev_index, const char *dev_str, u64 *value) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field(dev_str, 0), + 0, + value, NULL); +} + +int ps3_repository_read_dev_id(unsigned int bus_index, unsigned int dev_index, + u64 *dev_id) +{ + int result; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("id", 0), + 0, + dev_id, NULL); + return result; +} + +int ps3_repository_read_dev_type(unsigned int bus_index, + unsigned int dev_index, enum ps3_dev_type *dev_type) +{ + int result; + u64 v1 = 0; + + result 
= read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("type", 0), + 0, + &v1, NULL); + *dev_type = v1; + return result; +} + +int ps3_repository_read_dev_intr(unsigned int bus_index, + unsigned int dev_index, unsigned int intr_index, + enum ps3_interrupt_type *intr_type, unsigned int *interrupt_id) +{ + int result; + u64 v1 = 0; + u64 v2 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("intr", intr_index), + 0, + &v1, &v2); + *intr_type = v1; + *interrupt_id = v2; + return result; +} + +int ps3_repository_read_dev_reg_type(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, + enum ps3_reg_type *reg_type) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("reg", reg_index), + make_field("type", 0), + &v1, NULL); + *reg_type = v1; + return result; +} + +int ps3_repository_read_dev_reg_addr(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, u64 *bus_addr, u64 *len) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("reg", reg_index), + make_field("data", 0), + bus_addr, len); +} + +int ps3_repository_read_dev_reg(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, + enum ps3_reg_type *reg_type, u64 *bus_addr, u64 *len) +{ + int result = ps3_repository_read_dev_reg_type(bus_index, dev_index, + reg_index, reg_type); + return result ? result + : ps3_repository_read_dev_reg_addr(bus_index, dev_index, + reg_index, bus_addr, len); +} + + + +int ps3_repository_find_device(struct ps3_repository_device *repo) +{ + int result; + struct ps3_repository_device tmp = *repo; + unsigned int num_dev; + + BUG_ON(repo->bus_index > 10); + BUG_ON(repo->dev_index > 10); + + result = ps3_repository_read_bus_num_dev(tmp.bus_index, &num_dev); + + if (result) { + pr_devel("%s:%d read_bus_num_dev failed\n", __func__, __LINE__); + return result; + } + + pr_devel("%s:%d: bus_type %u, bus_index %u, bus_id %llu, num_dev %u\n", + __func__, __LINE__, tmp.bus_type, tmp.bus_index, tmp.bus_id, + num_dev); + + if (tmp.dev_index >= num_dev) { + pr_devel("%s:%d: no device found\n", __func__, __LINE__); + return -ENODEV; + } + + result = ps3_repository_read_dev_type(tmp.bus_index, tmp.dev_index, + &tmp.dev_type); + + if (result) { + pr_devel("%s:%d read_dev_type failed\n", __func__, __LINE__); + return result; + } + + result = ps3_repository_read_dev_id(tmp.bus_index, tmp.dev_index, + &tmp.dev_id); + + if (result) { + pr_devel("%s:%d ps3_repository_read_dev_id failed\n", __func__, + __LINE__); + return result; + } + + pr_devel("%s:%d: found: dev_type %u, dev_index %u, dev_id %llu\n", + __func__, __LINE__, tmp.dev_type, tmp.dev_index, tmp.dev_id); + + *repo = tmp; + return 0; +} + +int ps3_repository_find_device_by_id(struct ps3_repository_device *repo, + u64 bus_id, u64 dev_id) +{ + int result = -ENODEV; + struct ps3_repository_device tmp; + unsigned int num_dev; + + pr_devel(" -> %s:%u: find device by id %llu:%llu\n", __func__, __LINE__, + bus_id, dev_id); + + for (tmp.bus_index = 0; tmp.bus_index < 10; tmp.bus_index++) { + result = ps3_repository_read_bus_id(tmp.bus_index, + &tmp.bus_id); + if (result) { + pr_devel("%s:%u read_bus_id(%u) failed\n", __func__, + __LINE__, tmp.bus_index); + return result; + } + + if (tmp.bus_id == bus_id) + goto found_bus; + + 
pr_devel("%s:%u: skip, bus_id %llu\n", __func__, __LINE__, + tmp.bus_id); + } + pr_devel(" <- %s:%u: bus not found\n", __func__, __LINE__); + return result; + +found_bus: + result = ps3_repository_read_bus_type(tmp.bus_index, &tmp.bus_type); + if (result) { + pr_devel("%s:%u read_bus_type(%u) failed\n", __func__, + __LINE__, tmp.bus_index); + return result; + } + + result = ps3_repository_read_bus_num_dev(tmp.bus_index, &num_dev); + if (result) { + pr_devel("%s:%u read_bus_num_dev failed\n", __func__, + __LINE__); + return result; + } + + for (tmp.dev_index = 0; tmp.dev_index < num_dev; tmp.dev_index++) { + result = ps3_repository_read_dev_id(tmp.bus_index, + tmp.dev_index, + &tmp.dev_id); + if (result) { + pr_devel("%s:%u read_dev_id(%u:%u) failed\n", __func__, + __LINE__, tmp.bus_index, tmp.dev_index); + return result; + } + + if (tmp.dev_id == dev_id) + goto found_dev; + + pr_devel("%s:%u: skip, dev_id %llu\n", __func__, __LINE__, + tmp.dev_id); + } + pr_devel(" <- %s:%u: dev not found\n", __func__, __LINE__); + return result; + +found_dev: + result = ps3_repository_read_dev_type(tmp.bus_index, tmp.dev_index, + &tmp.dev_type); + if (result) { + pr_devel("%s:%u read_dev_type failed\n", __func__, __LINE__); + return result; + } + + pr_devel(" <- %s:%u: found: type (%u:%u) index (%u:%u) id (%llu:%llu)\n", + __func__, __LINE__, tmp.bus_type, tmp.dev_type, tmp.bus_index, + tmp.dev_index, tmp.bus_id, tmp.dev_id); + *repo = tmp; + return 0; +} + +int ps3_repository_find_devices(enum ps3_bus_type bus_type, + int (*callback)(const struct ps3_repository_device *repo)) +{ + int result = 0; + struct ps3_repository_device repo; + + pr_devel(" -> %s:%d: find bus_type %u\n", __func__, __LINE__, bus_type); + + repo.bus_type = bus_type; + result = ps3_repository_find_bus(repo.bus_type, 0, &repo.bus_index); + if (result) { + pr_devel(" <- %s:%u: bus not found\n", __func__, __LINE__); + return result; + } + + result = ps3_repository_read_bus_id(repo.bus_index, &repo.bus_id); + if (result) { + pr_devel("%s:%d read_bus_id(%u) failed\n", __func__, __LINE__, + repo.bus_index); + return result; + } + + for (repo.dev_index = 0; ; repo.dev_index++) { + result = ps3_repository_find_device(&repo); + if (result == -ENODEV) { + result = 0; + break; + } else if (result) + break; + + result = callback(&repo); + if (result) { + pr_devel("%s:%d: abort at callback\n", __func__, + __LINE__); + break; + } + } + + pr_devel(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +int ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, + unsigned int *bus_index) +{ + unsigned int i; + enum ps3_bus_type type; + int error; + + for (i = from; i < 10; i++) { + error = ps3_repository_read_bus_type(i, &type); + if (error) { + pr_devel("%s:%d read_bus_type failed\n", + __func__, __LINE__); + *bus_index = UINT_MAX; + return error; + } + if (type == bus_type) { + *bus_index = i; + return 0; + } + } + *bus_index = UINT_MAX; + return -ENODEV; +} + +int ps3_repository_find_interrupt(const struct ps3_repository_device *repo, + enum ps3_interrupt_type intr_type, unsigned int *interrupt_id) +{ + int result = 0; + unsigned int res_index; + + pr_devel("%s:%d: find intr_type %u\n", __func__, __LINE__, intr_type); + + *interrupt_id = UINT_MAX; + + for (res_index = 0; res_index < 10; res_index++) { + enum ps3_interrupt_type t; + unsigned int id; + + result = ps3_repository_read_dev_intr(repo->bus_index, + repo->dev_index, res_index, &t, &id); + + if (result) { + pr_devel("%s:%d read_dev_intr failed\n", + __func__, __LINE__); 
+ return result; + } + + if (t == intr_type) { + *interrupt_id = id; + break; + } + } + + if (res_index == 10) + return -ENODEV; + + pr_devel("%s:%d: found intr_type %u at res_index %u\n", + __func__, __LINE__, intr_type, res_index); + + return result; +} + +int ps3_repository_find_reg(const struct ps3_repository_device *repo, + enum ps3_reg_type reg_type, u64 *bus_addr, u64 *len) +{ + int result = 0; + unsigned int res_index; + + pr_devel("%s:%d: find reg_type %u\n", __func__, __LINE__, reg_type); + + *bus_addr = *len = 0; + + for (res_index = 0; res_index < 10; res_index++) { + enum ps3_reg_type t; + u64 a; + u64 l; + + result = ps3_repository_read_dev_reg(repo->bus_index, + repo->dev_index, res_index, &t, &a, &l); + + if (result) { + pr_devel("%s:%d read_dev_reg failed\n", + __func__, __LINE__); + return result; + } + + if (t == reg_type) { + *bus_addr = a; + *len = l; + break; + } + } + + if (res_index == 10) + return -ENODEV; + + pr_devel("%s:%d: found reg_type %u at res_index %u\n", + __func__, __LINE__, reg_type, res_index); + + return result; +} + +int ps3_repository_read_stor_dev_port(unsigned int bus_index, + unsigned int dev_index, u64 *port) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("port", 0), + 0, port, NULL); +} + +int ps3_repository_read_stor_dev_blk_size(unsigned int bus_index, + unsigned int dev_index, u64 *blk_size) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("blk_size", 0), + 0, blk_size, NULL); +} + +int ps3_repository_read_stor_dev_num_blocks(unsigned int bus_index, + unsigned int dev_index, u64 *num_blocks) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("n_blocks", 0), + 0, num_blocks, NULL); +} + +int ps3_repository_read_stor_dev_num_regions(unsigned int bus_index, + unsigned int dev_index, unsigned int *num_regions) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("n_regs", 0), + 0, &v1, NULL); + *num_regions = v1; + return result; +} + +int ps3_repository_read_stor_dev_region_id(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, + unsigned int *region_id) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("region", region_index), + make_field("id", 0), + &v1, NULL); + *region_id = v1; + return result; +} + +int ps3_repository_read_stor_dev_region_size(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, u64 *region_size) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("region", region_index), + make_field("size", 0), + region_size, NULL); +} + +int ps3_repository_read_stor_dev_region_start(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, u64 *region_start) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("region", region_index), + make_field("start", 0), + region_start, NULL); +} + +int ps3_repository_read_stor_dev_info(unsigned int bus_index, + unsigned int dev_index, u64 *port, u64 *blk_size, + u64 *num_blocks, unsigned int *num_regions) +{ + int result; + + result = 
ps3_repository_read_stor_dev_port(bus_index, dev_index, port); + if (result) + return result; + + result = ps3_repository_read_stor_dev_blk_size(bus_index, dev_index, + blk_size); + if (result) + return result; + + result = ps3_repository_read_stor_dev_num_blocks(bus_index, dev_index, + num_blocks); + if (result) + return result; + + result = ps3_repository_read_stor_dev_num_regions(bus_index, dev_index, + num_regions); + return result; +} + +int ps3_repository_read_stor_dev_region(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, + unsigned int *region_id, u64 *region_start, u64 *region_size) +{ + int result; + + result = ps3_repository_read_stor_dev_region_id(bus_index, dev_index, + region_index, region_id); + if (result) + return result; + + result = ps3_repository_read_stor_dev_region_start(bus_index, dev_index, + region_index, region_start); + if (result) + return result; + + result = ps3_repository_read_stor_dev_region_size(bus_index, dev_index, + region_index, region_size); + return result; +} + +/** + * ps3_repository_read_num_pu - Number of logical PU processors for this lpar. + */ + +int ps3_repository_read_num_pu(u64 *num_pu) +{ + *num_pu = 0; + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("pun", 0), + 0, 0, + num_pu, NULL); +} + +/** + * ps3_repository_read_pu_id - Read the logical PU id. + * @pu_index: Zero based index. + * @pu_id: The logical PU id. + */ + +int ps3_repository_read_pu_id(unsigned int pu_index, u64 *pu_id) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("pu", pu_index), + 0, 0, + pu_id, NULL); +} + +int ps3_repository_read_rm_size(unsigned int ppe_id, u64 *rm_size) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("pu", 0), + ppe_id, + make_field("rm_size", 0), + rm_size, NULL); +} + +int ps3_repository_read_region_total(u64 *region_total) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("rgntotal", 0), + 0, 0, + region_total, NULL); +} + +/** + * ps3_repository_read_mm_info - Read mm info for single pu system. + * @rm_base: Real mode memory base address. + * @rm_size: Real mode memory size. + * @region_total: Maximum memory region size. + */ + +int ps3_repository_read_mm_info(u64 *rm_base, u64 *rm_size, u64 *region_total) +{ + int result; + u64 ppe_id; + + lv1_get_logical_ppe_id(&ppe_id); + *rm_base = 0; + result = ps3_repository_read_rm_size(ppe_id, rm_size); + return result ? result + : ps3_repository_read_region_total(region_total); +} + +/** + * ps3_repository_read_highmem_region_count - Read the number of highmem regions + * + * Bootloaders must arrange the repository nodes such that regions are indexed + * with a region_index from 0 to region_count-1. 
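+ * The count is read from the 'highmem.region.count' node of the
+ * current lpar.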
+ */ + +int ps3_repository_read_highmem_region_count(unsigned int *region_count) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("highmem", 0), + make_field("region", 0), + make_field("count", 0), + 0, + &v1, NULL); + *region_count = v1; + return result; +} + + +int ps3_repository_read_highmem_base(unsigned int region_index, + u64 *highmem_base) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("base", 0), + 0, + highmem_base, NULL); +} + +int ps3_repository_read_highmem_size(unsigned int region_index, + u64 *highmem_size) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("size", 0), + 0, + highmem_size, NULL); +} + +/** + * ps3_repository_read_highmem_info - Read high memory region info + * @region_index: Region index, {0,..,region_count-1}. + * @highmem_base: High memory base address. + * @highmem_size: High memory size. + * + * Bootloaders that preallocate highmem regions must place the + * region info into the repository at these well known nodes. + */ + +int ps3_repository_read_highmem_info(unsigned int region_index, + u64 *highmem_base, u64 *highmem_size) +{ + int result; + + *highmem_base = 0; + result = ps3_repository_read_highmem_base(region_index, highmem_base); + return result ? result + : ps3_repository_read_highmem_size(region_index, highmem_size); +} + +/** + * ps3_repository_read_num_spu_reserved - Number of physical spus reserved. + * @num_spu: Number of physical spus. + */ + +int ps3_repository_read_num_spu_reserved(unsigned int *num_spu_reserved) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("spun", 0), + 0, 0, + &v1, NULL); + *num_spu_reserved = v1; + return result; +} + +/** + * ps3_repository_read_num_spu_resource_id - Number of spu resource reservations. + * @num_resource_id: Number of spu resource ids. + */ + +int ps3_repository_read_num_spu_resource_id(unsigned int *num_resource_id) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("spursvn", 0), + 0, 0, + &v1, NULL); + *num_resource_id = v1; + return result; +} + +/** + * ps3_repository_read_spu_resource_id - spu resource reservation id value. + * @res_index: Resource reservation index. + * @resource_type: Resource reservation type. + * @resource_id: Resource reservation id. 
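+ *
+ * The repository returns the reservation type in the first value and
+ * the reservation id in the second.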
+ */ + +int ps3_repository_read_spu_resource_id(unsigned int res_index, + enum ps3_spu_resource_type *resource_type, unsigned int *resource_id) +{ + int result; + u64 v1 = 0; + u64 v2 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("spursv", 0), + res_index, + 0, + &v1, &v2); + *resource_type = v1; + *resource_id = v2; + return result; +} + +static int ps3_repository_read_boot_dat_address(u64 *address) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("boot_dat", 0), + make_field("address", 0), + 0, + address, NULL); +} + +int ps3_repository_read_boot_dat_size(unsigned int *size) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("boot_dat", 0), + make_field("size", 0), + 0, + &v1, NULL); + *size = v1; + return result; +} + +int ps3_repository_read_vuart_av_port(unsigned int *port) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("vir_uart", 0), + make_field("port", 0), + make_field("avset", 0), + &v1, NULL); + *port = v1; + return result; +} + +int ps3_repository_read_vuart_sysmgr_port(unsigned int *port) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("vir_uart", 0), + make_field("port", 0), + make_field("sysmgr", 0), + &v1, NULL); + *port = v1; + return result; +} + +/** + * ps3_repository_read_boot_dat_info - Get address and size of cell_ext_os_area. + * address: lpar address of cell_ext_os_area + * @size: size of cell_ext_os_area + */ + +int ps3_repository_read_boot_dat_info(u64 *lpar_addr, unsigned int *size) +{ + int result; + + *size = 0; + result = ps3_repository_read_boot_dat_address(lpar_addr); + return result ? result + : ps3_repository_read_boot_dat_size(size); +} + +/** + * ps3_repository_read_num_be - Number of physical BE processors in the system. + */ + +int ps3_repository_read_num_be(unsigned int *num_be) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("ben", 0), + 0, + 0, + 0, + &v1, NULL); + *num_be = v1; + return result; +} + +/** + * ps3_repository_read_be_node_id - Read the physical BE processor node id. + * @be_index: Zero based index. + * @node_id: The BE processor node id. + */ + +int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("be", be_index), + 0, + 0, + 0, + node_id, NULL); +} + +/** + * ps3_repository_read_be_id - Read the physical BE processor id. + * @node_id: The BE processor node id. + * @be_id: The BE processor id. + */ + +int ps3_repository_read_be_id(u64 node_id, u64 *be_id) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("be", 0), + node_id, + 0, + 0, + be_id, NULL); +} + +int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("be", 0), + node_id, + make_field("clock", 0), + 0, + tb_freq, NULL); +} + +int ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq) +{ + int result; + u64 node_id; + + *tb_freq = 0; + result = ps3_repository_read_be_node_id(be_index, &node_id); + return result ? 
result + : ps3_repository_read_tb_freq(node_id, tb_freq); +} + +int ps3_repository_read_lpm_privileges(unsigned int be_index, u64 *lpar, + u64 *rights) +{ + int result; + u64 node_id; + + *lpar = 0; + *rights = 0; + result = ps3_repository_read_be_node_id(be_index, &node_id); + return result ? result + : read_node(PS3_LPAR_ID_PME, + make_first_field("be", 0), + node_id, + make_field("lpm", 0), + make_field("priv", 0), + lpar, rights); +} + +#if defined(CONFIG_PS3_REPOSITORY_WRITE) + +static int create_node(u64 n1, u64 n2, u64 n3, u64 n4, u64 v1, u64 v2) +{ + int result; + + dump_node(0, n1, n2, n3, n4, v1, v2); + + result = lv1_create_repository_node(n1, n2, n3, n4, v1, v2); + + if (result) { + pr_devel("%s:%d: lv1_create_repository_node failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return -ENOENT; + } + + return 0; +} + +static int delete_node(u64 n1, u64 n2, u64 n3, u64 n4) +{ + int result; + + dump_node(0, n1, n2, n3, n4, 0, 0); + + result = lv1_delete_repository_node(n1, n2, n3, n4); + + if (result) { + pr_devel("%s:%d: lv1_delete_repository_node failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return -ENOENT; + } + + return 0; +} + +static int write_node(u64 n1, u64 n2, u64 n3, u64 n4, u64 v1, u64 v2) +{ + int result; + + result = create_node(n1, n2, n3, n4, v1, v2); + + if (!result) + return 0; + + result = lv1_write_repository_node(n1, n2, n3, n4, v1, v2); + + if (result) { + pr_devel("%s:%d: lv1_write_repository_node failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return -ENOENT; + } + + return 0; +} + +int ps3_repository_write_highmem_region_count(unsigned int region_count) +{ + int result; + u64 v1 = (u64)region_count; + + result = write_node( + make_first_field("highmem", 0), + make_field("region", 0), + make_field("count", 0), + 0, + v1, 0); + return result; +} + +int ps3_repository_write_highmem_base(unsigned int region_index, + u64 highmem_base) +{ + return write_node( + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("base", 0), + 0, + highmem_base, 0); +} + +int ps3_repository_write_highmem_size(unsigned int region_index, + u64 highmem_size) +{ + return write_node( + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("size", 0), + 0, + highmem_size, 0); +} + +int ps3_repository_write_highmem_info(unsigned int region_index, + u64 highmem_base, u64 highmem_size) +{ + int result; + + result = ps3_repository_write_highmem_base(region_index, highmem_base); + return result ? result + : ps3_repository_write_highmem_size(region_index, highmem_size); +} + +static int ps3_repository_delete_highmem_base(unsigned int region_index) +{ + return delete_node( + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("base", 0), + 0); +} + +static int ps3_repository_delete_highmem_size(unsigned int region_index) +{ + return delete_node( + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("size", 0), + 0); +} + +int ps3_repository_delete_highmem_info(unsigned int region_index) +{ + int result; + + result = ps3_repository_delete_highmem_base(region_index); + result += ps3_repository_delete_highmem_size(region_index); + + return result ? 
-1 : 0;
+}
+
+#endif /* defined(CONFIG_PS3_REPOSITORY_WRITE) */
+
+#if defined(DEBUG)
+
+int ps3_repository_dump_resource_info(const struct ps3_repository_device *repo)
+{
+	int result = 0;
+	unsigned int res_index;
+
+	pr_devel(" -> %s:%d: (%u:%u)\n", __func__, __LINE__,
+		repo->bus_index, repo->dev_index);
+
+	for (res_index = 0; res_index < 10; res_index++) {
+		enum ps3_interrupt_type intr_type;
+		unsigned int interrupt_id;
+
+		result = ps3_repository_read_dev_intr(repo->bus_index,
+			repo->dev_index, res_index, &intr_type, &interrupt_id);
+
+		if (result) {
+			if (result != LV1_NO_ENTRY)
+				pr_devel("%s:%d ps3_repository_read_dev_intr"
+					" (%u:%u) failed\n", __func__, __LINE__,
+					repo->bus_index, repo->dev_index);
+			break;
+		}
+
+		pr_devel("%s:%d (%u:%u) intr_type %u, interrupt_id %u\n",
+			__func__, __LINE__, repo->bus_index, repo->dev_index,
+			intr_type, interrupt_id);
+	}
+
+	for (res_index = 0; res_index < 10; res_index++) {
+		enum ps3_reg_type reg_type;
+		u64 bus_addr;
+		u64 len;
+
+		result = ps3_repository_read_dev_reg(repo->bus_index,
+			repo->dev_index, res_index, &reg_type, &bus_addr, &len);
+
+		if (result) {
+			if (result != LV1_NO_ENTRY)
+				pr_devel("%s:%d ps3_repository_read_dev_reg"
+					" (%u:%u) failed\n", __func__, __LINE__,
+					repo->bus_index, repo->dev_index);
+			break;
+		}
+
+		pr_devel("%s:%d (%u:%u) reg_type %u, bus_addr %llxh, len %llxh\n",
+			__func__, __LINE__, repo->bus_index, repo->dev_index,
+			reg_type, bus_addr, len);
+	}
+
+	pr_devel(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+}
+
+static int dump_stor_dev_info(struct ps3_repository_device *repo)
+{
+	int result = 0;
+	unsigned int num_regions, region_index;
+	u64 port, blk_size, num_blocks;
+
+	pr_devel(" -> %s:%d: (%u:%u)\n", __func__, __LINE__,
+		repo->bus_index, repo->dev_index);
+
+	result = ps3_repository_read_stor_dev_info(repo->bus_index,
+		repo->dev_index, &port, &blk_size, &num_blocks, &num_regions);
+	if (result) {
+		pr_devel("%s:%d ps3_repository_read_stor_dev_info"
+			" (%u:%u) failed\n", __func__, __LINE__,
+			repo->bus_index, repo->dev_index);
+		goto out;
+	}
+
+	pr_devel("%s:%d (%u:%u): port %llu, blk_size %llu, num_blocks "
+		"%llu, num_regions %u\n",
+		__func__, __LINE__, repo->bus_index, repo->dev_index,
+		port, blk_size, num_blocks, num_regions);
+
+	for (region_index = 0; region_index < num_regions; region_index++) {
+		unsigned int region_id;
+		u64 region_start, region_size;
+
+		result = ps3_repository_read_stor_dev_region(repo->bus_index,
+			repo->dev_index, region_index, &region_id,
+			&region_start, &region_size);
+		if (result) {
+			pr_devel("%s:%d ps3_repository_read_stor_dev_region"
+				" (%u:%u) failed\n", __func__, __LINE__,
+				repo->bus_index, repo->dev_index);
+			break;
+		}
+
+		pr_devel("%s:%d (%u:%u) region_id %u, start %lxh, size %lxh\n",
+			__func__, __LINE__, repo->bus_index, repo->dev_index,
+			region_id, (unsigned long)region_start,
+			(unsigned long)region_size);
+	}
+
+out:
+	pr_devel(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+}
+
+static int dump_device_info(struct ps3_repository_device *repo,
+	unsigned int num_dev)
+{
+	int result = 0;
+
+	pr_devel(" -> %s:%d: bus_%u\n", __func__, __LINE__, repo->bus_index);
+
+	for (repo->dev_index = 0; repo->dev_index < num_dev;
+		repo->dev_index++) {
+
+		result = ps3_repository_read_dev_type(repo->bus_index,
+			repo->dev_index, &repo->dev_type);
+
+		if (result) {
+			pr_devel("%s:%d ps3_repository_read_dev_type"
+				" (%u:%u) failed\n", __func__, __LINE__,
+				repo->bus_index, repo->dev_index);
+			break;
+		}
+
+		result =
ps3_repository_read_dev_id(repo->bus_index, + repo->dev_index, &repo->dev_id); + + if (result) { + pr_devel("%s:%d ps3_repository_read_dev_id" + " (%u:%u) failed\n", __func__, __LINE__, + repo->bus_index, repo->dev_index); + continue; + } + + pr_devel("%s:%d (%u:%u): dev_type %u, dev_id %lu\n", __func__, + __LINE__, repo->bus_index, repo->dev_index, + repo->dev_type, (unsigned long)repo->dev_id); + + ps3_repository_dump_resource_info(repo); + + if (repo->bus_type == PS3_BUS_TYPE_STORAGE) + dump_stor_dev_info(repo); + } + + pr_devel(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +int ps3_repository_dump_bus_info(void) +{ + int result = 0; + struct ps3_repository_device repo; + + pr_devel(" -> %s:%d\n", __func__, __LINE__); + + memset(&repo, 0, sizeof(repo)); + + for (repo.bus_index = 0; repo.bus_index < 10; repo.bus_index++) { + unsigned int num_dev; + + result = ps3_repository_read_bus_type(repo.bus_index, + &repo.bus_type); + + if (result) { + pr_devel("%s:%d read_bus_type(%u) failed\n", + __func__, __LINE__, repo.bus_index); + break; + } + + result = ps3_repository_read_bus_id(repo.bus_index, + &repo.bus_id); + + if (result) { + pr_devel("%s:%d read_bus_id(%u) failed\n", + __func__, __LINE__, repo.bus_index); + continue; + } + + if (repo.bus_index != repo.bus_id) + pr_devel("%s:%d bus_index != bus_id\n", + __func__, __LINE__); + + result = ps3_repository_read_bus_num_dev(repo.bus_index, + &num_dev); + + if (result) { + pr_devel("%s:%d read_bus_num_dev(%u) failed\n", + __func__, __LINE__, repo.bus_index); + continue; + } + + pr_devel("%s:%d bus_%u: bus_type %u, bus_id %lu, num_dev %u\n", + __func__, __LINE__, repo.bus_index, repo.bus_type, + (unsigned long)repo.bus_id, num_dev); + + dump_device_info(&repo, num_dev); + } + + pr_devel(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +#endif /* defined(DEBUG) */ diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c new file mode 100644 index 000000000..799c8580a --- /dev/null +++ b/arch/powerpc/platforms/ps3/setup.c @@ -0,0 +1,281 @@ +/* + * PS3 platform setup routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/root_dev.h> +#include <linux/console.h> +#include <linux/export.h> +#include <linux/bootmem.h> + +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/time.h> +#include <asm/iommu.h> +#include <asm/udbg.h> +#include <asm/prom.h> +#include <asm/lv1call.h> +#include <asm/ps3gpu.h> + +#include "platform.h" + +#if defined(DEBUG) +#define DBG udbg_printf +#else +#define DBG pr_debug +#endif + +/* mutex synchronizing GPU accesses and video mode changes */ +DEFINE_MUTEX(ps3_gpu_mutex); +EXPORT_SYMBOL_GPL(ps3_gpu_mutex); + +static union ps3_firmware_version ps3_firmware_version; + +void ps3_get_firmware_version(union ps3_firmware_version *v) +{ + *v = ps3_firmware_version; +} +EXPORT_SYMBOL_GPL(ps3_get_firmware_version); + +int ps3_compare_firmware_version(u16 major, u16 minor, u16 rev) +{ + union ps3_firmware_version x; + + x.pad = 0; + x.major = major; + x.minor = minor; + x.rev = rev; + + return (ps3_firmware_version.raw > x.raw) - + (ps3_firmware_version.raw < x.raw); +} +EXPORT_SYMBOL_GPL(ps3_compare_firmware_version); + +static void ps3_power_save(void) +{ + /* + * lv1_pause() puts the PPE thread into inactive state until an + * irq on an unmasked plug exists. MSR[EE] has no effect. + * flags: 0 = wake on DEC interrupt, 1 = ignore DEC interrupt. + */ + + lv1_pause(0); +} + +static void ps3_restart(char *cmd) +{ + DBG("%s:%d cmd '%s'\n", __func__, __LINE__, cmd); + + smp_send_stop(); + ps3_sys_manager_restart(); /* never returns */ +} + +static void ps3_power_off(void) +{ + DBG("%s:%d\n", __func__, __LINE__); + + smp_send_stop(); + ps3_sys_manager_power_off(); /* never returns */ +} + +static void ps3_halt(void) +{ + DBG("%s:%d\n", __func__, __LINE__); + + smp_send_stop(); + ps3_sys_manager_halt(); /* never returns */ +} + +static void ps3_panic(char *str) +{ + DBG("%s:%d %s\n", __func__, __LINE__, str); + + smp_send_stop(); + printk("\n"); + printk(" System does not reboot automatically.\n"); + printk(" Please press POWER button.\n"); + printk("\n"); + + while(1) + lv1_pause(1); +} + +#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \ + defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE) +static void __init prealloc(struct ps3_prealloc *p) +{ + if (!p->size) + return; + + p->address = memblock_virt_alloc(p->size, p->align); + + printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size, + p->address); +} +#endif + +#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) +struct ps3_prealloc ps3fb_videomemory = { + .name = "ps3fb videomemory", + .size = CONFIG_FB_PS3_DEFAULT_SIZE_M*1024*1024, + .align = 1024*1024 /* the GPU requires 1 MiB alignment */ +}; +EXPORT_SYMBOL_GPL(ps3fb_videomemory); +#define prealloc_ps3fb_videomemory() prealloc(&ps3fb_videomemory) + +static int __init early_parse_ps3fb(char *p) +{ + if (!p) + return 1; + + ps3fb_videomemory.size = _ALIGN_UP(memparse(p, &p), + ps3fb_videomemory.align); + return 0; +} +early_param("ps3fb", early_parse_ps3fb); +#else +#define prealloc_ps3fb_videomemory() do { } while (0) +#endif + +#if defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE) +struct ps3_prealloc ps3flash_bounce_buffer = { + .name = "ps3flash bounce buffer", + .size = 256*1024, + .align = 256*1024 +}; 
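+/*
+ * The 256 KiB size and alignment presumably match the preferred
+ * transfer granularity of the FLASH storage device.
+ */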
+EXPORT_SYMBOL_GPL(ps3flash_bounce_buffer); +#define prealloc_ps3flash_bounce_buffer() prealloc(&ps3flash_bounce_buffer) + +static int __init early_parse_ps3flash(char *p) +{ + if (!p) + return 1; + + if (!strcmp(p, "off")) + ps3flash_bounce_buffer.size = 0; + + return 0; +} +early_param("ps3flash", early_parse_ps3flash); +#else +#define prealloc_ps3flash_bounce_buffer() do { } while (0) +#endif + +static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx) +{ + /* Have to set at least one bit in the DABRX */ + if (dabrx == 0 && dabr == 0) + dabrx = DABRX_USER; + /* hypervisor only allows us to set BTI, Kernel and user */ + dabrx &= DABRX_BTI | DABRX_KERNEL | DABRX_USER; + + return lv1_set_dabr(dabr, dabrx) ? -1 : 0; +} + +static void __init ps3_setup_arch(void) +{ + u64 tmp; + + DBG(" -> %s:%d\n", __func__, __LINE__); + + lv1_get_version_info(&ps3_firmware_version.raw, &tmp); + + printk(KERN_INFO "PS3 firmware version %u.%u.%u\n", + ps3_firmware_version.major, ps3_firmware_version.minor, + ps3_firmware_version.rev); + + ps3_spu_set_platform(); + +#ifdef CONFIG_SMP + smp_init_ps3(); +#endif + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + prealloc_ps3fb_videomemory(); + prealloc_ps3flash_bounce_buffer(); + + ppc_md.power_save = ps3_power_save; + ps3_os_area_init(); + + DBG(" <- %s:%d\n", __func__, __LINE__); +} + +static void __init ps3_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + +static int __init ps3_probe(void) +{ + unsigned long htab_size; + unsigned long dt_root; + + DBG(" -> %s:%d\n", __func__, __LINE__); + + dt_root = of_get_flat_dt_root(); + if (!of_flat_dt_is_compatible(dt_root, "sony,ps3")) + return 0; + + powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; + + ps3_os_area_save_params(); + ps3_mm_init(); + ps3_mm_vas_create(&htab_size); + ps3_hpte_init(htab_size); + pm_power_off = ps3_power_off; + + DBG(" <- %s:%d\n", __func__, __LINE__); + return 1; +} + +#if defined(CONFIG_KEXEC) +static void ps3_kexec_cpu_down(int crash_shutdown, int secondary) +{ + int cpu = smp_processor_id(); + + DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu); + + ps3_smp_cleanup_cpu(cpu); + ps3_shutdown_IRQ(cpu); + + DBG(" <- %s:%d\n", __func__, __LINE__); +} +#endif + +define_machine(ps3) { + .name = "PS3", + .probe = ps3_probe, + .setup_arch = ps3_setup_arch, + .init_IRQ = ps3_init_IRQ, + .panic = ps3_panic, + .get_boot_time = ps3_get_boot_time, + .set_dabr = ps3_set_dabr, + .calibrate_decr = ps3_calibrate_decr, + .progress = ps3_progress, + .restart = ps3_restart, + .halt = ps3_halt, +#if defined(CONFIG_KEXEC) + .kexec_cpu_down = ps3_kexec_cpu_down, +#endif +}; diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c new file mode 100644 index 000000000..3c7707af3 --- /dev/null +++ b/arch/powerpc/platforms/ps3/smp.c @@ -0,0 +1,132 @@ +/* + * PS3 SMP routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/smp.h> + +#include <asm/machdep.h> +#include <asm/udbg.h> + +#include "platform.h" + +#if defined(DEBUG) +#define DBG udbg_printf +#else +#define DBG pr_debug +#endif + +/** + * ps3_ipi_virqs - a per cpu array of virqs for ipi use + */ + +#define MSG_COUNT 4 +static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs); + +static void ps3_smp_message_pass(int cpu, int msg) +{ + int result; + unsigned int virq; + + if (msg >= MSG_COUNT) { + DBG("%s:%d: bad msg: %d\n", __func__, __LINE__, msg); + return; + } + + virq = per_cpu(ps3_ipi_virqs, cpu)[msg]; + result = ps3_send_event_locally(virq); + + if (result) + DBG("%s:%d: ps3_send_event_locally(%d, %d) failed" + " (%d)\n", __func__, __LINE__, cpu, msg, result); +} + +static void __init ps3_smp_probe(void) +{ + int cpu; + + for (cpu = 0; cpu < 2; cpu++) { + int result; + unsigned int *virqs = per_cpu(ps3_ipi_virqs, cpu); + int i; + + DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu); + + /* + * Check assumptions on ps3_ipi_virqs[] indexing. If this + * check fails, then a different mapping of PPC_MSG_ + * to index needs to be setup. + */ + + BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION != 0); + BUILD_BUG_ON(PPC_MSG_RESCHEDULE != 1); + BUILD_BUG_ON(PPC_MSG_TICK_BROADCAST != 2); + BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK != 3); + + for (i = 0; i < MSG_COUNT; i++) { + result = ps3_event_receive_port_setup(cpu, &virqs[i]); + + if (result) + continue; + + DBG("%s:%d: (%d, %d) => virq %u\n", + __func__, __LINE__, cpu, i, virqs[i]); + + result = smp_request_message_ipi(virqs[i], i); + + if (result) + virqs[i] = NO_IRQ; + else + ps3_register_ipi_irq(cpu, virqs[i]); + } + + ps3_register_ipi_debug_brk(cpu, virqs[PPC_MSG_DEBUGGER_BREAK]); + + DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu); + } +} + +void ps3_smp_cleanup_cpu(int cpu) +{ + unsigned int *virqs = per_cpu(ps3_ipi_virqs, cpu); + int i; + + DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu); + + for (i = 0; i < MSG_COUNT; i++) { + /* Can't call free_irq from interrupt context. */ + ps3_event_receive_port_destroy(virqs[i]); + virqs[i] = NO_IRQ; + } + + DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu); +} + +static struct smp_ops_t ps3_smp_ops = { + .probe = ps3_smp_probe, + .message_pass = ps3_smp_message_pass, + .kick_cpu = smp_generic_kick_cpu, +}; + +void smp_init_ps3(void) +{ + DBG(" -> %s\n", __func__); + smp_ops = &ps3_smp_ops; + DBG(" <- %s\n", __func__); +} diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c new file mode 100644 index 000000000..a0bca05e2 --- /dev/null +++ b/arch/powerpc/platforms/ps3/spu.c @@ -0,0 +1,636 @@ +/* + * PS3 Platform spu routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/mmzone.h> +#include <linux/export.h> +#include <linux/io.h> +#include <linux/mm.h> + +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/lv1call.h> +#include <asm/ps3.h> + +#include "../cell/spufs/spufs.h" +#include "platform.h" + +/* spu_management_ops */ + +/** + * enum spe_type - Type of spe to create. + * @spe_type_logical: Standard logical spe. + * + * For use with lv1_construct_logical_spe(). The current HV does not support + * any types other than those listed. + */ + +enum spe_type { + SPE_TYPE_LOGICAL = 0, +}; + +/** + * struct spe_shadow - logical spe shadow register area. + * + * Read-only shadow of spe registers. + */ + +struct spe_shadow { + u8 padding_0140[0x0140]; + u64 int_status_class0_RW; /* 0x0140 */ + u64 int_status_class1_RW; /* 0x0148 */ + u64 int_status_class2_RW; /* 0x0150 */ + u8 padding_0158[0x0610-0x0158]; + u64 mfc_dsisr_RW; /* 0x0610 */ + u8 padding_0618[0x0620-0x0618]; + u64 mfc_dar_RW; /* 0x0620 */ + u8 padding_0628[0x0800-0x0628]; + u64 mfc_dsipr_R; /* 0x0800 */ + u8 padding_0808[0x0810-0x0808]; + u64 mfc_lscrr_R; /* 0x0810 */ + u8 padding_0818[0x0c00-0x0818]; + u64 mfc_cer_R; /* 0x0c00 */ + u8 padding_0c08[0x0f00-0x0c08]; + u64 spe_execution_status; /* 0x0f00 */ + u8 padding_0f08[0x1000-0x0f08]; +}; + +/** + * enum spe_ex_state - Logical spe execution state. + * @spe_ex_state_unexecutable: Uninitialized. + * @spe_ex_state_executable: Enabled, not ready. + * @spe_ex_state_executed: Ready for use. + * + * The execution state (status) of the logical spe as reported in + * struct spe_shadow:spe_execution_status. + */ + +enum spe_ex_state { + SPE_EX_STATE_UNEXECUTABLE = 0, + SPE_EX_STATE_EXECUTABLE = 2, + SPE_EX_STATE_EXECUTED = 3, +}; + +/** + * struct priv1_cache - Cached values of priv1 registers. + * @masks[]: Array of cached spe interrupt masks, indexed by class. + * @sr1: Cached mfc_sr1 register. + * @tclass_id: Cached mfc_tclass_id register. + */ + +struct priv1_cache { + u64 masks[3]; + u64 sr1; + u64 tclass_id; +}; + +/** + * struct spu_pdata - Platform state variables. + * @spe_id: HV spe id returned by lv1_construct_logical_spe(). + * @resource_id: HV spe resource id returned by + * ps3_repository_read_spe_resource_id(). + * @priv2_addr: lpar address of spe priv2 area returned by + * lv1_construct_logical_spe(). + * @shadow_addr: lpar address of spe register shadow area returned by + * lv1_construct_logical_spe(). + * @shadow: Virtual (ioremap) address of spe register shadow area. + * @cache: Cached values of priv1 registers. 
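+ *
+ * One instance is allocated per spu and hangs off spu->pdata; see the
+ * spu_pdata() accessor below.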
+ */
+
+struct spu_pdata {
+ u64 spe_id;
+ u64 resource_id;
+ u64 priv2_addr;
+ u64 shadow_addr;
+ struct spe_shadow __iomem *shadow;
+ struct priv1_cache cache;
+};
+
+static struct spu_pdata *spu_pdata(struct spu *spu)
+{
+ return spu->pdata;
+}
+
+#define dump_areas(_a, _b, _c, _d, _e) \
+ _dump_areas(_a, _b, _c, _d, _e, __func__, __LINE__)
+static void _dump_areas(unsigned int spe_id, unsigned long priv2,
+ unsigned long problem, unsigned long ls, unsigned long shadow,
+ const char* func, int line)
+{
+ pr_debug("%s:%d: spe_id: %xh (%u)\n", func, line, spe_id, spe_id);
+ pr_debug("%s:%d: priv2: %lxh\n", func, line, priv2);
+ pr_debug("%s:%d: problem: %lxh\n", func, line, problem);
+ pr_debug("%s:%d: ls: %lxh\n", func, line, ls);
+ pr_debug("%s:%d: shadow: %lxh\n", func, line, shadow);
+}
+
+u64 ps3_get_spe_id(void *arg)
+{
+ return spu_pdata(arg)->spe_id;
+}
+EXPORT_SYMBOL_GPL(ps3_get_spe_id);
+
+static unsigned long get_vas_id(void)
+{
+ u64 id;
+
+ lv1_get_logical_ppe_id(&id);
+ lv1_get_virtual_address_space_id_of_ppe(&id);
+
+ return id;
+}
+
+static int __init construct_spu(struct spu *spu)
+{
+ int result;
+ u64 unused;
+ u64 problem_phys;
+ u64 local_store_phys;
+
+ result = lv1_construct_logical_spe(PAGE_SHIFT, PAGE_SHIFT, PAGE_SHIFT,
+ PAGE_SHIFT, PAGE_SHIFT, get_vas_id(), SPE_TYPE_LOGICAL,
+ &spu_pdata(spu)->priv2_addr, &problem_phys,
+ &local_store_phys, &unused,
+ &spu_pdata(spu)->shadow_addr,
+ &spu_pdata(spu)->spe_id);
+ spu->problem_phys = problem_phys;
+ spu->local_store_phys = local_store_phys;
+
+ if (result) {
+ pr_debug("%s:%d: lv1_construct_logical_spe failed: %s\n",
+ __func__, __LINE__, ps3_result(result));
+ return result;
+ }
+
+ return result;
+}
+
+static void spu_unmap(struct spu *spu)
+{
+ iounmap(spu->priv2);
+ iounmap(spu->problem);
+ iounmap((__force u8 __iomem *)spu->local_store);
+ iounmap(spu_pdata(spu)->shadow);
+}
+
+/**
+ * setup_areas - Map the spu regions into the address space.
+ *
+ * The current HV requires the spu shadow regs to be mapped with the
+ * PTE page protection bits set as read-only (PP=3). This implementation
+ * uses the low level __ioremap() to bypass the page protection settings
+ * enforced by ioremap_prot() to get the needed PTE bits set for the
+ * shadow regs.
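+ *
+ * (The shadow_flags value below sets this up: _PAGE_NO_CACHE with the
+ * low PP bits set to 3.)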
+ */ + +static int __init setup_areas(struct spu *spu) +{ + struct table {char* name; unsigned long addr; unsigned long size;}; + static const unsigned long shadow_flags = _PAGE_NO_CACHE | 3; + + spu_pdata(spu)->shadow = __ioremap(spu_pdata(spu)->shadow_addr, + sizeof(struct spe_shadow), + shadow_flags); + if (!spu_pdata(spu)->shadow) { + pr_debug("%s:%d: ioremap shadow failed\n", __func__, __LINE__); + goto fail_ioremap; + } + + spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys, + LS_SIZE, _PAGE_NO_CACHE); + + if (!spu->local_store) { + pr_debug("%s:%d: ioremap local_store failed\n", + __func__, __LINE__); + goto fail_ioremap; + } + + spu->problem = ioremap(spu->problem_phys, + sizeof(struct spu_problem)); + + if (!spu->problem) { + pr_debug("%s:%d: ioremap problem failed\n", __func__, __LINE__); + goto fail_ioremap; + } + + spu->priv2 = ioremap(spu_pdata(spu)->priv2_addr, + sizeof(struct spu_priv2)); + + if (!spu->priv2) { + pr_debug("%s:%d: ioremap priv2 failed\n", __func__, __LINE__); + goto fail_ioremap; + } + + dump_areas(spu_pdata(spu)->spe_id, spu_pdata(spu)->priv2_addr, + spu->problem_phys, spu->local_store_phys, + spu_pdata(spu)->shadow_addr); + dump_areas(spu_pdata(spu)->spe_id, (unsigned long)spu->priv2, + (unsigned long)spu->problem, (unsigned long)spu->local_store, + (unsigned long)spu_pdata(spu)->shadow); + + return 0; + +fail_ioremap: + spu_unmap(spu); + + return -ENOMEM; +} + +static int __init setup_interrupts(struct spu *spu) +{ + int result; + + result = ps3_spe_irq_setup(PS3_BINDING_CPU_ANY, spu_pdata(spu)->spe_id, + 0, &spu->irqs[0]); + + if (result) + goto fail_alloc_0; + + result = ps3_spe_irq_setup(PS3_BINDING_CPU_ANY, spu_pdata(spu)->spe_id, + 1, &spu->irqs[1]); + + if (result) + goto fail_alloc_1; + + result = ps3_spe_irq_setup(PS3_BINDING_CPU_ANY, spu_pdata(spu)->spe_id, + 2, &spu->irqs[2]); + + if (result) + goto fail_alloc_2; + + return result; + +fail_alloc_2: + ps3_spe_irq_destroy(spu->irqs[1]); +fail_alloc_1: + ps3_spe_irq_destroy(spu->irqs[0]); +fail_alloc_0: + spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = NO_IRQ; + return result; +} + +static int __init enable_spu(struct spu *spu) +{ + int result; + + result = lv1_enable_logical_spe(spu_pdata(spu)->spe_id, + spu_pdata(spu)->resource_id); + + if (result) { + pr_debug("%s:%d: lv1_enable_logical_spe failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto fail_enable; + } + + result = setup_areas(spu); + + if (result) + goto fail_areas; + + result = setup_interrupts(spu); + + if (result) + goto fail_interrupts; + + return 0; + +fail_interrupts: + spu_unmap(spu); +fail_areas: + lv1_disable_logical_spe(spu_pdata(spu)->spe_id, 0); +fail_enable: + return result; +} + +static int ps3_destroy_spu(struct spu *spu) +{ + int result; + + pr_debug("%s:%d spu_%d\n", __func__, __LINE__, spu->number); + + result = lv1_disable_logical_spe(spu_pdata(spu)->spe_id, 0); + BUG_ON(result); + + ps3_spe_irq_destroy(spu->irqs[2]); + ps3_spe_irq_destroy(spu->irqs[1]); + ps3_spe_irq_destroy(spu->irqs[0]); + + spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = NO_IRQ; + + spu_unmap(spu); + + result = lv1_destruct_logical_spe(spu_pdata(spu)->spe_id); + BUG_ON(result); + + kfree(spu->pdata); + spu->pdata = NULL; + + return 0; +} + +static int __init ps3_create_spu(struct spu *spu, void *data) +{ + int result; + + pr_debug("%s:%d spu_%d\n", __func__, __LINE__, spu->number); + + spu->pdata = kzalloc(sizeof(struct spu_pdata), + GFP_KERNEL); + + if (!spu->pdata) { + result = -ENOMEM; + goto fail_malloc; + } + + 
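+ /* The data arg is the resource id passed in by ps3_enumerate_spus(). */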
spu_pdata(spu)->resource_id = (unsigned long)data; + + /* Init cached reg values to HV defaults. */ + + spu_pdata(spu)->cache.sr1 = 0x33; + + result = construct_spu(spu); + + if (result) + goto fail_construct; + + /* For now, just go ahead and enable it. */ + + result = enable_spu(spu); + + if (result) + goto fail_enable; + + /* Make sure the spu is in SPE_EX_STATE_EXECUTED. */ + + /* need something better here!!! */ + while (in_be64(&spu_pdata(spu)->shadow->spe_execution_status) + != SPE_EX_STATE_EXECUTED) + (void)0; + + return result; + +fail_enable: +fail_construct: + ps3_destroy_spu(spu); +fail_malloc: + return result; +} + +static int __init ps3_enumerate_spus(int (*fn)(void *data)) +{ + int result; + unsigned int num_resource_id; + unsigned int i; + + result = ps3_repository_read_num_spu_resource_id(&num_resource_id); + + pr_debug("%s:%d: num_resource_id %u\n", __func__, __LINE__, + num_resource_id); + + /* + * For now, just create logical spus equal to the number + * of physical spus reserved for the partition. + */ + + for (i = 0; i < num_resource_id; i++) { + enum ps3_spu_resource_type resource_type; + unsigned int resource_id; + + result = ps3_repository_read_spu_resource_id(i, + &resource_type, &resource_id); + + if (result) + break; + + if (resource_type == PS3_SPU_RESOURCE_TYPE_EXCLUSIVE) { + result = fn((void*)(unsigned long)resource_id); + + if (result) + break; + } + } + + if (result) { + printk(KERN_WARNING "%s:%d: Error initializing spus\n", + __func__, __LINE__); + return result; + } + + return num_resource_id; +} + +static int ps3_init_affinity(void) +{ + return 0; +} + +/** + * ps3_enable_spu - Enable SPU run control. + * + * An outstanding enhancement for the PS3 would be to add a guard to check + * for incorrect access to the spu problem state when the spu context is + * disabled. This check could be implemented with a flag added to the spu + * context that would inhibit mapping problem state pages, and a routine + * to unmap spu problem state pages. When the spu is enabled with + * ps3_enable_spu() the flag would be set allowing pages to be mapped, + * and when the spu is disabled with ps3_disable_spu() the flag would be + * cleared and the mapped problem state pages would be unmapped. + */ + +static void ps3_enable_spu(struct spu_context *ctx) +{ +} + +static void ps3_disable_spu(struct spu_context *ctx) +{ + ctx->ops->runcntl_stop(ctx); +} + +const struct spu_management_ops spu_management_ps3_ops = { + .enumerate_spus = ps3_enumerate_spus, + .create_spu = ps3_create_spu, + .destroy_spu = ps3_destroy_spu, + .enable_spu = ps3_enable_spu, + .disable_spu = ps3_disable_spu, + .init_affinity = ps3_init_affinity, +}; + +/* spu_priv1_ops */ + +static void int_mask_and(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + /* are these serialized by caller??? 
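+ * (presumably by locking in the generic spu_base code, but that is
+ * an assumption, not something verified here)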
*/ + old_mask = spu_int_mask_get(spu, class); + spu_int_mask_set(spu, class, old_mask & mask); +} + +static void int_mask_or(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = spu_int_mask_get(spu, class); + spu_int_mask_set(spu, class, old_mask | mask); +} + +static void int_mask_set(struct spu *spu, int class, u64 mask) +{ + spu_pdata(spu)->cache.masks[class] = mask; + lv1_set_spe_interrupt_mask(spu_pdata(spu)->spe_id, class, + spu_pdata(spu)->cache.masks[class]); +} + +static u64 int_mask_get(struct spu *spu, int class) +{ + return spu_pdata(spu)->cache.masks[class]; +} + +static void int_stat_clear(struct spu *spu, int class, u64 stat) +{ + /* Note that MFC_DSISR will be cleared when class1[MF] is set. */ + + lv1_clear_spe_interrupt_status(spu_pdata(spu)->spe_id, class, + stat, 0); +} + +static u64 int_stat_get(struct spu *spu, int class) +{ + u64 stat; + + lv1_get_spe_interrupt_status(spu_pdata(spu)->spe_id, class, &stat); + return stat; +} + +static void cpu_affinity_set(struct spu *spu, int cpu) +{ + /* No support. */ +} + +static u64 mfc_dar_get(struct spu *spu) +{ + return in_be64(&spu_pdata(spu)->shadow->mfc_dar_RW); +} + +static void mfc_dsisr_set(struct spu *spu, u64 dsisr) +{ + /* Nothing to do, cleared in int_stat_clear(). */ +} + +static u64 mfc_dsisr_get(struct spu *spu) +{ + return in_be64(&spu_pdata(spu)->shadow->mfc_dsisr_RW); +} + +static void mfc_sdr_setup(struct spu *spu) +{ + /* Nothing to do. */ +} + +static void mfc_sr1_set(struct spu *spu, u64 sr1) +{ + /* Check bits allowed by HV. */ + + static const u64 allowed = ~(MFC_STATE1_LOCAL_STORAGE_DECODE_MASK + | MFC_STATE1_PROBLEM_STATE_MASK); + + BUG_ON((sr1 & allowed) != (spu_pdata(spu)->cache.sr1 & allowed)); + + spu_pdata(spu)->cache.sr1 = sr1; + lv1_set_spe_privilege_state_area_1_register( + spu_pdata(spu)->spe_id, + offsetof(struct spu_priv1, mfc_sr1_RW), + spu_pdata(spu)->cache.sr1); +} + +static u64 mfc_sr1_get(struct spu *spu) +{ + return spu_pdata(spu)->cache.sr1; +} + +static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) +{ + spu_pdata(spu)->cache.tclass_id = tclass_id; + lv1_set_spe_privilege_state_area_1_register( + spu_pdata(spu)->spe_id, + offsetof(struct spu_priv1, mfc_tclass_id_RW), + spu_pdata(spu)->cache.tclass_id); +} + +static u64 mfc_tclass_id_get(struct spu *spu) +{ + return spu_pdata(spu)->cache.tclass_id; +} + +static void tlb_invalidate(struct spu *spu) +{ + /* Nothing to do. */ +} + +static void resource_allocation_groupID_set(struct spu *spu, u64 id) +{ + /* No support. */ +} + +static u64 resource_allocation_groupID_get(struct spu *spu) +{ + return 0; /* No support. */ +} + +static void resource_allocation_enable_set(struct spu *spu, u64 enable) +{ + /* No support. */ +} + +static u64 resource_allocation_enable_get(struct spu *spu) +{ + return 0; /* No support. 
*/ +} + +const struct spu_priv1_ops spu_priv1_ps3_ops = { + .int_mask_and = int_mask_and, + .int_mask_or = int_mask_or, + .int_mask_set = int_mask_set, + .int_mask_get = int_mask_get, + .int_stat_clear = int_stat_clear, + .int_stat_get = int_stat_get, + .cpu_affinity_set = cpu_affinity_set, + .mfc_dar_get = mfc_dar_get, + .mfc_dsisr_set = mfc_dsisr_set, + .mfc_dsisr_get = mfc_dsisr_get, + .mfc_sdr_setup = mfc_sdr_setup, + .mfc_sr1_set = mfc_sr1_set, + .mfc_sr1_get = mfc_sr1_get, + .mfc_tclass_id_set = mfc_tclass_id_set, + .mfc_tclass_id_get = mfc_tclass_id_get, + .tlb_invalidate = tlb_invalidate, + .resource_allocation_groupID_set = resource_allocation_groupID_set, + .resource_allocation_groupID_get = resource_allocation_groupID_get, + .resource_allocation_enable_set = resource_allocation_enable_set, + .resource_allocation_enable_get = resource_allocation_enable_get, +}; + +void ps3_spu_set_platform(void) +{ + spu_priv1_ops = &spu_priv1_ps3_ops; + spu_management_ops = &spu_management_ps3_ops; +} diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c new file mode 100644 index 000000000..5606fe36f --- /dev/null +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -0,0 +1,813 @@ +/* + * PS3 system bus driver. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/slab.h> + +#include <asm/udbg.h> +#include <asm/lv1call.h> +#include <asm/firmware.h> +#include <asm/cell-regs.h> + +#include "platform.h" + +static struct device ps3_system_bus = { + .init_name = "ps3_system", +}; + +/* FIXME: need device usage counters! 
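+ * The usage_hack below approximates them: the two usb devices (1,1)
+ * and (1,2) and the gpu are each opened once in the hv and reference
+ * counted here.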
*/ +struct { + struct mutex mutex; + int sb_11; /* usb 0 */ + int sb_12; /* usb 0 */ + int gpu; +} static usage_hack; + +static int ps3_is_device(struct ps3_system_bus_device *dev, u64 bus_id, + u64 dev_id) +{ + return dev->bus_id == bus_id && dev->dev_id == dev_id; +} + +static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev) +{ + int result; + + BUG_ON(!dev->bus_id); + mutex_lock(&usage_hack.mutex); + + if (ps3_is_device(dev, 1, 1)) { + usage_hack.sb_11++; + if (usage_hack.sb_11 > 1) { + result = 0; + goto done; + } + } + + if (ps3_is_device(dev, 1, 2)) { + usage_hack.sb_12++; + if (usage_hack.sb_12 > 1) { + result = 0; + goto done; + } + } + + result = lv1_open_device(dev->bus_id, dev->dev_id, 0); + + if (result) { + pr_debug("%s:%d: lv1_open_device failed: %s\n", __func__, + __LINE__, ps3_result(result)); + result = -EPERM; + } + +done: + mutex_unlock(&usage_hack.mutex); + return result; +} + +static int ps3_close_hv_device_sb(struct ps3_system_bus_device *dev) +{ + int result; + + BUG_ON(!dev->bus_id); + mutex_lock(&usage_hack.mutex); + + if (ps3_is_device(dev, 1, 1)) { + usage_hack.sb_11--; + if (usage_hack.sb_11) { + result = 0; + goto done; + } + } + + if (ps3_is_device(dev, 1, 2)) { + usage_hack.sb_12--; + if (usage_hack.sb_12) { + result = 0; + goto done; + } + } + + result = lv1_close_device(dev->bus_id, dev->dev_id); + BUG_ON(result); + +done: + mutex_unlock(&usage_hack.mutex); + return result; +} + +static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev) +{ + int result; + + mutex_lock(&usage_hack.mutex); + + usage_hack.gpu++; + if (usage_hack.gpu > 1) { + result = 0; + goto done; + } + + result = lv1_gpu_open(0); + + if (result) { + pr_debug("%s:%d: lv1_gpu_open failed: %s\n", __func__, + __LINE__, ps3_result(result)); + result = -EPERM; + } + +done: + mutex_unlock(&usage_hack.mutex); + return result; +} + +static int ps3_close_hv_device_gpu(struct ps3_system_bus_device *dev) +{ + int result; + + mutex_lock(&usage_hack.mutex); + + usage_hack.gpu--; + if (usage_hack.gpu) { + result = 0; + goto done; + } + + result = lv1_gpu_close(); + BUG_ON(result); + +done: + mutex_unlock(&usage_hack.mutex); + return result; +} + +int ps3_open_hv_device(struct ps3_system_bus_device *dev) +{ + BUG_ON(!dev); + pr_debug("%s:%d: match_id: %u\n", __func__, __LINE__, dev->match_id); + + switch (dev->match_id) { + case PS3_MATCH_ID_EHCI: + case PS3_MATCH_ID_OHCI: + case PS3_MATCH_ID_GELIC: + case PS3_MATCH_ID_STOR_DISK: + case PS3_MATCH_ID_STOR_ROM: + case PS3_MATCH_ID_STOR_FLASH: + return ps3_open_hv_device_sb(dev); + + case PS3_MATCH_ID_SOUND: + case PS3_MATCH_ID_GPU: + return ps3_open_hv_device_gpu(dev); + + case PS3_MATCH_ID_AV_SETTINGS: + case PS3_MATCH_ID_SYSTEM_MANAGER: + pr_debug("%s:%d: unsupported match_id: %u\n", __func__, + __LINE__, dev->match_id); + pr_debug("%s:%d: bus_id: %llu\n", __func__, __LINE__, + dev->bus_id); + BUG(); + return -EINVAL; + + default: + break; + } + + pr_debug("%s:%d: unknown match_id: %u\n", __func__, __LINE__, + dev->match_id); + BUG(); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(ps3_open_hv_device); + +int ps3_close_hv_device(struct ps3_system_bus_device *dev) +{ + BUG_ON(!dev); + pr_debug("%s:%d: match_id: %u\n", __func__, __LINE__, dev->match_id); + + switch (dev->match_id) { + case PS3_MATCH_ID_EHCI: + case PS3_MATCH_ID_OHCI: + case PS3_MATCH_ID_GELIC: + case PS3_MATCH_ID_STOR_DISK: + case PS3_MATCH_ID_STOR_ROM: + case PS3_MATCH_ID_STOR_FLASH: + return ps3_close_hv_device_sb(dev); + + case PS3_MATCH_ID_SOUND: + case PS3_MATCH_ID_GPU: + 
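+ /* sound and gpu share the single hv gpu open/close path */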
return ps3_close_hv_device_gpu(dev); + + case PS3_MATCH_ID_AV_SETTINGS: + case PS3_MATCH_ID_SYSTEM_MANAGER: + pr_debug("%s:%d: unsupported match_id: %u\n", __func__, + __LINE__, dev->match_id); + pr_debug("%s:%d: bus_id: %llu\n", __func__, __LINE__, + dev->bus_id); + BUG(); + return -EINVAL; + + default: + break; + } + + pr_debug("%s:%d: unknown match_id: %u\n", __func__, __LINE__, + dev->match_id); + BUG(); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(ps3_close_hv_device); + +#define dump_mmio_region(_a) _dump_mmio_region(_a, __func__, __LINE__) +static void _dump_mmio_region(const struct ps3_mmio_region* r, + const char* func, int line) +{ + pr_debug("%s:%d: dev %llu:%llu\n", func, line, r->dev->bus_id, + r->dev->dev_id); + pr_debug("%s:%d: bus_addr %lxh\n", func, line, r->bus_addr); + pr_debug("%s:%d: len %lxh\n", func, line, r->len); + pr_debug("%s:%d: lpar_addr %lxh\n", func, line, r->lpar_addr); +} + +static int ps3_sb_mmio_region_create(struct ps3_mmio_region *r) +{ + int result; + u64 lpar_addr; + + result = lv1_map_device_mmio_region(r->dev->bus_id, r->dev->dev_id, + r->bus_addr, r->len, r->page_size, &lpar_addr); + r->lpar_addr = lpar_addr; + + if (result) { + pr_debug("%s:%d: lv1_map_device_mmio_region failed: %s\n", + __func__, __LINE__, ps3_result(result)); + r->lpar_addr = 0; + } + + dump_mmio_region(r); + return result; +} + +static int ps3_ioc0_mmio_region_create(struct ps3_mmio_region *r) +{ + /* device specific; do nothing currently */ + return 0; +} + +int ps3_mmio_region_create(struct ps3_mmio_region *r) +{ + return r->mmio_ops->create(r); +} +EXPORT_SYMBOL_GPL(ps3_mmio_region_create); + +static int ps3_sb_free_mmio_region(struct ps3_mmio_region *r) +{ + int result; + + dump_mmio_region(r); + result = lv1_unmap_device_mmio_region(r->dev->bus_id, r->dev->dev_id, + r->lpar_addr); + + if (result) + pr_debug("%s:%d: lv1_unmap_device_mmio_region failed: %s\n", + __func__, __LINE__, ps3_result(result)); + + r->lpar_addr = 0; + return result; +} + +static int ps3_ioc0_free_mmio_region(struct ps3_mmio_region *r) +{ + /* device specific; do nothing currently */ + return 0; +} + + +int ps3_free_mmio_region(struct ps3_mmio_region *r) +{ + return r->mmio_ops->free(r); +} + +EXPORT_SYMBOL_GPL(ps3_free_mmio_region); + +static const struct ps3_mmio_region_ops ps3_mmio_sb_region_ops = { + .create = ps3_sb_mmio_region_create, + .free = ps3_sb_free_mmio_region +}; + +static const struct ps3_mmio_region_ops ps3_mmio_ioc0_region_ops = { + .create = ps3_ioc0_mmio_region_create, + .free = ps3_ioc0_free_mmio_region +}; + +int ps3_mmio_region_init(struct ps3_system_bus_device *dev, + struct ps3_mmio_region *r, unsigned long bus_addr, unsigned long len, + enum ps3_mmio_page_size page_size) +{ + r->dev = dev; + r->bus_addr = bus_addr; + r->len = len; + r->page_size = page_size; + switch (dev->dev_type) { + case PS3_DEVICE_TYPE_SB: + r->mmio_ops = &ps3_mmio_sb_region_ops; + break; + case PS3_DEVICE_TYPE_IOC0: + r->mmio_ops = &ps3_mmio_ioc0_region_ops; + break; + default: + BUG(); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(ps3_mmio_region_init); + +static int ps3_system_bus_match(struct device *_dev, + struct device_driver *_drv) +{ + int result; + struct ps3_system_bus_driver *drv = ps3_drv_to_system_bus_drv(_drv); + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + + if (!dev->match_sub_id) + result = dev->match_id == drv->match_id; + else + result = dev->match_sub_id == drv->match_sub_id && + dev->match_id == drv->match_id; + + if (result) + pr_info("%s:%d: 
dev=%u.%u(%s), drv=%u.%u(%s): match\n", + __func__, __LINE__, + dev->match_id, dev->match_sub_id, dev_name(&dev->core), + drv->match_id, drv->match_sub_id, drv->core.name); + else + pr_debug("%s:%d: dev=%u.%u(%s), drv=%u.%u(%s): miss\n", + __func__, __LINE__, + dev->match_id, dev->match_sub_id, dev_name(&dev->core), + drv->match_id, drv->match_sub_id, drv->core.name); + + return result; +} + +static int ps3_system_bus_probe(struct device *_dev) +{ + int result = 0; + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + struct ps3_system_bus_driver *drv; + + BUG_ON(!dev); + dev_dbg(_dev, "%s:%d\n", __func__, __LINE__); + + drv = ps3_system_bus_dev_to_system_bus_drv(dev); + BUG_ON(!drv); + + if (drv->probe) + result = drv->probe(dev); + else + pr_debug("%s:%d: %s no probe method\n", __func__, __LINE__, + dev_name(&dev->core)); + + pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core)); + return result; +} + +static int ps3_system_bus_remove(struct device *_dev) +{ + int result = 0; + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + struct ps3_system_bus_driver *drv; + + BUG_ON(!dev); + dev_dbg(_dev, "%s:%d\n", __func__, __LINE__); + + drv = ps3_system_bus_dev_to_system_bus_drv(dev); + BUG_ON(!drv); + + if (drv->remove) + result = drv->remove(dev); + else + dev_dbg(&dev->core, "%s:%d %s: no remove method\n", + __func__, __LINE__, drv->core.name); + + pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core)); + return result; +} + +static void ps3_system_bus_shutdown(struct device *_dev) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + struct ps3_system_bus_driver *drv; + + BUG_ON(!dev); + + dev_dbg(&dev->core, " -> %s:%d: match_id %d\n", __func__, __LINE__, + dev->match_id); + + if (!dev->core.driver) { + dev_dbg(&dev->core, "%s:%d: no driver bound\n", __func__, + __LINE__); + return; + } + + drv = ps3_system_bus_dev_to_system_bus_drv(dev); + + BUG_ON(!drv); + + dev_dbg(&dev->core, "%s:%d: %s -> %s\n", __func__, __LINE__, + dev_name(&dev->core), drv->core.name); + + if (drv->shutdown) + drv->shutdown(dev); + else if (drv->remove) { + dev_dbg(&dev->core, "%s:%d %s: no shutdown, calling remove\n", + __func__, __LINE__, drv->core.name); + drv->remove(dev); + } else { + dev_dbg(&dev->core, "%s:%d %s: no shutdown method\n", + __func__, __LINE__, drv->core.name); + BUG(); + } + + dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); +} + +static int ps3_system_bus_uevent(struct device *_dev, struct kobj_uevent_env *env) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + + if (add_uevent_var(env, "MODALIAS=ps3:%d:%d", dev->match_id, + dev->match_sub_id)) + return -ENOMEM; + return 0; +} + +static ssize_t modalias_show(struct device *_dev, struct device_attribute *a, + char *buf) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + int len = snprintf(buf, PAGE_SIZE, "ps3:%d:%d\n", dev->match_id, + dev->match_sub_id); + + return (len >= PAGE_SIZE) ? 
(PAGE_SIZE - 1) : len;
+}
+
+static struct device_attribute ps3_system_bus_dev_attrs[] = {
+ __ATTR_RO(modalias),
+ __ATTR_NULL,
+};
+
+struct bus_type ps3_system_bus_type = {
+ .name = "ps3_system_bus",
+ .match = ps3_system_bus_match,
+ .uevent = ps3_system_bus_uevent,
+ .probe = ps3_system_bus_probe,
+ .remove = ps3_system_bus_remove,
+ .shutdown = ps3_system_bus_shutdown,
+ .dev_attrs = ps3_system_bus_dev_attrs,
+};
+
+static int __init ps3_system_bus_init(void)
+{
+ int result;
+
+ if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+ return -ENODEV;
+
+ pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+ mutex_init(&usage_hack.mutex);
+
+ result = device_register(&ps3_system_bus);
+ BUG_ON(result);
+
+ result = bus_register(&ps3_system_bus_type);
+ BUG_ON(result);
+
+ pr_debug(" <- %s:%d\n", __func__, __LINE__);
+ return result;
+}
+
+core_initcall(ps3_system_bus_init);
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+static void * ps3_alloc_coherent(struct device *_dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ int result;
+ struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+ unsigned long virt_addr;
+
+ flag &= ~(__GFP_DMA | __GFP_HIGHMEM);
+ flag |= __GFP_ZERO;
+
+ virt_addr = __get_free_pages(flag, get_order(size));
+
+ if (!virt_addr) {
+ pr_debug("%s:%d: get_free_pages failed\n", __func__, __LINE__);
+ goto clean_none;
+ }
+
+ result = ps3_dma_map(dev->d_region, virt_addr, size, dma_handle,
+ CBE_IOPTE_PP_W | CBE_IOPTE_PP_R |
+ CBE_IOPTE_SO_RW | CBE_IOPTE_M);
+
+ if (result) {
+ pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+ __func__, __LINE__, result);
+ BUG_ON("check region type");
+ goto clean_alloc;
+ }
+
+ return (void*)virt_addr;
+
+clean_alloc:
+ free_pages(virt_addr, get_order(size));
+clean_none:
+ *dma_handle = 0;
+ return NULL;
+}
+
+static void ps3_free_coherent(struct device *_dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, struct dma_attrs *attrs)
+{
+ struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+
+ ps3_dma_unmap(dev->d_region, dma_handle, size);
+ free_pages((unsigned long)vaddr, get_order(size));
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address passed here
+ * comprises a page address and offset into that page. The dma_addr_t
+ * returned will point to the same byte within the page as was passed in.
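+ *
+ * For example (illustrative only): mapping page_address(page) + 0x10
+ * yields a bus address carrying the same 0x10 byte offset within its
+ * page.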
+ */
+
+static dma_addr_t ps3_sb_map_page(struct device *_dev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+ int result;
+ dma_addr_t bus_addr;
+ void *ptr = page_address(page) + offset;
+
+ result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size,
+ &bus_addr,
+ CBE_IOPTE_PP_R | CBE_IOPTE_PP_W |
+ CBE_IOPTE_SO_RW | CBE_IOPTE_M);
+
+ if (result) {
+ pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+ __func__, __LINE__, result);
+ }
+
+ return bus_addr;
+}
+
+static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+ int result;
+ dma_addr_t bus_addr;
+ u64 iopte_flag;
+ void *ptr = page_address(page) + offset;
+
+ iopte_flag = CBE_IOPTE_M;
+ switch (direction) {
+ case DMA_BIDIRECTIONAL:
+ iopte_flag |= CBE_IOPTE_PP_R | CBE_IOPTE_PP_W | CBE_IOPTE_SO_RW;
+ break;
+ case DMA_TO_DEVICE:
+ iopte_flag |= CBE_IOPTE_PP_R | CBE_IOPTE_SO_R;
+ break;
+ case DMA_FROM_DEVICE:
+ iopte_flag |= CBE_IOPTE_PP_W | CBE_IOPTE_SO_RW;
+ break;
+ default:
+ /* not happened */
+ BUG();
+ };
+ result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size,
+ &bus_addr, iopte_flag);
+
+ if (result) {
+ pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+ __func__, __LINE__, result);
+ }
+ return bus_addr;
+}
+
+static void ps3_unmap_page(struct device *_dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction direction, struct dma_attrs *attrs)
+{
+ struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+ int result;
+
+ result = ps3_dma_unmap(dev->d_region, dma_addr, size);
+
+ if (result) {
+ pr_debug("%s:%d: ps3_dma_unmap failed (%d)\n",
+ __func__, __LINE__, result);
+ }
+}
+
+static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction, struct dma_attrs *attrs)
+{
+#if defined(CONFIG_PS3_DYNAMIC_DMA)
+ BUG_ON("do");
+ return -EPERM;
+#else
+ struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i) {
+ int result = ps3_dma_map(dev->d_region, sg_phys(sg),
+ sg->length, &sg->dma_address, 0);
+
+ if (result) {
+ pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+ __func__, __LINE__, result);
+ return -EINVAL;
+ }
+
+ sg->dma_length = sg->length;
+ }
+
+ return nents;
+#endif
+}
+
+static int ps3_ioc0_map_sg(struct device *_dev, struct scatterlist *sg,
+ int nents,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ BUG();
+ return 0;
+}
+
+static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction, struct dma_attrs *attrs)
+{
+#if defined(CONFIG_PS3_DYNAMIC_DMA)
+ BUG_ON("do");
+#endif
+}
+
+static void ps3_ioc0_unmap_sg(struct device *_dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ BUG();
+}
+
+static int ps3_dma_supported(struct device *_dev, u64 mask)
+{
+ return mask >= DMA_BIT_MASK(32);
+}
+
+static u64 ps3_dma_get_required_mask(struct device *_dev)
+{
+ return DMA_BIT_MASK(32);
+}
+
+static struct dma_map_ops ps3_sb_dma_ops = {
+ .alloc = ps3_alloc_coherent,
+ .free = ps3_free_coherent,
+ .map_sg = ps3_sb_map_sg,
+ .unmap_sg = ps3_sb_unmap_sg,
+ .dma_supported = ps3_dma_supported,
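+ /* shared with ps3_ioc0_dma_ops below */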
.get_required_mask = ps3_dma_get_required_mask, + .map_page = ps3_sb_map_page, + .unmap_page = ps3_unmap_page, +}; + +static struct dma_map_ops ps3_ioc0_dma_ops = { + .alloc = ps3_alloc_coherent, + .free = ps3_free_coherent, + .map_sg = ps3_ioc0_map_sg, + .unmap_sg = ps3_ioc0_unmap_sg, + .dma_supported = ps3_dma_supported, + .get_required_mask = ps3_dma_get_required_mask, + .map_page = ps3_ioc0_map_page, + .unmap_page = ps3_unmap_page, +}; + +/** + * ps3_system_bus_release_device - remove a device from the system bus + */ + +static void ps3_system_bus_release_device(struct device *_dev) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + kfree(dev); +} + +/** + * ps3_system_bus_device_register - add a device to the system bus + * + * ps3_system_bus_device_register() expects the dev object to be allocated + * dynamically by the caller. The system bus takes ownership of the dev + * object and frees the object in ps3_system_bus_release_device(). + */ + +int ps3_system_bus_device_register(struct ps3_system_bus_device *dev) +{ + int result; + static unsigned int dev_ioc0_count; + static unsigned int dev_sb_count; + static unsigned int dev_vuart_count; + static unsigned int dev_lpm_count; + + if (!dev->core.parent) + dev->core.parent = &ps3_system_bus; + dev->core.bus = &ps3_system_bus_type; + dev->core.release = ps3_system_bus_release_device; + + switch (dev->dev_type) { + case PS3_DEVICE_TYPE_IOC0: + dev->core.archdata.dma_ops = &ps3_ioc0_dma_ops; + dev_set_name(&dev->core, "ioc0_%02x", ++dev_ioc0_count); + break; + case PS3_DEVICE_TYPE_SB: + dev->core.archdata.dma_ops = &ps3_sb_dma_ops; + dev_set_name(&dev->core, "sb_%02x", ++dev_sb_count); + + break; + case PS3_DEVICE_TYPE_VUART: + dev_set_name(&dev->core, "vuart_%02x", ++dev_vuart_count); + break; + case PS3_DEVICE_TYPE_LPM: + dev_set_name(&dev->core, "lpm_%02x", ++dev_lpm_count); + break; + default: + BUG(); + }; + + dev->core.of_node = NULL; + set_dev_node(&dev->core, 0); + + pr_debug("%s:%d add %s\n", __func__, __LINE__, dev_name(&dev->core)); + + result = device_register(&dev->core); + return result; +} + +EXPORT_SYMBOL_GPL(ps3_system_bus_device_register); + +int ps3_system_bus_driver_register(struct ps3_system_bus_driver *drv) +{ + int result; + + pr_debug(" -> %s:%d: %s\n", __func__, __LINE__, drv->core.name); + + if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) + return -ENODEV; + + drv->core.bus = &ps3_system_bus_type; + + result = driver_register(&drv->core); + pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, drv->core.name); + return result; +} + +EXPORT_SYMBOL_GPL(ps3_system_bus_driver_register); + +void ps3_system_bus_driver_unregister(struct ps3_system_bus_driver *drv) +{ + pr_debug(" -> %s:%d: %s\n", __func__, __LINE__, drv->core.name); + driver_unregister(&drv->core); + pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, drv->core.name); +} + +EXPORT_SYMBOL_GPL(ps3_system_bus_driver_unregister); diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c new file mode 100644 index 000000000..ce73ce865 --- /dev/null +++ b/arch/powerpc/platforms/ps3/time.c @@ -0,0 +1,96 @@ +/* + * PS3 time and rtc routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/platform_device.h> + +#include <asm/firmware.h> +#include <asm/rtc.h> +#include <asm/lv1call.h> +#include <asm/ps3.h> + +#include "platform.h" + +#define dump_tm(_a) _dump_tm(_a, __func__, __LINE__) +static void _dump_tm(const struct rtc_time *tm, const char* func, int line) +{ + pr_debug("%s:%d tm_sec %d\n", func, line, tm->tm_sec); + pr_debug("%s:%d tm_min %d\n", func, line, tm->tm_min); + pr_debug("%s:%d tm_hour %d\n", func, line, tm->tm_hour); + pr_debug("%s:%d tm_mday %d\n", func, line, tm->tm_mday); + pr_debug("%s:%d tm_mon %d\n", func, line, tm->tm_mon); + pr_debug("%s:%d tm_year %d\n", func, line, tm->tm_year); + pr_debug("%s:%d tm_wday %d\n", func, line, tm->tm_wday); +} + +#define dump_time(_a) _dump_time(_a, __func__, __LINE__) +static void __maybe_unused _dump_time(int time, const char *func, + int line) +{ + struct rtc_time tm; + + to_tm(time, &tm); + + pr_debug("%s:%d time %d\n", func, line, time); + _dump_tm(&tm, func, line); +} + +void __init ps3_calibrate_decr(void) +{ + int result; + u64 tmp; + + result = ps3_repository_read_be_tb_freq(0, &tmp); + BUG_ON(result); + + ppc_tb_freq = tmp; + ppc_proc_freq = ppc_tb_freq * 40; +} + +static u64 read_rtc(void) +{ + int result; + u64 rtc_val; + u64 tb_val; + + result = lv1_get_rtc(&rtc_val, &tb_val); + BUG_ON(result); + + return rtc_val; +} + +unsigned long __init ps3_get_boot_time(void) +{ + return read_rtc() + ps3_os_area_get_rtc_diff(); +} + +static int __init ps3_rtc_init(void) +{ + struct platform_device *pdev; + + if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) + return -ENODEV; + + pdev = platform_device_register_simple("rtc-ps3", -1, NULL, 0); + + return PTR_ERR_OR_ZERO(pdev); +} + +module_init(ps3_rtc_init); diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig new file mode 100644 index 000000000..54c87d5d3 --- /dev/null +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -0,0 +1,134 @@ +config PPC_PSERIES + depends on PPC64 && PPC_BOOK3S + bool "IBM pSeries & new (POWER5-based) iSeries" + select HAVE_PCSPKR_PLATFORM + select MPIC + select OF_DYNAMIC + select PCI_MSI + select PPC_XICS + select PPC_ICP_NATIVE + select PPC_ICP_HV + select PPC_ICS_RTAS + select PPC_I8259 + select PPC_RTAS + select PPC_RTAS_DAEMON + select RTAS_ERROR_LOGGING + select PPC_UDBG_16550 + select PPC_NATIVE + select PPC_PCI_CHOICE if EXPERT + select PPC_DOORBELL + select HAVE_CONTEXT_TRACKING + select HOTPLUG_CPU if SMP + select ARCH_RANDOM + select PPC_DOORBELL + default y + +config PPC_SPLPAR + depends on PPC_PSERIES + bool "Support for shared-processor logical partitions" + default n + help + Enabling this option will make the kernel run more efficiently + on logically-partitioned pSeries systems which use shared + processors, that is, which share physical processors between + two or more partitions. + +config DTL + bool "Dispatch Trace Log" + depends on PPC_SPLPAR && DEBUG_FS + help + SPLPAR machines can log hypervisor preempt & dispatch events to a + kernel buffer. 
Saying Y here will enable logging these events,
+ which are accessible through a debugfs file.
+
+ Say N if you are unsure.
+
+config PSERIES_MSI
+ bool
+ depends on PCI_MSI && PPC_PSERIES && EEH
+ default y
+
+config PSERIES_ENERGY
+ tristate "pSeries energy management capabilities driver"
+ depends on PPC_PSERIES
+ default y
+ help
+ Provides an interface to platform energy management capabilities
+ on supported PSERIES platforms.
+ Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list
+ and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint
+
+config SCANLOG
+ tristate "Scanlog dump interface"
+ depends on RTAS_PROC && PPC_PSERIES
+
+config IO_EVENT_IRQ
+ bool "IO Event Interrupt support"
+ depends on PPC_PSERIES
+ default y
+ help
+ Select this option if you want to enable support for IO Event
+ interrupts. An IO event interrupt is a mechanism provided by RTAS
+ to return information about hardware error and non-error events
+ which may need OS attention. RTAS returns events for multiple
+ event types and scopes. Device drivers can register their handlers
+ to receive events.
+
+ This option will only enable the IO event platform code. You
+ will still need to enable or compile the actual drivers
+ that use this infrastructure to handle IO event interrupts.
+
+ Say Y if you are unsure.
+
+config LPARCFG
+ bool "LPAR Configuration Data"
+ depends on PPC_PSERIES
+ help
+ Provide system capacity information via human readable
+ <key word>=<value> pairs through a /proc/ppc64/lparcfg interface.
+
+config PPC_PSERIES_DEBUG
+ depends on PPC_PSERIES && PPC_EARLY_DEBUG
+ bool "Enable extra debug logging in platforms/pseries"
+ help
+ Say Y here if you want the pseries core to produce a bunch of
+ debug messages to the system log. Select this if you are having a
+ problem with the pseries core and want to see more of what is
+ going on. This does not enable debugging in lpar.c, which must
+ be manually done due to its verbosity.
+ default y
+
+config PPC_SMLPAR
+ bool "Support for shared-memory logical partitions"
+ depends on PPC_PSERIES
+ select LPARCFG
+ default n
+ help
+ Select this option to enable shared memory partition support.
+ With this option a system running in an LPAR can be given more
+ memory than physically available and will allow firmware to
+ balance memory across many LPARs.
+
+config CMM
+ tristate "Collaborative memory management"
+ depends on PPC_SMLPAR
+ default y
+ help
+ Select this option if you want to enable the kernel interface
+ to reduce the memory size of the system. This is accomplished
+ by allocating pages of memory and putting them "on hold". This only
+ makes sense for a system running in an LPAR where the unused pages
+ will be reused for other LPARs. The interface allows firmware to
+ balance memory across many LPARs.
+
+config HV_PERF_CTRS
+ bool "Hypervisor supplied PMU events (24x7 & GPCI)"
+ default y
+ depends on PERF_EVENTS && PPC_PSERIES
+ help
+ Enable access to hypervisor supplied counters in perf. Currently,
+ this enables code that uses the hcall GetPerfCounterInfo and 24x7
+ interfaces to retrieve counters. GPCI exists on Power 6 and later
+ systems. 24x7 is available on Power 8 systems.
+
+ If unsure, select Y.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile new file mode 100644 index 000000000..03480796a --- /dev/null +++ b/arch/powerpc/platforms/pseries/Makefile @@ -0,0 +1,28 @@ +ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) +ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG + +obj-y := lpar.o hvCall.o nvram.o reconfig.o \ + setup.o iommu.o event_sources.o ras.o \ + firmware.o power.o dlpar.o mobility.o rng.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SCANLOG) += scanlog.o +obj-$(CONFIG_EEH) += eeh_pseries.o +obj-$(CONFIG_KEXEC) += kexec.o +obj-$(CONFIG_PCI) += pci.o pci_dlpar.o +obj-$(CONFIG_PSERIES_MSI) += msi.o +obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o + +obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o +obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o + +obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o +obj-$(CONFIG_HVCS) += hvcserver.o +obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o +obj-$(CONFIG_CMM) += cmm.o +obj-$(CONFIG_DTL) += dtl.o +obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o +obj-$(CONFIG_LPARCFG) += lparcfg.o + +ifeq ($(CONFIG_PPC_PSERIES),y) +obj-$(CONFIG_SUSPEND) += suspend.o +endif diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c new file mode 100644 index 000000000..fc44ad047 --- /dev/null +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -0,0 +1,739 @@ +/* + * Collaborative memory management interface. + * + * Copyright (C) 2008 IBM Corporation + * Author(s): Brian King (brking@linux.vnet.ibm.com), + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/ctype.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/oom.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/stringify.h> +#include <linux/swap.h> +#include <linux/device.h> +#include <asm/firmware.h> +#include <asm/hvcall.h> +#include <asm/mmu.h> +#include <asm/pgalloc.h> +#include <asm/uaccess.h> +#include <linux/memory.h> +#include <asm/plpar_wrappers.h> + +#define CMM_DRIVER_VERSION "1.0.0" +#define CMM_DEFAULT_DELAY 1 +#define CMM_HOTPLUG_DELAY 5 +#define CMM_DEBUG 0 +#define CMM_DISABLE 0 +#define CMM_OOM_KB 1024 +#define CMM_MIN_MEM_MB 256 +#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) +/* + * The priority level tries to ensure that this notifier is called as + * late as possible to reduce thrashing in the shared memory pool. 
+ */
+#define CMM_MEM_HOTPLUG_PRI 1
+#define CMM_MEM_ISOLATE_PRI 15
+
+static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
+static unsigned int oom_kb = CMM_OOM_KB;
+static unsigned int cmm_debug = CMM_DEBUG;
+static unsigned int cmm_disabled = CMM_DISABLE;
+static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static struct device cmm_dev;
+
+MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(CMM_DRIVER_VERSION);
+
+module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
+ "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
+ "before loaning resumes. "
+ "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
+module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
+ "[Default=" __stringify(CMM_OOM_KB) "]");
+module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
+ "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
+module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
+ "[Default=" __stringify(CMM_DEBUG) "]");
+
+#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
+
+#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
+
+struct cmm_page_array {
+ struct cmm_page_array *next;
+ unsigned long index;
+ unsigned long page[CMM_NR_PAGES];
+};
+
+static unsigned long loaned_pages;
+static unsigned long loaned_pages_target;
+static unsigned long oom_freed_pages;
+
+static struct cmm_page_array *cmm_page_list;
+static DEFINE_SPINLOCK(cmm_lock);
+
+static DEFINE_MUTEX(hotplug_mutex);
+static int hotplug_occurred; /* protected by the hotplug mutex */
+
+static struct task_struct *cmm_thread_ptr;
+
+/**
+ * cmm_alloc_pages - Allocate pages and mark them as loaned
+ * @nr: number of pages to allocate
+ *
+ * Return value:
+ * number of pages requested to be allocated which were not
+ **/
+static long cmm_alloc_pages(long nr)
+{
+ struct cmm_page_array *pa, *npa;
+ unsigned long addr;
+ long rc;
+
+ cmm_dbg("Begin request for %ld pages\n", nr);
+
+ while (nr) {
+ /* Exit if a hotplug operation is in progress or occurred */
+ if (mutex_trylock(&hotplug_mutex)) {
+ if (hotplug_occurred) {
+ mutex_unlock(&hotplug_mutex);
+ break;
+ }
+ mutex_unlock(&hotplug_mutex);
+ } else {
+ break;
+ }
+
+ addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
+ __GFP_NORETRY | __GFP_NOMEMALLOC);
+ if (!addr)
+ break;
+ spin_lock(&cmm_lock);
+ pa = cmm_page_list;
+ if (!pa || pa->index >= CMM_NR_PAGES) {
+ /* Need a new page for the page list.
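+ * Drop cmm_lock for the allocation, then recheck the list
+ * head in case another thread extended it while the lock
+ * was released.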
*/ + spin_unlock(&cmm_lock); + npa = (struct cmm_page_array *)__get_free_page( + GFP_NOIO | __GFP_NOWARN | + __GFP_NORETRY | __GFP_NOMEMALLOC); + if (!npa) { + pr_info("%s: Can not allocate new page list\n", __func__); + free_page(addr); + break; + } + spin_lock(&cmm_lock); + pa = cmm_page_list; + + if (!pa || pa->index >= CMM_NR_PAGES) { + npa->next = pa; + npa->index = 0; + pa = npa; + cmm_page_list = pa; + } else + free_page((unsigned long) npa); + } + + if ((rc = plpar_page_set_loaned(__pa(addr)))) { + pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc); + spin_unlock(&cmm_lock); + free_page(addr); + break; + } + + pa->page[pa->index++] = addr; + loaned_pages++; + totalram_pages--; + spin_unlock(&cmm_lock); + nr--; + } + + cmm_dbg("End request with %ld pages unfulfilled\n", nr); + return nr; +} + +/** + * cmm_free_pages - Free pages and mark them as active + * @nr: number of pages to free + * + * Return value: + * number of pages requested to be freed which were not + **/ +static long cmm_free_pages(long nr) +{ + struct cmm_page_array *pa; + unsigned long addr; + + cmm_dbg("Begin free of %ld pages.\n", nr); + spin_lock(&cmm_lock); + pa = cmm_page_list; + while (nr) { + if (!pa || pa->index <= 0) + break; + addr = pa->page[--pa->index]; + + if (pa->index == 0) { + pa = pa->next; + free_page((unsigned long) cmm_page_list); + cmm_page_list = pa; + } + + plpar_page_set_active(__pa(addr)); + free_page(addr); + loaned_pages--; + nr--; + totalram_pages++; + } + spin_unlock(&cmm_lock); + cmm_dbg("End request with %ld pages unfulfilled\n", nr); + return nr; +} + +/** + * cmm_oom_notify - OOM notifier + * @self: notifier block struct + * @dummy: not used + * @parm: returned - number of pages freed + * + * Return value: + * NOTIFY_OK + **/ +static int cmm_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + unsigned long *freed = parm; + long nr = KB2PAGES(oom_kb); + + cmm_dbg("OOM processing started\n"); + nr = cmm_free_pages(nr); + loaned_pages_target = loaned_pages; + *freed += KB2PAGES(oom_kb) - nr; + oom_freed_pages += KB2PAGES(oom_kb) - nr; + cmm_dbg("OOM processing complete\n"); + return NOTIFY_OK; +} + +/** + * cmm_get_mpp - Read memory performance parameters + * + * Makes hcall to query the current page loan request from the hypervisor. 
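+ * The resulting target is clamped so that at least min_mem_mb stays
+ * unloaned, and pages recently freed by the OOM notifier are credited
+ * against the loan request.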
+ * + * Return value: + * nothing + **/ +static void cmm_get_mpp(void) +{ + int rc; + struct hvcall_mpp_data mpp_data; + signed long active_pages_target, page_loan_request, target; + signed long total_pages = totalram_pages + loaned_pages; + signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE; + + rc = h_get_mpp(&mpp_data); + + if (rc != H_SUCCESS) + return; + + page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE); + target = page_loan_request + (signed long)loaned_pages; + + if (target < 0 || total_pages < min_mem_pages) + target = 0; + + if (target > oom_freed_pages) + target -= oom_freed_pages; + else + target = 0; + + active_pages_target = total_pages - target; + + if (min_mem_pages > active_pages_target) + target = total_pages - min_mem_pages; + + if (target < 0) + target = 0; + + loaned_pages_target = target; + + cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n", + page_loan_request, loaned_pages, loaned_pages_target, + oom_freed_pages, totalram_pages); +} + +static struct notifier_block cmm_oom_nb = { + .notifier_call = cmm_oom_notify +}; + +/** + * cmm_thread - CMM task thread + * @dummy: not used + * + * Return value: + * 0 + **/ +static int cmm_thread(void *dummy) +{ + unsigned long timeleft; + + while (1) { + timeleft = msleep_interruptible(delay * 1000); + + if (kthread_should_stop() || timeleft) + break; + + if (mutex_trylock(&hotplug_mutex)) { + if (hotplug_occurred) { + hotplug_occurred = 0; + mutex_unlock(&hotplug_mutex); + cmm_dbg("Hotplug operation has occurred, " + "loaning activity suspended " + "for %d seconds.\n", + hotplug_delay); + timeleft = msleep_interruptible(hotplug_delay * + 1000); + if (kthread_should_stop() || timeleft) + break; + continue; + } + mutex_unlock(&hotplug_mutex); + } else { + cmm_dbg("Hotplug operation in progress, activity " + "suspended\n"); + continue; + } + + cmm_get_mpp(); + + if (loaned_pages_target > loaned_pages) { + if (cmm_alloc_pages(loaned_pages_target - loaned_pages)) + loaned_pages_target = loaned_pages; + } else if (loaned_pages_target < loaned_pages) + cmm_free_pages(loaned_pages - loaned_pages_target); + } + return 0; +} + +#define CMM_SHOW(name, format, args...) 
\ + static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } \ + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + +CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages)); +CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); + +static ssize_t show_oom_pages(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages)); +} + +static ssize_t store_oom_pages(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val = simple_strtoul (buf, NULL, 10); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (val != 0) + return -EBADMSG; + + oom_freed_pages = 0; + return count; +} + +static DEVICE_ATTR(oom_freed_kb, S_IWUSR | S_IRUGO, + show_oom_pages, store_oom_pages); + +static struct device_attribute *cmm_attrs[] = { + &dev_attr_loaned_kb, + &dev_attr_loaned_target_kb, + &dev_attr_oom_freed_kb, +}; + +static struct bus_type cmm_subsys = { + .name = "cmm", + .dev_name = "cmm", +}; + +/** + * cmm_sysfs_register - Register with sysfs + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_sysfs_register(struct device *dev) +{ + int i, rc; + + if ((rc = subsys_system_register(&cmm_subsys, NULL))) + return rc; + + dev->id = 0; + dev->bus = &cmm_subsys; + + if ((rc = device_register(dev))) + goto subsys_unregister; + + for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) { + if ((rc = device_create_file(dev, cmm_attrs[i]))) + goto fail; + } + + return 0; + +fail: + while (--i >= 0) + device_remove_file(dev, cmm_attrs[i]); + device_unregister(dev); +subsys_unregister: + bus_unregister(&cmm_subsys); + return rc; +} + +/** + * cmm_unregister_sysfs - Unregister from sysfs + * + **/ +static void cmm_unregister_sysfs(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) + device_remove_file(dev, cmm_attrs[i]); + device_unregister(dev); + bus_unregister(&cmm_subsys); +} + +/** + * cmm_reboot_notifier - Make sure pages are not still marked as "loaned" + * + **/ +static int cmm_reboot_notifier(struct notifier_block *nb, + unsigned long action, void *unused) +{ + if (action == SYS_RESTART) { + if (cmm_thread_ptr) + kthread_stop(cmm_thread_ptr); + cmm_thread_ptr = NULL; + cmm_free_pages(loaned_pages); + } + return NOTIFY_DONE; +} + +static struct notifier_block cmm_reboot_nb = { + .notifier_call = cmm_reboot_notifier, +}; + +/** + * cmm_count_pages - Count the number of pages loaned in a particular range. 
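+ *
+ * Both the loaned pages themselves and the page list structures that
+ * track them are counted when they fall inside the range.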
+ * + * @arg: memory_isolate_notify structure with address range and count + * + * Return value: + * 0 on success + **/ +static unsigned long cmm_count_pages(void *arg) +{ + struct memory_isolate_notify *marg = arg; + struct cmm_page_array *pa; + unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn); + unsigned long end = start + (marg->nr_pages << PAGE_SHIFT); + unsigned long idx; + + spin_lock(&cmm_lock); + pa = cmm_page_list; + while (pa) { + if ((unsigned long)pa >= start && (unsigned long)pa < end) + marg->pages_found++; + for (idx = 0; idx < pa->index; idx++) + if (pa->page[idx] >= start && pa->page[idx] < end) + marg->pages_found++; + pa = pa->next; + } + spin_unlock(&cmm_lock); + return 0; +} + +/** + * cmm_memory_isolate_cb - Handle memory isolation notifier calls + * @self: notifier block struct + * @action: action to take + * @arg: struct memory_isolate_notify data for handler + * + * Return value: + * NOTIFY_OK or notifier error based on subfunction return value + **/ +static int cmm_memory_isolate_cb(struct notifier_block *self, + unsigned long action, void *arg) +{ + int ret = 0; + + if (action == MEM_ISOLATE_COUNT) + ret = cmm_count_pages(arg); + + return notifier_from_errno(ret); +} + +static struct notifier_block cmm_mem_isolate_nb = { + .notifier_call = cmm_memory_isolate_cb, + .priority = CMM_MEM_ISOLATE_PRI +}; + +/** + * cmm_mem_going_offline - Unloan pages where memory is to be removed + * @arg: memory_notify structure with page range to be offlined + * + * Return value: + * 0 on success + **/ +static int cmm_mem_going_offline(void *arg) +{ + struct memory_notify *marg = arg; + unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn); + unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT); + struct cmm_page_array *pa_curr, *pa_last, *npa; + unsigned long idx; + unsigned long freed = 0; + + cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n", + start_page, marg->nr_pages); + spin_lock(&cmm_lock); + + /* Search the page list for pages in the range to be offlined */ + pa_last = pa_curr = cmm_page_list; + while (pa_curr) { + for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) { + if ((pa_curr->page[idx] < start_page) || + (pa_curr->page[idx] >= end_page)) + continue; + + plpar_page_set_active(__pa(pa_curr->page[idx])); + free_page(pa_curr->page[idx]); + freed++; + loaned_pages--; + totalram_pages++; + pa_curr->page[idx] = pa_last->page[--pa_last->index]; + if (pa_last->index == 0) { + if (pa_curr == pa_last) + pa_curr = pa_last->next; + pa_last = pa_last->next; + free_page((unsigned long)cmm_page_list); + cmm_page_list = pa_last; + } + } + pa_curr = pa_curr->next; + } + + /* Search for page list structures in the range to be offlined */ + pa_last = NULL; + pa_curr = cmm_page_list; + while (pa_curr) { + if (((unsigned long)pa_curr >= start_page) && + ((unsigned long)pa_curr < end_page)) { + npa = (struct cmm_page_array *)__get_free_page( + GFP_NOIO | __GFP_NOWARN | + __GFP_NORETRY | __GFP_NOMEMALLOC); + if (!npa) { + spin_unlock(&cmm_lock); + cmm_dbg("Failed to allocate memory for list " + "management. 
Memory hotplug " + "failed.\n"); + return ENOMEM; + } + memcpy(npa, pa_curr, PAGE_SIZE); + if (pa_curr == cmm_page_list) + cmm_page_list = npa; + if (pa_last) + pa_last->next = npa; + free_page((unsigned long) pa_curr); + freed++; + pa_curr = npa; + } + + pa_last = pa_curr; + pa_curr = pa_curr->next; + } + + spin_unlock(&cmm_lock); + cmm_dbg("Released %ld pages in the search range.\n", freed); + + return 0; +} + +/** + * cmm_memory_cb - Handle memory hotplug notifier calls + * @self: notifier block struct + * @action: action to take + * @arg: struct memory_notify data for handler + * + * Return value: + * NOTIFY_OK or notifier error based on subfunction return value + * + **/ +static int cmm_memory_cb(struct notifier_block *self, + unsigned long action, void *arg) +{ + int ret = 0; + + switch (action) { + case MEM_GOING_OFFLINE: + mutex_lock(&hotplug_mutex); + hotplug_occurred = 1; + ret = cmm_mem_going_offline(arg); + break; + case MEM_OFFLINE: + case MEM_CANCEL_OFFLINE: + mutex_unlock(&hotplug_mutex); + cmm_dbg("Memory offline operation complete.\n"); + break; + case MEM_GOING_ONLINE: + case MEM_ONLINE: + case MEM_CANCEL_ONLINE: + break; + } + + return notifier_from_errno(ret); +} + +static struct notifier_block cmm_mem_nb = { + .notifier_call = cmm_memory_cb, + .priority = CMM_MEM_HOTPLUG_PRI +}; + +/** + * cmm_init - Module initialization + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_init(void) +{ + int rc = -ENOMEM; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return -EOPNOTSUPP; + + if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0) + return rc; + + if ((rc = register_reboot_notifier(&cmm_reboot_nb))) + goto out_oom_notifier; + + if ((rc = cmm_sysfs_register(&cmm_dev))) + goto out_reboot_notifier; + + if (register_memory_notifier(&cmm_mem_nb) || + register_memory_isolate_notifier(&cmm_mem_isolate_nb)) + goto out_unregister_notifier; + + if (cmm_disabled) + return rc; + + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + if (IS_ERR(cmm_thread_ptr)) { + rc = PTR_ERR(cmm_thread_ptr); + goto out_unregister_notifier; + } + + return rc; + +out_unregister_notifier: + unregister_memory_notifier(&cmm_mem_nb); + unregister_memory_isolate_notifier(&cmm_mem_isolate_nb); + cmm_unregister_sysfs(&cmm_dev); +out_reboot_notifier: + unregister_reboot_notifier(&cmm_reboot_nb); +out_oom_notifier: + unregister_oom_notifier(&cmm_oom_nb); + return rc; +} + +/** + * cmm_exit - Module exit + * + * Return value: + * nothing + **/ +static void cmm_exit(void) +{ + if (cmm_thread_ptr) + kthread_stop(cmm_thread_ptr); + unregister_oom_notifier(&cmm_oom_nb); + unregister_reboot_notifier(&cmm_reboot_nb); + unregister_memory_notifier(&cmm_mem_nb); + unregister_memory_isolate_notifier(&cmm_mem_isolate_nb); + cmm_free_pages(loaned_pages); + cmm_unregister_sysfs(&cmm_dev); +} + +/** + * cmm_set_disable - Disable/Enable CMM + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_set_disable(const char *val, struct kernel_param *kp) +{ + int disable = simple_strtoul(val, NULL, 10); + + if (disable != 0 && disable != 1) + return -EINVAL; + + if (disable && !cmm_disabled) { + if (cmm_thread_ptr) + kthread_stop(cmm_thread_ptr); + cmm_thread_ptr = NULL; + cmm_free_pages(loaned_pages); + } else if (!disable && cmm_disabled) { + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + if (IS_ERR(cmm_thread_ptr)) + return PTR_ERR(cmm_thread_ptr); + } + + cmm_disabled = disable; + return 0; +} + +module_param_call(disable, cmm_set_disable, 
param_get_uint,
+ &cmm_disabled, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
+ "[Default=" __stringify(CMM_DISABLE) "]");
+
+module_init(cmm_init);
+module_exit(cmm_exit);
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
new file mode 100644
index 000000000..019d34aaf
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -0,0 +1,659 @@
+/*
+ * Support for dynamic reconfiguration for PCI, Memory, and CPU
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms.
+ *
+ * Copyright (C) 2009 Nathan Fontenot
+ * Copyright (C) 2009 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "dlpar: " fmt
+
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include "offline_states.h"
+#include "pseries.h"
+
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+
+struct cc_workarea {
+ __be32 drc_index;
+ __be32 zero;
+ __be32 name_offset;
+ __be32 prop_length;
+ __be32 prop_offset;
+};
+
+void dlpar_free_cc_property(struct property *prop)
+{
+ kfree(prop->name);
+ kfree(prop->value);
+ kfree(prop);
+}
+
+static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa)
+{
+ struct property *prop;
+ char *name;
+ char *value;
+
+ prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+ if (!prop)
+ return NULL;
+
+ name = (char *)ccwa + be32_to_cpu(ccwa->name_offset);
+ prop->name = kstrdup(name, GFP_KERNEL);
+
+ prop->length = be32_to_cpu(ccwa->prop_length);
+ value = (char *)ccwa + be32_to_cpu(ccwa->prop_offset);
+ prop->value = kmemdup(value, prop->length, GFP_KERNEL);
+ if (!prop->value) {
+ dlpar_free_cc_property(prop);
+ return NULL;
+ }
+
+ return prop;
+}
+
+static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa,
+ const char *path)
+{
+ struct device_node *dn;
+ char *name;
+
+ /* If the parent node path is "/", advance path to the NUL terminator
+ * to prevent a double leading slash in full_name.
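+ * For example (illustrative): a parent path of "/" with node name
+ * "cpus" yields the full_name "/cpus" rather than "//cpus".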
+ */ + if (!path[1]) + path++; + + dn = kzalloc(sizeof(*dn), GFP_KERNEL); + if (!dn) + return NULL; + + name = (char *)ccwa + be32_to_cpu(ccwa->name_offset); + dn->full_name = kasprintf(GFP_KERNEL, "%s/%s", path, name); + if (!dn->full_name) { + kfree(dn); + return NULL; + } + + of_node_set_flag(dn, OF_DYNAMIC); + of_node_init(dn); + + return dn; +} + +static void dlpar_free_one_cc_node(struct device_node *dn) +{ + struct property *prop; + + while (dn->properties) { + prop = dn->properties; + dn->properties = prop->next; + dlpar_free_cc_property(prop); + } + + kfree(dn->full_name); + kfree(dn); +} + +void dlpar_free_cc_nodes(struct device_node *dn) +{ + if (dn->child) + dlpar_free_cc_nodes(dn->child); + + if (dn->sibling) + dlpar_free_cc_nodes(dn->sibling); + + dlpar_free_one_cc_node(dn); +} + +#define COMPLETE 0 +#define NEXT_SIBLING 1 +#define NEXT_CHILD 2 +#define NEXT_PROPERTY 3 +#define PREV_PARENT 4 +#define MORE_MEMORY 5 +#define CALL_AGAIN -2 +#define ERR_CFG_USE -9003 + +struct device_node *dlpar_configure_connector(__be32 drc_index, + struct device_node *parent) +{ + struct device_node *dn; + struct device_node *first_dn = NULL; + struct device_node *last_dn = NULL; + struct property *property; + struct property *last_property = NULL; + struct cc_workarea *ccwa; + char *data_buf; + const char *parent_path = parent->full_name; + int cc_token; + int rc = -1; + + cc_token = rtas_token("ibm,configure-connector"); + if (cc_token == RTAS_UNKNOWN_SERVICE) + return NULL; + + data_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!data_buf) + return NULL; + + ccwa = (struct cc_workarea *)&data_buf[0]; + ccwa->drc_index = drc_index; + ccwa->zero = 0; + + do { + /* Since we release the rtas_data_buf lock between configure + * connector calls we want to re-populate the rtas_data_buffer + * with the contents of the previous call. 
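+ * In other words, rtas_data_buf is only borrowed for the duration of
+ * each call: copy our state in, make the call, then copy the result
+ * back out before dropping the lock.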
+ */ + spin_lock(&rtas_data_buf_lock); + + memcpy(rtas_data_buf, data_buf, RTAS_DATA_BUF_SIZE); + rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL); + memcpy(data_buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); + + spin_unlock(&rtas_data_buf_lock); + + switch (rc) { + case COMPLETE: + break; + + case NEXT_SIBLING: + dn = dlpar_parse_cc_node(ccwa, parent_path); + if (!dn) + goto cc_error; + + dn->parent = last_dn->parent; + last_dn->sibling = dn; + last_dn = dn; + break; + + case NEXT_CHILD: + if (first_dn) + parent_path = last_dn->full_name; + + dn = dlpar_parse_cc_node(ccwa, parent_path); + if (!dn) + goto cc_error; + + if (!first_dn) { + dn->parent = parent; + first_dn = dn; + } else { + dn->parent = last_dn; + if (last_dn) + last_dn->child = dn; + } + + last_dn = dn; + break; + + case NEXT_PROPERTY: + property = dlpar_parse_cc_property(ccwa); + if (!property) + goto cc_error; + + if (!last_dn->properties) + last_dn->properties = property; + else + last_property->next = property; + + last_property = property; + break; + + case PREV_PARENT: + last_dn = last_dn->parent; + parent_path = last_dn->parent->full_name; + break; + + case CALL_AGAIN: + break; + + case MORE_MEMORY: + case ERR_CFG_USE: + default: + printk(KERN_ERR "Unexpected Error (%d) " + "returned from configure-connector\n", rc); + goto cc_error; + } + } while (rc); + +cc_error: + kfree(data_buf); + + if (rc) { + if (first_dn) + dlpar_free_cc_nodes(first_dn); + + return NULL; + } + + return first_dn; +} + +static struct device_node *derive_parent(const char *path) +{ + struct device_node *parent; + char *last_slash; + + last_slash = strrchr(path, '/'); + if (last_slash == path) { + parent = of_find_node_by_path("/"); + } else { + char *parent_path; + int parent_path_len = last_slash - path + 1; + parent_path = kmalloc(parent_path_len, GFP_KERNEL); + if (!parent_path) + return NULL; + + strlcpy(parent_path, path, parent_path_len); + parent = of_find_node_by_path(parent_path); + kfree(parent_path); + } + + return parent; +} + +int dlpar_attach_node(struct device_node *dn) +{ + int rc; + + dn->parent = derive_parent(dn->full_name); + if (!dn->parent) + return -ENOMEM; + + rc = of_attach_node(dn); + if (rc) { + printk(KERN_ERR "Failed to add device node %s\n", + dn->full_name); + return rc; + } + + of_node_put(dn->parent); + return 0; +} + +int dlpar_detach_node(struct device_node *dn) +{ + struct device_node *child; + int rc; + + child = of_get_next_child(dn, NULL); + while (child) { + dlpar_detach_node(child); + child = of_get_next_child(dn, child); + } + + rc = of_detach_node(dn); + if (rc) + return rc; + + of_node_put(dn); /* Must decrement the refcount */ + return 0; +} + +#define DR_ENTITY_SENSE 9003 +#define DR_ENTITY_PRESENT 1 +#define DR_ENTITY_UNUSABLE 2 +#define ALLOCATION_STATE 9003 +#define ALLOC_UNUSABLE 0 +#define ALLOC_USABLE 1 +#define ISOLATION_STATE 9001 +#define ISOLATE 0 +#define UNISOLATE 1 + +int dlpar_acquire_drc(u32 drc_index) +{ + int dr_status, rc; + + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, + DR_ENTITY_SENSE, drc_index); + if (rc || dr_status != DR_ENTITY_UNUSABLE) + return -1; + + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE); + if (rc) + return rc; + + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE); + if (rc) { + rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE); + return rc; + } + + return 0; +} + +int dlpar_release_drc(u32 drc_index) +{ + int dr_status, rc; + + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, 
+ DR_ENTITY_SENSE, drc_index); + if (rc || dr_status != DR_ENTITY_PRESENT) + return -1; + + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE); + if (rc) + return rc; + + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE); + if (rc) { + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE); + return rc; + } + + return 0; +} + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE + +static int dlpar_online_cpu(struct device_node *dn) +{ + int rc = 0; + unsigned int cpu; + int len, nthreads, i; + const __be32 *intserv; + u32 thread; + + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return -EINVAL; + + nthreads = len / sizeof(u32); + + cpu_maps_update_begin(); + for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != thread) + continue; + BUG_ON(get_cpu_current_state(cpu) + != CPU_STATE_OFFLINE); + cpu_maps_update_done(); + rc = device_online(get_cpu_device(cpu)); + if (rc) + goto out; + cpu_maps_update_begin(); + + break; + } + if (cpu == num_possible_cpus()) + printk(KERN_WARNING "Could not find cpu to online " + "with physical id 0x%x\n", thread); + } + cpu_maps_update_done(); + +out: + return rc; + +} + +static ssize_t dlpar_cpu_probe(const char *buf, size_t count) +{ + struct device_node *dn, *parent; + u32 drc_index; + int rc; + + rc = kstrtou32(buf, 0, &drc_index); + if (rc) + return -EINVAL; + + rc = dlpar_acquire_drc(drc_index); + if (rc) + return -EINVAL; + + parent = of_find_node_by_path("/cpus"); + if (!parent) + return -ENODEV; + + dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); + if (!dn) + return -EINVAL; + + of_node_put(parent); + + rc = dlpar_attach_node(dn); + if (rc) { + dlpar_release_drc(drc_index); + dlpar_free_cc_nodes(dn); + return rc; + } + + rc = dlpar_online_cpu(dn); + if (rc) + return rc; + + return count; +} + +static int dlpar_offline_cpu(struct device_node *dn) +{ + int rc = 0; + unsigned int cpu; + int len, nthreads, i; + const __be32 *intserv; + u32 thread; + + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return -EINVAL; + + nthreads = len / sizeof(u32); + + cpu_maps_update_begin(); + for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != thread) + continue; + + if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) + break; + + if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { + set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); + cpu_maps_update_done(); + rc = device_offline(get_cpu_device(cpu)); + if (rc) + goto out; + cpu_maps_update_begin(); + break; + + } + + /* + * The cpu is in CPU_STATE_INACTIVE. + * Upgrade it's state to CPU_STATE_OFFLINE. 
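+ * The H_PROD below wakes the ceded thread so it can
+ * observe the new preferred state and stop itself.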
+ */ + set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); + BUG_ON(plpar_hcall_norets(H_PROD, thread) + != H_SUCCESS); + __cpu_die(cpu); + break; + } + if (cpu == num_possible_cpus()) + printk(KERN_WARNING "Could not find cpu to offline " + "with physical id 0x%x\n", thread); + } + cpu_maps_update_done(); + +out: + return rc; + +} + +static ssize_t dlpar_cpu_release(const char *buf, size_t count) +{ + struct device_node *dn; + u32 drc_index; + int rc; + + dn = of_find_node_by_path(buf); + if (!dn) + return -EINVAL; + + rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); + if (rc) { + of_node_put(dn); + return -EINVAL; + } + + rc = dlpar_offline_cpu(dn); + if (rc) { + of_node_put(dn); + return -EINVAL; + } + + rc = dlpar_release_drc(drc_index); + if (rc) { + of_node_put(dn); + return rc; + } + + rc = dlpar_detach_node(dn); + if (rc) { + dlpar_acquire_drc(drc_index); + return rc; + } + + of_node_put(dn); + + return count; +} + +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ + +static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) +{ + int rc; + + /* pseries error logs are in BE format, convert to cpu type */ + switch (hp_elog->id_type) { + case PSERIES_HP_ELOG_ID_DRC_COUNT: + hp_elog->_drc_u.drc_count = + be32_to_cpu(hp_elog->_drc_u.drc_count); + break; + case PSERIES_HP_ELOG_ID_DRC_INDEX: + hp_elog->_drc_u.drc_index = + be32_to_cpu(hp_elog->_drc_u.drc_index); + } + + switch (hp_elog->resource) { + case PSERIES_HP_ELOG_RESOURCE_MEM: + rc = dlpar_memory(hp_elog); + break; + default: + pr_warn_ratelimited("Invalid resource (%d) specified\n", + hp_elog->resource); + rc = -EINVAL; + } + + return rc; +} + +static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, + const char *buf, size_t count) +{ + struct pseries_hp_errorlog *hp_elog; + const char *arg; + int rc; + + hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); + if (!hp_elog) { + rc = -ENOMEM; + goto dlpar_store_out; + } + + /* Parse out the request from the user, this will be in the form + * <resource> <action> <id_type> <id> + */ + arg = buf; + if (!strncmp(arg, "memory", 6)) { + hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM; + arg += strlen("memory "); + } else { + pr_err("Invalid resource specified: \"%s\"\n", buf); + rc = -EINVAL; + goto dlpar_store_out; + } + + if (!strncmp(arg, "add", 3)) { + hp_elog->action = PSERIES_HP_ELOG_ACTION_ADD; + arg += strlen("add "); + } else if (!strncmp(arg, "remove", 6)) { + hp_elog->action = PSERIES_HP_ELOG_ACTION_REMOVE; + arg += strlen("remove "); + } else { + pr_err("Invalid action specified: \"%s\"\n", buf); + rc = -EINVAL; + goto dlpar_store_out; + } + + if (!strncmp(arg, "index", 5)) { + u32 index; + + hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; + arg += strlen("index "); + if (kstrtou32(arg, 0, &index)) { + rc = -EINVAL; + pr_err("Invalid drc_index specified: \"%s\"\n", buf); + goto dlpar_store_out; + } + + hp_elog->_drc_u.drc_index = cpu_to_be32(index); + } else if (!strncmp(arg, "count", 5)) { + u32 count; + + hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_COUNT; + arg += strlen("count "); + if (kstrtou32(arg, 0, &count)) { + rc = -EINVAL; + pr_err("Invalid count specified: \"%s\"\n", buf); + goto dlpar_store_out; + } + + hp_elog->_drc_u.drc_count = cpu_to_be32(count); + } else { + pr_err("Invalid id_type specified: \"%s\"\n", buf); + rc = -EINVAL; + goto dlpar_store_out; + } + + rc = handle_dlpar_errorlog(hp_elog); + +dlpar_store_out: + kfree(hp_elog); + return rc ? 
rc : count; +} + +static CLASS_ATTR(dlpar, S_IWUSR, NULL, dlpar_store); + +static int __init pseries_dlpar_init(void) +{ + int rc; + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE + ppc_md.cpu_probe = dlpar_cpu_probe; + ppc_md.cpu_release = dlpar_cpu_release; +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ + + rc = sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); + + return rc; +} +machine_device_initcall(pseries, pseries_dlpar_init); + diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c new file mode 100644 index 000000000..39049e488 --- /dev/null +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -0,0 +1,395 @@ +/* + * Virtual Processor Dispatch Trace Log + * + * (C) Copyright IBM Corporation 2009 + * + * Author: Jeremy Kerr <jk@ozlabs.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/slab.h> +#include <linux/debugfs.h> +#include <linux/spinlock.h> +#include <asm/smp.h> +#include <asm/uaccess.h> +#include <asm/firmware.h> +#include <asm/lppaca.h> +#include <asm/debug.h> +#include <asm/plpar_wrappers.h> +#include <asm/machdep.h> + +struct dtl { + struct dtl_entry *buf; + struct dentry *file; + int cpu; + int buf_entries; + u64 last_idx; + spinlock_t lock; +}; +static DEFINE_PER_CPU(struct dtl, cpu_dtl); + +/* + * Dispatch trace log event mask: + * 0x7: 0x1: voluntary virtual processor waits + * 0x2: time-slice preempts + * 0x4: virtual partition memory page faults + */ +static u8 dtl_event_mask = 0x7; + + +/* + * Size of per-cpu log buffers. Firmware requires that the buffer does + * not cross a 4k boundary. + */ +static int dtl_buf_entries = N_DISPATCH_LOG; + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +struct dtl_ring { + u64 write_index; + struct dtl_entry *write_ptr; + struct dtl_entry *buf; + struct dtl_entry *buf_end; + u8 saved_dtl_mask; +}; + +static DEFINE_PER_CPU(struct dtl_ring, dtl_rings); + +static atomic_t dtl_count; + +/* + * The cpu accounting code controls the DTL ring buffer, and we get + * given entries as they are processed. 
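+ * consume_dtle() below copies each entry into our own ring buffer and
+ * only then advances write_index, so a reader never observes a
+ * partially written entry.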
+ */ +static void consume_dtle(struct dtl_entry *dtle, u64 index) +{ + struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings); + struct dtl_entry *wp = dtlr->write_ptr; + struct lppaca *vpa = local_paca->lppaca_ptr; + + if (!wp) + return; + + *wp = *dtle; + barrier(); + + /* check for hypervisor ring buffer overflow, ignore this entry if so */ + if (index + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) + return; + + ++wp; + if (wp == dtlr->buf_end) + wp = dtlr->buf; + dtlr->write_ptr = wp; + + /* incrementing write_index makes the new entry visible */ + smp_wmb(); + ++dtlr->write_index; +} + +static int dtl_start(struct dtl *dtl) +{ + struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); + + dtlr->buf = dtl->buf; + dtlr->buf_end = dtl->buf + dtl->buf_entries; + dtlr->write_index = 0; + + /* setting write_ptr enables logging into our buffer */ + smp_wmb(); + dtlr->write_ptr = dtl->buf; + + /* enable event logging */ + dtlr->saved_dtl_mask = lppaca_of(dtl->cpu).dtl_enable_mask; + lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask; + + dtl_consumer = consume_dtle; + atomic_inc(&dtl_count); + return 0; +} + +static void dtl_stop(struct dtl *dtl) +{ + struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); + + dtlr->write_ptr = NULL; + smp_wmb(); + + dtlr->buf = NULL; + + /* restore dtl_enable_mask */ + lppaca_of(dtl->cpu).dtl_enable_mask = dtlr->saved_dtl_mask; + + if (atomic_dec_and_test(&dtl_count)) + dtl_consumer = NULL; +} + +static u64 dtl_current_index(struct dtl *dtl) +{ + return per_cpu(dtl_rings, dtl->cpu).write_index; +} + +#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +static int dtl_start(struct dtl *dtl) +{ + unsigned long addr; + int ret, hwcpu; + + /* Register our dtl buffer with the hypervisor. The HV expects the + * buffer size to be passed in the second word of the buffer */ + ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES; + + hwcpu = get_hard_smp_processor_id(dtl->cpu); + addr = __pa(dtl->buf); + ret = register_dtl(hwcpu, addr); + if (ret) { + printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) " + "failed with %d\n", __func__, dtl->cpu, hwcpu, ret); + return -EIO; + } + + /* set our initial buffer indices */ + lppaca_of(dtl->cpu).dtl_idx = 0; + + /* ensure that our updates to the lppaca fields have occurred before + * we actually enable the logging */ + smp_wmb(); + + /* enable event logging */ + lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask; + + return 0; +} + +static void dtl_stop(struct dtl *dtl) +{ + int hwcpu = get_hard_smp_processor_id(dtl->cpu); + + lppaca_of(dtl->cpu).dtl_enable_mask = 0x0; + + unregister_dtl(hwcpu); +} + +static u64 dtl_current_index(struct dtl *dtl) +{ + return lppaca_of(dtl->cpu).dtl_idx; +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +static int dtl_enable(struct dtl *dtl) +{ + long int n_entries; + long int rc; + struct dtl_entry *buf = NULL; + + if (!dtl_cache) + return -ENOMEM; + + /* only allow one reader */ + if (dtl->buf) + return -EBUSY; + + n_entries = dtl_buf_entries; + buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu)); + if (!buf) { + printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", + __func__, dtl->cpu); + return -ENOMEM; + } + + spin_lock(&dtl->lock); + rc = -EBUSY; + if (!dtl->buf) { + /* store the original allocation size for use during read */ + dtl->buf_entries = n_entries; + dtl->buf = buf; + dtl->last_idx = 0; + rc = dtl_start(dtl); + if (rc) + dtl->buf = NULL; + } + spin_unlock(&dtl->lock); + + if (rc) + kmem_cache_free(dtl_cache, buf); + return rc; +} + +static void 
dtl_disable(struct dtl *dtl) +{ + spin_lock(&dtl->lock); + dtl_stop(dtl); + kmem_cache_free(dtl_cache, dtl->buf); + dtl->buf = NULL; + dtl->buf_entries = 0; + spin_unlock(&dtl->lock); +} + +/* file interface */ + +static int dtl_file_open(struct inode *inode, struct file *filp) +{ + struct dtl *dtl = inode->i_private; + int rc; + + rc = dtl_enable(dtl); + if (rc) + return rc; + + filp->private_data = dtl; + return 0; +} + +static int dtl_file_release(struct inode *inode, struct file *filp) +{ + struct dtl *dtl = inode->i_private; + dtl_disable(dtl); + return 0; +} + +static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, + loff_t *pos) +{ + long int rc, n_read, n_req, read_size; + struct dtl *dtl; + u64 cur_idx, last_idx, i; + + if ((len % sizeof(struct dtl_entry)) != 0) + return -EINVAL; + + dtl = filp->private_data; + + /* requested number of entries to read */ + n_req = len / sizeof(struct dtl_entry); + + /* actual number of entries read */ + n_read = 0; + + spin_lock(&dtl->lock); + + cur_idx = dtl_current_index(dtl); + last_idx = dtl->last_idx; + + if (last_idx + dtl->buf_entries <= cur_idx) + last_idx = cur_idx - dtl->buf_entries + 1; + + if (last_idx + n_req > cur_idx) + n_req = cur_idx - last_idx; + + if (n_req > 0) + dtl->last_idx = last_idx + n_req; + + spin_unlock(&dtl->lock); + + if (n_req <= 0) + return 0; + + i = last_idx % dtl->buf_entries; + + /* read the tail of the buffer if we've wrapped */ + if (i + n_req > dtl->buf_entries) { + read_size = dtl->buf_entries - i; + + rc = copy_to_user(buf, &dtl->buf[i], + read_size * sizeof(struct dtl_entry)); + if (rc) + return -EFAULT; + + i = 0; + n_req -= read_size; + n_read += read_size; + buf += read_size * sizeof(struct dtl_entry); + } + + /* .. and now the head */ + rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry)); + if (rc) + return -EFAULT; + + n_read += n_req; + + return n_read * sizeof(struct dtl_entry); +} + +static const struct file_operations dtl_fops = { + .open = dtl_file_open, + .release = dtl_file_release, + .read = dtl_file_read, + .llseek = no_llseek, +}; + +static struct dentry *dtl_dir; + +static int dtl_setup_file(struct dtl *dtl) +{ + char name[10]; + + sprintf(name, "cpu-%d", dtl->cpu); + + dtl->file = debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops); + if (!dtl->file) + return -ENOMEM; + + return 0; +} + +static int dtl_init(void) +{ + struct dentry *event_mask_file, *buf_entries_file; + int rc, i; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return -ENODEV; + + /* set up common debugfs structure */ + + rc = -ENOMEM; + dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root); + if (!dtl_dir) { + printk(KERN_WARNING "%s: can't create dtl root dir\n", + __func__); + goto err; + } + + event_mask_file = debugfs_create_x8("dtl_event_mask", 0600, + dtl_dir, &dtl_event_mask); + buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400, + dtl_dir, &dtl_buf_entries); + + if (!event_mask_file || !buf_entries_file) { + printk(KERN_WARNING "%s: can't create dtl files\n", __func__); + goto err_remove_dir; + } + + /* set up the per-cpu log structures */ + for_each_possible_cpu(i) { + struct dtl *dtl = &per_cpu(cpu_dtl, i); + spin_lock_init(&dtl->lock); + dtl->cpu = i; + + rc = dtl_setup_file(dtl); + if (rc) + goto err_remove_dir; + } + + return 0; + +err_remove_dir: + debugfs_remove_recursive(dtl_dir); +err: + return rc; +} +machine_arch_initcall(pseries, dtl_init); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
b/arch/powerpc/platforms/pseries/eeh_pseries.c
new file mode 100644
index 000000000..2039397cc
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -0,0 +1,717 @@
+/*
+ * This file implements the platform-dependent EEH operations for pseries.
+ * The pseries platform relies heavily on RTAS, so the pseries EEH
+ * operations are built on RTAS calls. The functions are derived from
+ * arch/powerpc/platforms/pseries/eeh.c, with the necessary cleanup applied.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2011.
+ * Copyright IBM Corporation 2001, 2005, 2006
+ * Copyright Dave Engebretsen & Todd Inglett 2001
+ * Copyright Linas Vepstas 2005, 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+
+/* RTAS tokens */
+static int ibm_set_eeh_option;
+static int ibm_set_slot_reset;
+static int ibm_read_slot_reset_state;
+static int ibm_read_slot_reset_state2;
+static int ibm_slot_error_detail;
+static int ibm_get_config_addr_info;
+static int ibm_get_config_addr_info2;
+static int ibm_configure_bridge;
+static int ibm_configure_pe;
+
+/*
+ * Buffer for reporting slot-error-detail rtas calls. It's here
+ * in BSS, and not dynamically allocated, so that it ends up in
+ * the RMO where RTAS can access it.
+ */
+static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
+static DEFINE_SPINLOCK(slot_errbuf_lock);
+static int eeh_error_buf_size;
+
+/**
+ * pseries_eeh_init - EEH platform dependent initialization
+ *
+ * EEH platform dependent initialization on pseries.
+ */
+static int pseries_eeh_init(void)
+{
+ /* figure out EEH RTAS function call tokens */
+ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
+ ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+ ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
+ ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
+ ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
+ ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
+ ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
+ ibm_configure_pe = rtas_token("ibm,configure-pe");
+ ibm_configure_bridge = rtas_token("ibm,configure-bridge");
+
+ /*
+ * Necessary sanity check:
we need not check "get-config-addr-info"
+ * and its variant, since even old firmware should support addressing
+ * by domain/bus/slot/function for the EEH RTAS operations.
+ */
+ if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE ||
+ ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
+ (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
+ ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) ||
+ ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE ||
+ (ibm_configure_pe == RTAS_UNKNOWN_SERVICE &&
+ ibm_configure_bridge == RTAS_UNKNOWN_SERVICE)) {
+ pr_info("EEH functionality not supported\n");
+ return -EINVAL;
+ }
+
+ /* Initialize error log lock and size */
+ spin_lock_init(&slot_errbuf_lock);
+ eeh_error_buf_size = rtas_token("rtas-error-log-max");
+ if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
+ pr_info("%s: unknown EEH error log size\n",
+ __func__);
+ eeh_error_buf_size = 1024;
+ } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
+ pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
+ __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
+ eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
+ }
+
+ /* Set EEH probe mode */
+ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
+
+ return 0;
+}
+
+static int pseries_eeh_cap_start(struct pci_dn *pdn)
+{
+ u32 status;
+
+ if (!pdn)
+ return 0;
+
+ rtas_read_config(pdn, PCI_STATUS, 2, &status);
+ if (!(status & PCI_STATUS_CAP_LIST))
+ return 0;
+
+ return PCI_CAPABILITY_LIST;
+}
+
+
+static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap)
+{
+ int pos = pseries_eeh_cap_start(pdn);
+ int cnt = 48; /* Maximum number of capabilities */
+ u32 id;
+
+ if (!pos)
+ return 0;
+
+ while (cnt--) {
+ rtas_read_config(pdn, pos, 1, &pos);
+ if (pos < 0x40)
+ break;
+ pos &= ~3;
+ rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+ pos += PCI_CAP_LIST_NEXT;
+ }
+
+ return 0;
+}
+
+static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 header;
+ int pos = 256;
+ int ttl = (4096 - 256) / 8;
+
+ if (!edev || !edev->pcie_cap)
+ return 0;
+ if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+ return 0;
+ else if (!header)
+ return 0;
+
+ while (ttl-- > 0) {
+ if (PCI_EXT_CAP_ID(header) == cap && pos)
+ return pos;
+
+ pos = PCI_EXT_CAP_NEXT(header);
+ if (pos < 256)
+ break;
+
+ if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * pseries_eeh_probe - EEH probe on the given device
+ * @pdn: PCI device node
+ * @data: Unused
+ *
+ * When the EEH module is installed during system boot, all PCI devices
+ * are checked one by one to see whether they support EEH. This function
+ * is introduced for that purpose.
+ */
+static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
+{
+ struct eeh_dev *edev;
+ struct eeh_pe pe;
+ u32 pcie_flags;
+ int enable = 0;
+ int ret;
+
+ /* Retrieve OF node and eeh device */
+ edev = pdn_to_eeh_dev(pdn);
+ if (!edev || edev->pe)
+ return NULL;
+
+ /* Check class/vendor/device IDs */
+ if (!pdn->vendor_id || !pdn->device_id || !pdn->class_code)
+ return NULL;
+
+ /* Skip for PCI-ISA bridge */
+ if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
+ return NULL;
+
+ /*
+ * Update the class code and mode of the eeh device. We need
+ * to correctly reflect whether the current device is a root
+ * port or a PCIe switch downstream port.
+ */
+ edev->class_code = pdn->class_code;
+ edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
+ edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
+ edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
+ edev->mode &= 0xFFFFFF00;
+ if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
+ edev->mode |= EEH_DEV_BRIDGE;
+ if (edev->pcie_cap) {
+ rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+ 2, &pcie_flags);
+ pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
+ if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
+ edev->mode |= EEH_DEV_ROOT_PORT;
+ else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
+ edev->mode |= EEH_DEV_DS_PORT;
+ }
+ }
+
+ /* Initialize the fake PE */
+ memset(&pe, 0, sizeof(struct eeh_pe));
+ pe.phb = edev->phb;
+ pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+ /* Enable EEH on the device */
+ ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
+ if (!ret) {
+ /* Retrieve PE address */
+ edev->config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
+ pe.addr = edev->pe_config_addr;
+
+ /* Some older systems (Power4) allow the ibm,set-eeh-option
+ * call to succeed even on nodes where EEH is not supported.
+ * Verify support explicitly.
+ */
+ ret = eeh_ops->get_state(&pe, NULL);
+ if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
+ enable = 1;
+
+ if (enable) {
+ eeh_add_flag(EEH_ENABLED);
+ eeh_add_to_parent_pe(edev);
+
+ pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%d-PE#%x\n",
+ __func__, pdn->busno, PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn), pe.phb->global_number,
+ pe.addr);
+ } else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
+ (pdn_to_eeh_dev(pdn->parent))->pe) {
+ /* This device doesn't support EEH, but it may have an
+ * EEH parent, in which case we mark it as supported.
+ */
+ edev->config_addr = pdn_to_eeh_dev(pdn->parent)->config_addr;
+ edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
+ eeh_add_to_parent_pe(edev);
+ }
+ }
+
+ /* Save memory bars */
+ eeh_save_bars(edev);
+
+ return NULL;
+}
+
+/**
+ * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * @pe: EEH PE
+ * @option: operation to be issued
+ *
+ * This function is used to control EEH functionality globally.
+ * Currently, the following options are supported according to PAPR:
+ * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
+ */
+static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
+{
+ int ret = 0;
+ int config_addr;
+
+ /*
+ * When we're enabling or disabling EEH functionality on
+ * a particular PE, the PE config address is possibly
+ * unavailable. Therefore, we have to figure it out from
+ * the FDT node.
+ */
+ switch (option) {
+ case EEH_OPT_DISABLE:
+ case EEH_OPT_ENABLE:
+ case EEH_OPT_THAW_MMIO:
+ case EEH_OPT_THAW_DMA:
+ config_addr = pe->config_addr;
+ if (pe->addr)
+ config_addr = pe->addr;
+ break;
+ case EEH_OPT_FREEZE_PE:
+ /* Not supported */
+ return 0;
+ default:
+ pr_err("%s: Invalid option %d\n",
+ __func__, option);
+ return -EINVAL;
+ }
+
+ ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid), option);
+
+ return ret;
+}
+
+/**
+ * pseries_eeh_get_pe_addr - Retrieve PE address
+ * @pe: EEH PE
+ *
+ * Retrieve the associated PE address. There are two RTAS calls
+ * dedicated to this purpose; we try the new one first and then
+ * fall back to the old one.
Also note that the config address
+ * must be figured out from the FDT node before calling this function.
+ *
+ * Note that a return value of zero means the PE config address
+ * is invalid.
+ */
+static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
+{
+ int ret = 0;
+ int rets[3];
+
+ if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+ /*
+ * First of all, make sure there is one PE associated
+ * with the device; otherwise the PE address is
+ * meaningless.
+ */
+ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+ pe->config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid), 1);
+ if (ret || (rets[0] == 0))
+ return 0;
+
+ /* Retrieve the associated PE config address */
+ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+ pe->config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid), 0);
+ if (ret) {
+ pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n",
+ __func__, pe->phb->global_number, pe->config_addr);
+ return 0;
+ }
+
+ return rets[0];
+ }
+
+ if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+ ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+ pe->config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid), 0);
+ if (ret) {
+ pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n",
+ __func__, pe->phb->global_number, pe->config_addr);
+ return 0;
+ }
+
+ return rets[0];
+ }
+
+ return ret;
+}
+
+/**
+ * pseries_eeh_get_state - Retrieve PE state
+ * @pe: EEH PE
+ * @state: return value
+ *
+ * Retrieve the state of the specified PE. On RTAS-compliant pseries
+ * platforms there is a dedicated RTAS call for this purpose. Note that
+ * the associated PE config address might be ready when this function is
+ * called, so use the PE config address if possible. Furthermore, there
+ * are two RTAS calls for the purpose; we try the new one and fall back
+ * to the old one if the new one does not work properly.
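+ *
+ * For reference (summarizing the switch statement below): rets[0]
+ * values 0 and 1 mean the PE is active (MMIO and DMA allowed, with 1
+ * also indicating an asserted reset), 2 means frozen (MMIO and DMA
+ * blocked), 4 means frozen with MMIO re-enabled, and 5 means the
+ * state is temporarily unavailable.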
+ */ +static int pseries_eeh_get_state(struct eeh_pe *pe, int *state) +{ + int config_addr; + int ret; + int rets[4]; + int result; + + /* Figure out PE config address if possible */ + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; + + if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets, + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); + } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) { + /* Fake PE unavailable info */ + rets[2] = 0; + ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); + } else { + return EEH_STATE_NOT_SUPPORT; + } + + if (ret) + return ret; + + /* Parse the result out */ + result = 0; + if (rets[1]) { + switch(rets[0]) { + case 0: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + break; + case 1: + result |= EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + break; + case 2: + result &= ~EEH_STATE_RESET_ACTIVE; + result &= ~EEH_STATE_MMIO_ACTIVE; + result &= ~EEH_STATE_DMA_ACTIVE; + break; + case 4: + result &= ~EEH_STATE_RESET_ACTIVE; + result &= ~EEH_STATE_MMIO_ACTIVE; + result &= ~EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + break; + case 5: + if (rets[2]) { + if (state) *state = rets[2]; + result = EEH_STATE_UNAVAILABLE; + } else { + result = EEH_STATE_NOT_SUPPORT; + } + break; + default: + result = EEH_STATE_NOT_SUPPORT; + } + } else { + result = EEH_STATE_NOT_SUPPORT; + } + + return result; +} + +/** + * pseries_eeh_reset - Reset the specified PE + * @pe: EEH PE + * @option: reset option + * + * Reset the specified PE + */ +static int pseries_eeh_reset(struct eeh_pe *pe, int option) +{ + int config_addr; + int ret; + + /* Figure out PE address */ + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; + + /* Reset PE through RTAS call */ + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), option); + + /* If fundamental-reset not supported, try hot-reset */ + if (option == EEH_RESET_FUNDAMENTAL && + ret == -8) { + option = EEH_RESET_HOT; + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), option); + } + + /* We need reset hold or settlement delay */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + msleep(EEH_PE_RST_HOLD_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + + return ret; +} + +/** + * pseries_eeh_wait_state - Wait for PE state + * @pe: EEH PE + * @max_wait: maximal period in microsecond + * + * Wait for the state of associated PE. It might take some time + * to retrieve the PE's state. + */ +static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait) +{ + int ret; + int mwait; + + /* + * According to PAPR, the state of PE might be temporarily + * unavailable. Under the circumstance, we have to wait + * for indicated time determined by firmware. The maximal + * wait time is 5 minutes, which is acquired from the original + * EEH implementation. Also, the original implementation + * also defined the minimal wait time as 1 second. + */ +#define EEH_STATE_MIN_WAIT_TIME (1000) +#define EEH_STATE_MAX_WAIT_TIME (300 * 1000) + + while (1) { + ret = pseries_eeh_get_state(pe, &mwait); + + /* + * If the PE's state is temporarily unavailable, + * we have to wait for the specified time. 
Otherwise,
+ * the PE's state will be returned immediately.
+ */
+ if (ret != EEH_STATE_UNAVAILABLE)
+ return ret;
+
+ if (max_wait <= 0) {
+ pr_warn("%s: Timeout when getting PE's state (%d)\n",
+ __func__, max_wait);
+ return EEH_STATE_NOT_SUPPORT;
+ }
+
+ if (mwait <= 0) {
+ pr_warn("%s: Firmware returned bad wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MIN_WAIT_TIME;
+ } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
+ pr_warn("%s: Firmware returned too long wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MAX_WAIT_TIME;
+ }
+
+ max_wait -= mwait;
+ msleep(mwait);
+ }
+
+ return EEH_STATE_NOT_SUPPORT;
+}
+
+/**
+ * pseries_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log
+ * @drv_log: driver log to be combined with retrieved error log
+ * @len: length of driver log
+ *
+ * Retrieve the temporary or permanent error log from the PE
+ * through the dedicated RTAS call.
+ */
+static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
+{
+ int config_addr;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&slot_errbuf_lock, flags);
+ memset(slot_errbuf, 0, eeh_error_buf_size);
+
+ /* Figure out the PE address */
+ config_addr = pe->config_addr;
+ if (pe->addr)
+ config_addr = pe->addr;
+
+ ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
+ BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid),
+ virt_to_phys(drv_log), len,
+ virt_to_phys(slot_errbuf), eeh_error_buf_size,
+ severity);
+ if (!ret)
+ log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
+ spin_unlock_irqrestore(&slot_errbuf_lock, flags);
+
+ return ret;
+}
+
+/**
+ * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * @pe: EEH PE
+ *
+ * This function is called to reconfigure the bridges included in the
+ * specified PE so that the malfunctioning PE can be recovered
+ * again.
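+ *
+ * The newer ibm,configure-pe RTAS call is used when the firmware
+ * provides it, with ibm,configure-bridge as the fallback.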
+ */
+static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
+{
+ int config_addr;
+ int ret;
+
+ /* Figure out the PE address */
+ config_addr = pe->config_addr;
+ if (pe->addr)
+ config_addr = pe->addr;
+
+ /* Use new configure-pe function, if supported */
+ if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
+ ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid));
+ } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
+ ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
+ config_addr, BUID_HI(pe->phb->buid),
+ BUID_LO(pe->phb->buid));
+ } else {
+ return -EFAULT;
+ }
+
+ if (ret)
+ pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
+ __func__, pe->phb->global_number, pe->addr, ret);
+
+ return ret;
+}
+
+/**
+ * pseries_eeh_read_config - Read PCI config space
+ * @pdn: PCI device node
+ * @where: PCI address
+ * @size: size to read
+ * @val: return value
+ *
+ * Read config space from the specified device
+ */
+static int pseries_eeh_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+{
+ return rtas_read_config(pdn, where, size, val);
+}
+
+/**
+ * pseries_eeh_write_config - Write PCI config space
+ * @pdn: PCI device node
+ * @where: PCI address
+ * @size: size to write
+ * @val: value to be written
+ *
+ * Write config space to the specified device
+ */
+static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+{
+ return rtas_write_config(pdn, where, size, val);
+}
+
+static struct eeh_ops pseries_eeh_ops = {
+ .name = "pseries",
+ .init = pseries_eeh_init,
+ .probe = pseries_eeh_probe,
+ .set_option = pseries_eeh_set_option,
+ .get_pe_addr = pseries_eeh_get_pe_addr,
+ .get_state = pseries_eeh_get_state,
+ .reset = pseries_eeh_reset,
+ .wait_state = pseries_eeh_wait_state,
+ .get_log = pseries_eeh_get_log,
+ .configure_bridge = pseries_eeh_configure_bridge,
+ .err_inject = NULL,
+ .read_config = pseries_eeh_read_config,
+ .write_config = pseries_eeh_write_config,
+ .next_error = NULL,
+ .restore_config = NULL
+};
+
+/**
+ * eeh_pseries_init - Register platform dependent EEH operations
+ *
+ * EEH initialization on pseries platform. This function should be
+ * called before any EEH related functions.
+ */
+static int __init eeh_pseries_init(void)
+{
+ int ret;
+
+ ret = eeh_ops_register(&pseries_eeh_ops);
+ if (!ret)
+ pr_info("EEH: pSeries platform initialized\n");
+ else
+ pr_info("EEH: pSeries platform initialization failure (%d)\n",
+ ret);
+
+ return ret;
+}
+machine_early_initcall(pseries, eeh_pseries_init);
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
new file mode 100644
index 000000000..18380e8f6
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2001 Dave Engebretsen IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <asm/prom.h>
+
+#include "pseries.h"
+
+void request_event_sources_irqs(struct device_node *np,
+ irq_handler_t handler,
+ const char *name)
+{
+ int i, index, count = 0;
+ struct of_phandle_args oirq;
+ const u32 *opicprop;
+ unsigned int opicplen;
+ unsigned int virqs[16];
+
+ /* Check for the obsolete "open-pic-interrupt" property. If present,
+ * map those interrupts using the default interrupt host and default
+ * trigger.
+ */
+ opicprop = of_get_property(np, "open-pic-interrupt", &opicplen);
+ if (opicprop) {
+ opicplen /= sizeof(u32);
+ for (i = 0; i < opicplen; i++) {
+ if (count > 15)
+ break;
+ virqs[count] = irq_create_mapping(NULL, *(opicprop++));
+ if (virqs[count] == NO_IRQ) {
+ pr_err("event-sources: Unable to allocate "
+ "interrupt number for %s\n",
+ np->full_name);
+ WARN_ON(1);
+ }
+ else
+ count++;
+
+ }
+ }
+ /* Else use normal interrupt tree parsing */
+ else {
+ /* First try to do a proper OF tree parsing */
+ for (index = 0; of_irq_parse_one(np, index, &oirq) == 0;
+ index++) {
+ if (count > 15)
+ break;
+ virqs[count] = irq_create_of_mapping(&oirq);
+ if (virqs[count] == NO_IRQ) {
+ pr_err("event-sources: Unable to allocate "
+ "interrupt number for %s\n",
+ np->full_name);
+ WARN_ON(1);
+ }
+ else
+ count++;
+ }
+ }
+
+ /* Now request them */
+ for (i = 0; i < count; i++) {
+ if (request_irq(virqs[i], handler, 0, name, NULL)) {
+ pr_err("event-sources: Unable to request interrupt "
+ "%d for %s\n", virqs[i], np->full_name);
+ WARN_ON(1);
+ return;
+ }
+ }
+}
+
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
new file mode 100644
index 000000000..8c80588ab
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -0,0 +1,133 @@
+/*
+ * pSeries firmware setup code.
+ *
+ * Portions from arch/powerpc/platforms/pseries/setup.c:
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified by PPC64 Team, IBM Corp
+ *
+ * Portions from arch/powerpc/kernel/firmware.c
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ * Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ * Copyright 2006 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <asm/firmware.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+
+#include "pseries.h"
+
+struct hypertas_fw_feature {
+ unsigned long val;
+ char * name;
+};
+
+/*
+ * The names in this table match names in rtas/ibm,hypertas-functions. If the
+ * entry ends in a '*', only up to the '*' is matched. Otherwise the entire
+ * string must match.
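+ *
+ * For example, the "hcall-best-energy-1*" entry below matches
+ * "hcall-best-energy-1" itself as well as any longer name that begins
+ * with that prefix.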
+ */ +static __initdata struct hypertas_fw_feature +hypertas_fw_features_table[] = { + {FW_FEATURE_PFT, "hcall-pft"}, + {FW_FEATURE_TCE, "hcall-tce"}, + {FW_FEATURE_SPRG0, "hcall-sprg0"}, + {FW_FEATURE_DABR, "hcall-dabr"}, + {FW_FEATURE_COPY, "hcall-copy"}, + {FW_FEATURE_ASR, "hcall-asr"}, + {FW_FEATURE_DEBUG, "hcall-debug"}, + {FW_FEATURE_PERF, "hcall-perf"}, + {FW_FEATURE_DUMP, "hcall-dump"}, + {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, + {FW_FEATURE_MIGRATE, "hcall-migrate"}, + {FW_FEATURE_PERFMON, "hcall-perfmon"}, + {FW_FEATURE_CRQ, "hcall-crq"}, + {FW_FEATURE_VIO, "hcall-vio"}, + {FW_FEATURE_RDMA, "hcall-rdma"}, + {FW_FEATURE_LLAN, "hcall-lLAN"}, + {FW_FEATURE_BULK_REMOVE, "hcall-bulk"}, + {FW_FEATURE_XDABR, "hcall-xdabr"}, + {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, + {FW_FEATURE_SPLPAR, "hcall-splpar"}, + {FW_FEATURE_VPHN, "hcall-vphn"}, + {FW_FEATURE_SET_MODE, "hcall-set-mode"}, + {FW_FEATURE_BEST_ENERGY, "hcall-best-energy-1*"}, +}; + +/* Build up the firmware features bitmask using the contents of + * device-tree/ibm,hypertas-functions. Ultimately this functionality may + * be moved into prom.c prom_init(). + */ +void __init fw_hypertas_feature_init(const char *hypertas, unsigned long len) +{ + const char *s; + int i; + + pr_debug(" -> fw_hypertas_feature_init()\n"); + + for (s = hypertas; s < hypertas + len; s += strlen(s) + 1) { + for (i = 0; i < ARRAY_SIZE(hypertas_fw_features_table); i++) { + const char *name = hypertas_fw_features_table[i].name; + size_t size; + + /* + * If there is a '*' at the end of name, only check + * upto there + */ + size = strlen(name); + if (size && name[size - 1] == '*') { + if (strncmp(name, s, size - 1)) + continue; + } else if (strcmp(name, s)) + continue; + + /* we have a match */ + powerpc_firmware_features |= + hypertas_fw_features_table[i].val; + break; + } + } + + pr_debug(" <- fw_hypertas_feature_init()\n"); +} + +struct vec5_fw_feature { + unsigned long val; + unsigned int feature; +}; + +static __initdata struct vec5_fw_feature +vec5_fw_features_table[] = { + {FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY}, + {FW_FEATURE_PRRN, OV5_PRRN}, +}; + +void __init fw_vec5_feature_init(const char *vec5, unsigned long len) +{ + unsigned int index, feat; + int i; + + pr_debug(" -> fw_vec5_feature_init()\n"); + + for (i = 0; i < ARRAY_SIZE(vec5_fw_features_table); i++) { + index = OV5_INDX(vec5_fw_features_table[i].feature); + feat = OV5_FEAT(vec5_fw_features_table[i].feature); + + if (vec5[index] & feat) + powerpc_firmware_features |= + vec5_fw_features_table[i].val; + } + + pr_debug(" <- fw_vec5_feature_init()\n"); +} diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c new file mode 100644 index 000000000..62475440f --- /dev/null +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -0,0 +1,427 @@ +/* + * pseries CPU Hotplug infrastructure. + * + * Split out from arch/powerpc/platforms/pseries/setup.c + * arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c + * + * Peter Bergner, IBM March 2001. + * Copyright (C) 2001 IBM. + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * Plus various changes from other IBM teams... + * + * Copyright (C) 2006 Michael Ellerman, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/sched.h> /* for idle_task_exit */ +#include <linux/cpu.h> +#include <linux/of.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/firmware.h> +#include <asm/machdep.h> +#include <asm/vdso_datapage.h> +#include <asm/xics.h> +#include <asm/plpar_wrappers.h> + +#include "offline_states.h" + +/* This version can't take the spinlock, because it never returns */ +static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE; + +static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = + CPU_STATE_OFFLINE; +static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; + +static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; + +static int cede_offline_enabled __read_mostly = 1; + +/* + * Enable/disable cede_offline when available. + */ +static int __init setup_cede_offline(char *str) +{ + if (!strcmp(str, "off")) + cede_offline_enabled = 0; + else if (!strcmp(str, "on")) + cede_offline_enabled = 1; + else + return 0; + return 1; +} + +__setup("cede_offline=", setup_cede_offline); + +enum cpu_state_vals get_cpu_current_state(int cpu) +{ + return per_cpu(current_state, cpu); +} + +void set_cpu_current_state(int cpu, enum cpu_state_vals state) +{ + per_cpu(current_state, cpu) = state; +} + +enum cpu_state_vals get_preferred_offline_state(int cpu) +{ + return per_cpu(preferred_offline_state, cpu); +} + +void set_preferred_offline_state(int cpu, enum cpu_state_vals state) +{ + per_cpu(preferred_offline_state, cpu) = state; +} + +void set_default_offline_state(int cpu) +{ + per_cpu(preferred_offline_state, cpu) = default_offline_state; +} + +static void rtas_stop_self(void) +{ + static struct rtas_args args = { + .nargs = 0, + .nret = cpu_to_be32(1), + .rets = &args.args[0], + }; + + args.token = cpu_to_be32(rtas_stop_self_token); + + local_irq_disable(); + + BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); + + printk("cpu %u (hwid %u) Ready to die...\n", + smp_processor_id(), hard_smp_processor_id()); + enter_rtas(__pa(&args)); + + panic("Alas, I survived.\n"); +} + +static void pseries_mach_cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + unsigned int hwcpu = hard_smp_processor_id(); + u8 cede_latency_hint = 0; + + local_irq_disable(); + idle_task_exit(); + xics_teardown_cpu(); + + if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { + set_cpu_current_state(cpu, CPU_STATE_INACTIVE); + if (ppc_md.suspend_disable_cpu) + ppc_md.suspend_disable_cpu(); + + cede_latency_hint = 2; + + get_lppaca()->idle = 1; + if (!lppaca_shared_proc(get_lppaca())) + get_lppaca()->donate_dedicated_cpu = 1; + + while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { + while (!prep_irq_for_idle()) { + local_irq_enable(); + local_irq_disable(); + } + + extended_cede_processor(cede_latency_hint); + } + + local_irq_disable(); + + if (!lppaca_shared_proc(get_lppaca())) + get_lppaca()->donate_dedicated_cpu = 0; + get_lppaca()->idle = 0; + + if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { + unregister_slb_shadow(hwcpu); + + hard_irq_disable(); + /* + * Call to start_secondary_resume() will not return. + * Kernel stack will be reset and start_secondary() + * will be called to continue the online operation. 
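+ * This is the path taken when an inactive (ceded)
+ * thread is asked to go back online rather than
+ * fully offline.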
+ */ + start_secondary_resume(); + } + } + + /* Requested state is CPU_STATE_OFFLINE at this point */ + WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE); + + set_cpu_current_state(cpu, CPU_STATE_OFFLINE); + unregister_slb_shadow(hwcpu); + rtas_stop_self(); + + /* Should never get here... */ + BUG(); + for(;;); +} + +static int pseries_cpu_disable(void) +{ + int cpu = smp_processor_id(); + + set_cpu_online(cpu, false); + vdso_data->processorCount--; + + /*fix boot_cpuid here*/ + if (cpu == boot_cpuid) + boot_cpuid = cpumask_any(cpu_online_mask); + + /* FIXME: abstract this to not be platform specific later on */ + xics_migrate_irqs_away(); + return 0; +} + +/* + * pseries_cpu_die: Wait for the cpu to die. + * @cpu: logical processor id of the CPU whose death we're awaiting. + * + * This function is called from the context of the thread which is performing + * the cpu-offline. Here we wait for long enough to allow the cpu in question + * to self-destroy so that the cpu-offline thread can send the CPU_DEAD + * notifications. + * + * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to + * self-destruct. + */ +static void pseries_cpu_die(unsigned int cpu) +{ + int tries; + int cpu_status = 1; + unsigned int pcpu = get_hard_smp_processor_id(cpu); + + if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { + cpu_status = 1; + for (tries = 0; tries < 5000; tries++) { + if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { + cpu_status = 0; + break; + } + msleep(1); + } + } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { + + for (tries = 0; tries < 25; tries++) { + cpu_status = smp_query_cpu_stopped(pcpu); + if (cpu_status == QCSS_STOPPED || + cpu_status == QCSS_HARDWARE_ERROR) + break; + cpu_relax(); + } + } + + if (cpu_status != 0) { + printk("Querying DEAD? cpu %i (%i) shows %i\n", + cpu, pcpu, cpu_status); + } + + /* Isolation and deallocation are definitely done by + * drslot_chrp_cpu. If they were not they would be + * done here. Change isolate state to Isolate and + * change allocation-state to Unusable. + */ + paca[cpu].cpu_start = 0; +} + +/* + * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle + * here is that a cpu device node may represent up to two logical cpus + * in the SMT case. We must honor the assumption in other code that + * the logical ids for sibling SMT threads x and y are adjacent, such + * that x^1 == y and y^1 == x. + */ +static int pseries_add_processor(struct device_node *np) +{ + unsigned int cpu; + cpumask_var_t candidate_mask, tmp; + int err = -ENOSPC, len, nthreads, i; + const __be32 *intserv; + + intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return 0; + + zalloc_cpumask_var(&candidate_mask, GFP_KERNEL); + zalloc_cpumask_var(&tmp, GFP_KERNEL); + + nthreads = len / sizeof(u32); + for (i = 0; i < nthreads; i++) + cpumask_set_cpu(i, tmp); + + cpu_maps_update_begin(); + + BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask)); + + /* Get a bitmap of unoccupied slots. */ + cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask); + if (cpumask_empty(candidate_mask)) { + /* If we get here, it most likely means that NR_CPUS is + * less than the partition's max processors setting. 
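+ * Raising CONFIG_NR_CPUS and rebuilding the kernel is the usual remedy.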
+ */ + printk(KERN_ERR "Cannot add cpu %s; this system configuration" + " supports %d logical cpus.\n", np->full_name, + num_possible_cpus()); + goto out_unlock; + } + + while (!cpumask_empty(tmp)) + if (cpumask_subset(tmp, candidate_mask)) + /* Found a range where we can insert the new cpu(s) */ + break; + else + cpumask_shift_left(tmp, tmp, nthreads); + + if (cpumask_empty(tmp)) { + printk(KERN_ERR "Unable to find space in cpu_present_mask for" + " processor %s with %d thread(s)\n", np->name, + nthreads); + goto out_unlock; + } + + for_each_cpu(cpu, tmp) { + BUG_ON(cpu_present(cpu)); + set_cpu_present(cpu, true); + set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++)); + } + err = 0; +out_unlock: + cpu_maps_update_done(); + free_cpumask_var(candidate_mask); + free_cpumask_var(tmp); + return err; +} + +/* + * Update the present map for a cpu node which is going away, and set + * the hard id in the paca(s) to -1 to be consistent with boot time + * convention for non-present cpus. + */ +static void pseries_remove_processor(struct device_node *np) +{ + unsigned int cpu; + int len, nthreads, i; + const __be32 *intserv; + u32 thread; + + intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return; + + nthreads = len / sizeof(u32); + + cpu_maps_update_begin(); + for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != thread) + continue; + BUG_ON(cpu_online(cpu)); + set_cpu_present(cpu, false); + set_hard_smp_processor_id(cpu, -1); + break; + } + if (cpu >= nr_cpu_ids) + printk(KERN_WARNING "Could not find cpu to remove " + "with physical id 0x%x\n", thread); + } + cpu_maps_update_done(); +} + +static int pseries_smp_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct of_reconfig_data *rd = data; + int err = 0; + + switch (action) { + case OF_RECONFIG_ATTACH_NODE: + err = pseries_add_processor(rd->dn); + break; + case OF_RECONFIG_DETACH_NODE: + pseries_remove_processor(rd->dn); + break; + } + return notifier_from_errno(err); +} + +static struct notifier_block pseries_smp_nb = { + .notifier_call = pseries_smp_notifier, +}; + +#define MAX_CEDE_LATENCY_LEVELS 4 +#define CEDE_LATENCY_PARAM_LENGTH 10 +#define CEDE_LATENCY_PARAM_MAX_LENGTH \ + (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char)) +#define CEDE_LATENCY_TOKEN 45 + +static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH]; + +static int parse_cede_parameters(void) +{ + memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH); + return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + CEDE_LATENCY_TOKEN, + __pa(cede_parameters), + CEDE_LATENCY_PARAM_MAX_LENGTH); +} + +static int __init pseries_cpu_hotplug_init(void) +{ + struct device_node *np; + const char *typep; + int cpu; + int qcss_tok; + + for_each_node_by_name(np, "interrupt-controller") { + typep = of_get_property(np, "compatible", NULL); + if (strstr(typep, "open-pic")) { + of_node_put(np); + + printk(KERN_INFO "CPU Hotplug not supported on " + "systems using MPIC\n"); + return 0; + } + } + + rtas_stop_self_token = rtas_token("stop-self"); + qcss_tok = rtas_token("query-cpu-stopped-state"); + + if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE || + qcss_tok == RTAS_UNKNOWN_SERVICE) { + printk(KERN_INFO "CPU Hotplug not supported by firmware " + "- disabling.\n"); + return 0; + } + + ppc_md.cpu_die = pseries_mach_cpu_die; + smp_ops->cpu_disable = pseries_cpu_disable; + smp_ops->cpu_die = 
pseries_cpu_die; + + /* Processors can be added/removed only on LPAR */ + if (firmware_has_feature(FW_FEATURE_LPAR)) { + of_reconfig_notifier_register(&pseries_smp_nb); + cpu_maps_update_begin(); + if (cede_offline_enabled && parse_cede_parameters() == 0) { + default_offline_state = CPU_STATE_INACTIVE; + for_each_online_cpu(cpu) + set_default_offline_state(cpu); + } + cpu_maps_update_done(); + } + + return 0; +} +machine_arch_initcall(pseries, pseries_cpu_hotplug_init); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c new file mode 100644 index 000000000..0ced387e1 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -0,0 +1,738 @@ +/* + * pseries Memory Hotplug infrastructure. + * + * Copyright (C) 2008 Badari Pulavarty, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "pseries-hotplug-mem: " fmt + +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/memblock.h> +#include <linux/memory.h> +#include <linux/memory_hotplug.h> +#include <linux/slab.h> + +#include <asm/firmware.h> +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/sparsemem.h> +#include "pseries.h" + +static bool rtas_hp_event; + +unsigned long pseries_memory_block_size(void) +{ + struct device_node *np; + unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE; + struct resource r; + + np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (np) { + const __be64 *size; + + size = of_get_property(np, "ibm,lmb-size", NULL); + if (size) + memblock_size = be64_to_cpup(size); + of_node_put(np); + } else if (machine_is(pseries)) { + /* This fallback really only applies to pseries */ + unsigned int memzero_size = 0; + + np = of_find_node_by_path("/memory@0"); + if (np) { + if (!of_address_to_resource(np, 0, &r)) + memzero_size = resource_size(&r); + of_node_put(np); + } + + if (memzero_size) { + /* We now know the size of memory@0, use this to find + * the first memoryblock and get its size. 
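+ * Memory nodes are named by their base address, so (assuming the
+ * first two blocks are contiguous) the block after memory@0 is the
+ * node /memory@<size-of-memory@0> looked up below.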
+ */ + char buf[64]; + + sprintf(buf, "/memory@%x", memzero_size); + np = of_find_node_by_path(buf); + if (np) { + if (!of_address_to_resource(np, 0, &r)) + memblock_size = resource_size(&r); + of_node_put(np); + } + } + } + return memblock_size; +} + +static void dlpar_free_drconf_property(struct property *prop) +{ + kfree(prop->name); + kfree(prop->value); + kfree(prop); +} + +static struct property *dlpar_clone_drconf_property(struct device_node *dn) +{ + struct property *prop, *new_prop; + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int i; + + prop = of_find_property(dn, "ibm,dynamic-memory", NULL); + if (!prop) + return NULL; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return NULL; + + new_prop->name = kstrdup(prop->name, GFP_KERNEL); + new_prop->value = kmalloc(prop->length, GFP_KERNEL); + if (!new_prop->name || !new_prop->value) { + dlpar_free_drconf_property(new_prop); + return NULL; + } + + memcpy(new_prop->value, prop->value, prop->length); + new_prop->length = prop->length; + + /* Convert the property to cpu endian-ness */ + p = new_prop->value; + *p = be32_to_cpu(*p); + + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + for (i = 0; i < num_lmbs; i++) { + lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr); + lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index); + lmbs[i].flags = be32_to_cpu(lmbs[i].flags); + } + + return new_prop; +} + +static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) +{ + unsigned long section_nr; + struct mem_section *mem_sect; + struct memory_block *mem_block; + + section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); + mem_sect = __nr_to_section(section_nr); + + mem_block = find_memory_block(mem_sect); + return mem_block; +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) +{ + unsigned long block_sz, start_pfn; + int sections_per_block; + int i, nid; + + start_pfn = base >> PAGE_SHIFT; + + lock_device_hotplug(); + + if (!pfn_valid(start_pfn)) + goto out; + + block_sz = pseries_memory_block_size(); + sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + nid = memory_add_physaddr_to_nid(base); + + for (i = 0; i < sections_per_block; i++) { + remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); + base += MIN_MEMORY_BLOCK_SIZE; + } + +out: + /* Update memory regions for memory remove */ + memblock_remove(base, memblock_size); + unlock_device_hotplug(); + return 0; +} + +static int pseries_remove_mem_node(struct device_node *np) +{ + const char *type; + const __be32 *regs; + unsigned long base; + unsigned int lmb_size; + int ret = -EINVAL; + + /* + * Check to see if we are actually removing memory + */ + type = of_get_property(np, "device_type", NULL); + if (type == NULL || strcmp(type, "memory") != 0) + return 0; + + /* + * Find the base address and size of the memblock + */ + regs = of_get_property(np, "reg", NULL); + if (!regs) + return ret; + + base = be64_to_cpu(*(unsigned long *)regs); + lmb_size = be32_to_cpu(regs[3]); + + pseries_remove_memblock(base, lmb_size); + return 0; +} + +static bool lmb_is_removable(struct of_drconf_cell *lmb) +{ + int i, scns_per_block; + int rc = 1; + unsigned long pfn, block_sz; + u64 phys_addr; + + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) + return false; + + block_sz = memory_block_size_bytes(); + scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + phys_addr = lmb->base_addr; + + for (i = 0; i < scns_per_block; i++) { + pfn = PFN_DOWN(phys_addr); + if (!pfn_present(pfn)) + continue; + + 
rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION); + phys_addr += MIN_MEMORY_BLOCK_SIZE; + } + + return rc ? true : false; +} + +static int dlpar_add_lmb(struct of_drconf_cell *); + +static int dlpar_remove_lmb(struct of_drconf_cell *lmb) +{ + struct memory_block *mem_block; + unsigned long block_sz; + int nid, rc; + + if (!lmb_is_removable(lmb)) + return -EINVAL; + + mem_block = lmb_to_memblock(lmb); + if (!mem_block) + return -EINVAL; + + rc = device_offline(&mem_block->dev); + put_device(&mem_block->dev); + if (rc) + return rc; + + block_sz = pseries_memory_block_size(); + nid = memory_add_physaddr_to_nid(lmb->base_addr); + + remove_memory(nid, lmb->base_addr, block_sz); + + /* Update memory regions for memory remove */ + memblock_remove(lmb->base_addr, block_sz); + + dlpar_release_drc(lmb->drc_index); + + lmb->flags &= ~DRCONF_MEM_ASSIGNED; + return 0; +} + +static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, + struct property *prop) +{ + struct of_drconf_cell *lmbs; + int lmbs_removed = 0; + int lmbs_available = 0; + u32 num_lmbs, *p; + int i, rc; + + pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove); + + if (lmbs_to_remove == 0) + return -EINVAL; + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + /* Validate that there are enough LMBs to satisfy the request */ + for (i = 0; i < num_lmbs; i++) { + if (lmbs[i].flags & DRCONF_MEM_ASSIGNED) + lmbs_available++; + } + + if (lmbs_available < lmbs_to_remove) + return -EINVAL; + + for (i = 0; i < num_lmbs && lmbs_removed < lmbs_to_remove; i++) { + rc = dlpar_remove_lmb(&lmbs[i]); + if (rc) + continue; + + lmbs_removed++; + + /* Mark this lmb so we can add it later if all of the + * requested LMBs cannot be removed. + */ + lmbs[i].reserved = 1; + } + + if (lmbs_removed != lmbs_to_remove) { + pr_err("Memory hot-remove failed, adding LMB's back\n"); + + for (i = 0; i < num_lmbs; i++) { + if (!lmbs[i].reserved) + continue; + + rc = dlpar_add_lmb(&lmbs[i]); + if (rc) + pr_err("Failed to add LMB back, drc index %x\n", + lmbs[i].drc_index); + + lmbs[i].reserved = 0; + } + + rc = -EINVAL; + } else { + for (i = 0; i < num_lmbs; i++) { + if (!lmbs[i].reserved) + continue; + + pr_info("Memory at %llx was hot-removed\n", + lmbs[i].base_addr); + + lmbs[i].reserved = 0; + } + rc = 0; + } + + return rc; +} + +static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int lmb_found; + int i, rc; + + pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index); + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + lmb_found = 0; + for (i = 0; i < num_lmbs; i++) { + if (lmbs[i].drc_index == drc_index) { + lmb_found = 1; + rc = dlpar_remove_lmb(&lmbs[i]); + break; + } + } + + if (!lmb_found) + rc = -EINVAL; + + if (rc) + pr_info("Failed to hot-remove memory at %llx\n", + lmbs[i].base_addr); + else + pr_info("Memory at %llx was hot-removed\n", lmbs[i].base_addr); + + return rc; +} + +#else +static inline int pseries_remove_memblock(unsigned long base, + unsigned int memblock_size) +{ + return -EOPNOTSUPP; +} +static inline int pseries_remove_mem_node(struct device_node *np) +{ + return 0; +} +static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) +{ + return -EOPNOTSUPP; +} +static int dlpar_remove_lmb(struct of_drconf_cell *lmb) +{ + return -EOPNOTSUPP; +} +static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, + struct property *prop) +{ + return -EOPNOTSUPP; +} +static int 
dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) +{ + return -EOPNOTSUPP; +} + +#endif /* CONFIG_MEMORY_HOTREMOVE */ + +static int dlpar_add_lmb(struct of_drconf_cell *lmb) +{ + struct memory_block *mem_block; + unsigned long block_sz; + int nid, rc; + + if (lmb->flags & DRCONF_MEM_ASSIGNED) + return -EINVAL; + + block_sz = memory_block_size_bytes(); + + rc = dlpar_acquire_drc(lmb->drc_index); + if (rc) + return rc; + + /* Find the node id for this address */ + nid = memory_add_physaddr_to_nid(lmb->base_addr); + + /* Add the memory */ + rc = add_memory(nid, lmb->base_addr, block_sz); + if (rc) { + dlpar_release_drc(lmb->drc_index); + return rc; + } + + /* Register this block of memory */ + rc = memblock_add(lmb->base_addr, block_sz); + if (rc) { + remove_memory(nid, lmb->base_addr, block_sz); + dlpar_release_drc(lmb->drc_index); + return rc; + } + + mem_block = lmb_to_memblock(lmb); + if (!mem_block) { + remove_memory(nid, lmb->base_addr, block_sz); + dlpar_release_drc(lmb->drc_index); + return -EINVAL; + } + + rc = device_online(&mem_block->dev); + put_device(&mem_block->dev); + if (rc) { + remove_memory(nid, lmb->base_addr, block_sz); + dlpar_release_drc(lmb->drc_index); + return rc; + } + + lmb->flags |= DRCONF_MEM_ASSIGNED; + return 0; +} + +static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int lmbs_available = 0; + int lmbs_added = 0; + int i, rc; + + pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add); + + if (lmbs_to_add == 0) + return -EINVAL; + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + /* Validate that there are enough LMBs to satisfy the request */ + for (i = 0; i < num_lmbs; i++) { + if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED)) + lmbs_available++; + } + + if (lmbs_available < lmbs_to_add) + return -EINVAL; + + for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) { + rc = dlpar_add_lmb(&lmbs[i]); + if (rc) + continue; + + lmbs_added++; + + /* Mark this lmb so we can remove it later if all of the + * requested LMBs cannot be added. 
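+ * (The lmbs[i].reserved flags double as the undo list that the
+ * rollback and success paths below both walk.)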
+ */ + lmbs[i].reserved = 1; + } + + if (lmbs_added != lmbs_to_add) { + pr_err("Memory hot-add failed, removing any added LMBs\n"); + + for (i = 0; i < num_lmbs; i++) { + if (!lmbs[i].reserved) + continue; + + rc = dlpar_remove_lmb(&lmbs[i]); + if (rc) + pr_err("Failed to remove LMB, drc index %x\n", + be32_to_cpu(lmbs[i].drc_index)); + } + rc = -EINVAL; + } else { + for (i = 0; i < num_lmbs; i++) { + if (!lmbs[i].reserved) + continue; + + pr_info("Memory at %llx (drc index %x) was hot-added\n", + lmbs[i].base_addr, lmbs[i].drc_index); + lmbs[i].reserved = 0; + } + } + + return rc; +} + +static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int i, lmb_found; + int rc; + + pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index); + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + + lmb_found = 0; + for (i = 0; i < num_lmbs; i++) { + if (lmbs[i].drc_index == drc_index) { + lmb_found = 1; + rc = dlpar_add_lmb(&lmbs[i]); + break; + } + } + + if (!lmb_found) + rc = -EINVAL; + + if (rc) + pr_info("Failed to hot-add memory, drc index %x\n", drc_index); + else + pr_info("Memory at %llx (drc index %x) was hot-added\n", + lmbs[i].base_addr, drc_index); + + return rc; +} + +static void dlpar_update_drconf_property(struct device_node *dn, + struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int i; + + /* Convert the property back to BE */ + p = prop->value; + num_lmbs = *p; + *p = cpu_to_be32(*p); + p++; + + lmbs = (struct of_drconf_cell *)p; + for (i = 0; i < num_lmbs; i++) { + lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr); + lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index); + lmbs[i].flags = cpu_to_be32(lmbs[i].flags); + } + + rtas_hp_event = true; + of_update_property(dn, prop); + rtas_hp_event = false; +} + +int dlpar_memory(struct pseries_hp_errorlog *hp_elog) +{ + struct device_node *dn; + struct property *prop; + u32 count, drc_index; + int rc; + + count = hp_elog->_drc_u.drc_count; + drc_index = hp_elog->_drc_u.drc_index; + + lock_device_hotplug(); + + dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!dn) { + rc = -EINVAL; + goto dlpar_memory_out; + } + + prop = dlpar_clone_drconf_property(dn); + if (!prop) { + rc = -EINVAL; + goto dlpar_memory_out; + } + + switch (hp_elog->action) { + case PSERIES_HP_ELOG_ACTION_ADD: + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) + rc = dlpar_memory_add_by_count(count, prop); + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + rc = dlpar_memory_add_by_index(drc_index, prop); + else + rc = -EINVAL; + break; + case PSERIES_HP_ELOG_ACTION_REMOVE: + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) + rc = dlpar_memory_remove_by_count(count, prop); + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + rc = dlpar_memory_remove_by_index(drc_index, prop); + else + rc = -EINVAL; + break; + default: + pr_err("Invalid action (%d) specified\n", hp_elog->action); + rc = -EINVAL; + break; + } + + if (rc) + dlpar_free_drconf_property(prop); + else + dlpar_update_drconf_property(dn, prop); + +dlpar_memory_out: + of_node_put(dn); + unlock_device_hotplug(); + return rc; +} + +static int pseries_add_mem_node(struct device_node *np) +{ + const char *type; + const __be32 *regs; + unsigned long base; + unsigned int lmb_size; + int ret = -EINVAL; + + /* + * Check to see if we are actually adding memory + */ + type = of_get_property(np, "device_type", NULL); + if (type == NULL || 
strcmp(type, "memory") != 0) + return 0; + + /* + * Find the base and size of the memblock + */ + regs = of_get_property(np, "reg", NULL); + if (!regs) + return ret; + + base = be64_to_cpu(*(unsigned long *)regs); + lmb_size = be32_to_cpu(regs[3]); + + /* + * Update memory region to represent the memory add + */ + ret = memblock_add(base, lmb_size); + return (ret < 0) ? -EINVAL : 0; +} + +static int pseries_update_drconf_memory(struct of_reconfig_data *pr) +{ + struct of_drconf_cell *new_drmem, *old_drmem; + unsigned long memblock_size; + u32 entries; + __be32 *p; + int i, rc = -EINVAL; + + if (rtas_hp_event) + return 0; + + memblock_size = pseries_memory_block_size(); + if (!memblock_size) + return -EINVAL; + + p = (__be32 *) pr->old_prop->value; + if (!p) + return -EINVAL; + + /* The first int of the property is the number of lmb's described + * by the property. This is followed by an array of of_drconf_cell + * entries. Get the number of entries and skip to the array of + * of_drconf_cell's. + */ + entries = be32_to_cpu(*p++); + old_drmem = (struct of_drconf_cell *)p; + + p = (__be32 *)pr->prop->value; + p++; + new_drmem = (struct of_drconf_cell *)p; + + for (i = 0; i < entries; i++) { + if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) && + (!(be32_to_cpu(new_drmem[i].flags) & DRCONF_MEM_ASSIGNED))) { + rc = pseries_remove_memblock( + be64_to_cpu(old_drmem[i].base_addr), + memblock_size); + break; + } else if ((!(be32_to_cpu(old_drmem[i].flags) & + DRCONF_MEM_ASSIGNED)) && + (be32_to_cpu(new_drmem[i].flags) & + DRCONF_MEM_ASSIGNED)) { + rc = memblock_add(be64_to_cpu(old_drmem[i].base_addr), + memblock_size); + rc = (rc < 0) ? -EINVAL : 0; + break; + } + } + return rc; +} + +static int pseries_memory_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct of_reconfig_data *rd = data; + int err = 0; + + switch (action) { + case OF_RECONFIG_ATTACH_NODE: + err = pseries_add_mem_node(rd->dn); + break; + case OF_RECONFIG_DETACH_NODE: + err = pseries_remove_mem_node(rd->dn); + break; + case OF_RECONFIG_UPDATE_PROPERTY: + if (!strcmp(rd->prop->name, "ibm,dynamic-memory")) + err = pseries_update_drconf_memory(rd); + break; + } + return notifier_from_errno(err); +} + +static struct notifier_block pseries_mem_nb = { + .notifier_call = pseries_memory_notifier, +}; + +static int __init pseries_memory_hotplug_init(void) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + of_reconfig_notifier_register(&pseries_mem_nb); + + return 0; +} +machine_device_initcall(pseries, pseries_memory_hotplug_init); diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S new file mode 100644 index 000000000..74b5b8e23 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -0,0 +1,338 @@ +/* + * This file contains the generic code to perform a call to the + * pSeries LPAR hypervisor. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <linux/jump_label.h> +#include <asm/hvcall.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ptrace.h> + + .section ".text" + +#ifdef CONFIG_TRACEPOINTS + +#ifndef HAVE_JUMP_LABEL + .section ".toc","aw" + + .globl hcall_tracepoint_refcount +hcall_tracepoint_refcount: + .llong 0 + + .section ".text" +#endif + +/* + * precall must preserve all registers. use unused STK_PARAM() + * areas to save snapshots and opcode. + */ +#define HCALL_INST_PRECALL(FIRST_REG) \ + mflr r0; \ + std r3,STK_PARAM(R3)(r1); \ + std r4,STK_PARAM(R4)(r1); \ + std r5,STK_PARAM(R5)(r1); \ + std r6,STK_PARAM(R6)(r1); \ + std r7,STK_PARAM(R7)(r1); \ + std r8,STK_PARAM(R8)(r1); \ + std r9,STK_PARAM(R9)(r1); \ + std r10,STK_PARAM(R10)(r1); \ + std r0,16(r1); \ + addi r4,r1,STK_PARAM(FIRST_REG); \ + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + bl __trace_hcall_entry; \ + ld r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + ld r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1); \ + ld r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1); \ + ld r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1); \ + ld r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1); \ + ld r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1); \ + ld r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1); \ + ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1) + +/* + * postcall is performed immediately before function return which + * allows liberal use of volatile registers. + */ +#define __HCALL_INST_POSTCALL \ + ld r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + std r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + mr r4,r3; \ + mr r3,r0; \ + bl __trace_hcall_exit; \ + ld r0,STACK_FRAME_OVERHEAD+16(r1); \ + addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r3,STK_PARAM(R3)(r1); \ + mtlr r0 + +#define HCALL_INST_POSTCALL_NORETS \ + li r5,0; \ + __HCALL_INST_POSTCALL + +#define HCALL_INST_POSTCALL(BUFREG) \ + mr r5,BUFREG; \ + __HCALL_INST_POSTCALL + +#ifdef HAVE_JUMP_LABEL +#define HCALL_BRANCH(LABEL) \ + ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key) +#else + +/* + * We branch around this in early init (eg when populating the MMU + * hashtable) by using an unconditional cpu feature. 
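+ * The (0, 1) mask/value pair in the FTR section below can never
+ * match, so once the feature fixups have run the 'b 1f' is patched
+ * out and the refcount test becomes live.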
+ */ +#define HCALL_BRANCH(LABEL) \ +BEGIN_FTR_SECTION; \ + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + std r12,32(r1); \ + cmpdi r12,0; \ + bne- LABEL; \ +1: +#endif + +#else +#define HCALL_INST_PRECALL(FIRST_ARG) +#define HCALL_INST_POSTCALL_NORETS +#define HCALL_INST_POSTCALL(BUFREG) +#define HCALL_BRANCH(LABEL) +#endif + +_GLOBAL_TOC(plpar_hcall_norets) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + HCALL_BRANCH(plpar_hcall_norets_trace) + HVSC /* invoke the hypervisor */ + + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall_norets_trace: + HCALL_INST_PRECALL(R4) + HVSC + HCALL_INST_POSTCALL_NORETS + lwz r0,8(r1) + mtcrf 0xff,r0 + blr +#endif + +_GLOBAL_TOC(plpar_hcall) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_BRANCH(plpar_hcall_trace) + + std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + HVSC /* invoke the hypervisor */ + + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall_trace: + HCALL_INST_PRECALL(R5) + + std r4,STK_PARAM(R4)(r1) + mr r0,r4 + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + HVSC + + ld r12,STK_PARAM(R4)(r1) + std r4,0(r12) + std r5,8(r12) + std r6,16(r12) + std r7,24(r12) + + HCALL_INST_POSTCALL(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr +#endif + +/* + * plpar_hcall_raw can be called in real mode. kexec/kdump need some + * hypervisor calls to be executed in real mode. So plpar_hcall_raw + * does not access the per cpu hypervisor call statistics variables, + * since these variables may not be present in the RMO region. 
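+ * For the same reason the HCALL_BRANCH tracepoint hook is omitted
+ * from plpar_hcall_raw and plpar_hcall9_raw.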
+ */ +_GLOBAL(plpar_hcall_raw) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + HVSC /* invoke the hypervisor */ + + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +_GLOBAL_TOC(plpar_hcall9) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_BRANCH(plpar_hcall9_trace) + + std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STK_PARAM(R11)(r1) /* put arg7 in R10 */ + ld r11,STK_PARAM(R12)(r1) /* put arg8 in R11 */ + ld r12,STK_PARAM(R13)(r1) /* put arg9 in R12 */ + + HVSC /* invoke the hypervisor */ + + mr r0,r12 + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + std r8, 32(r12) + std r9, 40(r12) + std r10,48(r12) + std r11,56(r12) + std r0, 64(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall9_trace: + HCALL_INST_PRECALL(R5) + + std r4,STK_PARAM(R4)(r1) + mr r0,r4 + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1) + ld r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1) + ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1) + + HVSC + + mr r0,r12 + ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1) + std r4,0(r12) + std r5,8(r12) + std r6,16(r12) + std r7,24(r12) + std r8,32(r12) + std r9,40(r12) + std r10,48(r12) + std r11,56(r12) + std r0,64(r12) + + HCALL_INST_POSTCALL(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr +#endif + +/* See plpar_hcall_raw to see why this is needed */ +_GLOBAL(plpar_hcall9_raw) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STK_PARAM(R11)(r1) /* put arg7 in R10 */ + ld r11,STK_PARAM(R12)(r1) /* put arg8 in R11 */ + ld r12,STK_PARAM(R13)(r1) /* put arg9 in R12 */ + + HVSC /* invoke the hypervisor */ + + mr r0,r12 + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + std r8, 32(r12) + std r9, 40(r12) + std r10,48(r12) + std r11,56(r12) + std r0, 64(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c new file mode 100644 index 000000000..f02ec3ab4 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2006 Mike Kravetz IBM Corporation + * + * Hypervisor Call Instrumentation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/cpumask.h> +#include <asm/hvcall.h> +#include <asm/firmware.h> +#include <asm/cputable.h> +#include <asm/trace.h> +#include <asm/machdep.h> + +DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); + +/* + * Routines for displaying the statistics in debugfs + */ +static void *hc_start(struct seq_file *m, loff_t *pos) +{ + if ((int)*pos < (HCALL_STAT_ARRAY_SIZE-1)) + return (void *)(unsigned long)(*pos + 1); + + return NULL; +} + +static void *hc_next(struct seq_file *m, void *p, loff_t * pos) +{ + ++*pos; + + return hc_start(m, pos); +} + +static void hc_stop(struct seq_file *m, void *p) +{ +} + +static int hc_show(struct seq_file *m, void *p) +{ + unsigned long h_num = (unsigned long)p; + struct hcall_stats *hs = m->private; + + if (hs[h_num].num_calls) { + if (cpu_has_feature(CPU_FTR_PURR)) + seq_printf(m, "%lu %lu %lu %lu\n", h_num<<2, + hs[h_num].num_calls, + hs[h_num].tb_total, + hs[h_num].purr_total); + else + seq_printf(m, "%lu %lu %lu\n", h_num<<2, + hs[h_num].num_calls, + hs[h_num].tb_total); + } + + return 0; +} + +static const struct seq_operations hcall_inst_seq_ops = { + .start = hc_start, + .next = hc_next, + .stop = hc_stop, + .show = hc_show +}; + +static int hcall_inst_seq_open(struct inode *inode, struct file *file) +{ + int rc; + struct seq_file *seq; + + rc = seq_open(file, &hcall_inst_seq_ops); + seq = file->private_data; + seq->private = file_inode(file)->i_private; + + return rc; +} + +static const struct file_operations hcall_inst_seq_fops = { + .open = hcall_inst_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#define HCALL_ROOT_DIR "hcall_inst" +#define CPU_NAME_BUF_SIZE 32 + + +static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long *args) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = this_cpu_ptr(&hcall_stats[opcode / 4]); + h->tb_start = mftb(); + h->purr_start = mfspr(SPRN_PURR); +} + +static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long retval, + unsigned long *retbuf) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = this_cpu_ptr(&hcall_stats[opcode / 4]); + h->num_calls++; + h->tb_total += mftb() - h->tb_start; + h->purr_total += mfspr(SPRN_PURR) - h->purr_start; +} + +static int __init hcall_inst_init(void) +{ + struct dentry *hcall_root; + struct dentry *hcall_file; + char cpu_name_buf[CPU_NAME_BUF_SIZE]; + int cpu; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + if (register_trace_hcall_entry(probe_hcall_entry, NULL)) + return -EINVAL; + + if (register_trace_hcall_exit(probe_hcall_exit, NULL)) { + unregister_trace_hcall_entry(probe_hcall_entry, NULL); + return -EINVAL; + } + + hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); + if (!hcall_root) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu); + hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO, + hcall_root, + per_cpu(hcall_stats, cpu), + &hcall_inst_seq_fops); + if (!hcall_file) + return -ENOMEM; + } + + return 0; +} +machine_device_initcall(pseries, hcall_inst_init); diff --git 
a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
new file mode 100644
index 000000000..849b29b3e
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -0,0 +1,88 @@
+/*
+ * hvconsole.c
+ * Copyright (C) 2004 Hollis Blanchard, IBM Corporation
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Additional Author(s):
+ * Ryan S. Arnold <rsa@us.ibm.com>
+ *
+ * LPAR console support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <asm/hvcall.h>
+#include <asm/hvconsole.h>
+#include <asm/plpar_wrappers.h>
+
+/**
+ * hvc_get_chars - retrieve characters from firmware for denoted vterm adapter
+ * @vtermno: The vtermno or unit_address of the adapter from which to fetch the
+ * data.
+ * @buf: The character buffer into which to put the character data fetched from
+ * firmware.
+ * @count: not used; the hcall always fetches up to 16 bytes, and the number
+ * of characters actually read is the return value.
+ */
+int hvc_get_chars(uint32_t vtermno, char *buf, int count)
+{
+ long ret;
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ unsigned long *lbuf = (unsigned long *)buf;
+
+ ret = plpar_hcall(H_GET_TERM_CHAR, retbuf, vtermno);
+ lbuf[0] = be64_to_cpu(retbuf[1]);
+ lbuf[1] = be64_to_cpu(retbuf[2]);
+
+ if (ret == H_SUCCESS)
+ return retbuf[0];
+
+ return 0;
+}
+
+EXPORT_SYMBOL(hvc_get_chars);
+
+/**
+ * hvc_put_chars - send characters to firmware for denoted vterm adapter
+ * @vtermno: The vtermno or unit_address of the adapter from which the data
+ * originated.
+ * @buf: The character buffer that contains the character data to send to
+ * firmware.
+ * @count: Send this number of characters.
+ */
+int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
+{
+ unsigned long *lbuf = (unsigned long *) buf;
+ long ret;
+
+ /* The hcall will return H_PARAMETER if 'count' exceeds firmware max. */
+ if (count > MAX_VIO_PUT_CHARS)
+ count = MAX_VIO_PUT_CHARS;
+
+ ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count,
+ cpu_to_be64(lbuf[0]),
+ cpu_to_be64(lbuf[1]));
+ if (ret == H_SUCCESS)
+ return count;
+ if (ret == H_BUSY)
+ return -EAGAIN;
+ return -EIO;
+}
+
+EXPORT_SYMBOL(hvc_put_chars);
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
new file mode 100644
index 000000000..eedb64594
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -0,0 +1,252 @@
+/*
+ * hvcserver.c
+ * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
+ *
+ * PPC64 virtual I/O console server support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <asm/hvcall.h>
+#include <asm/hvcserver.h>
+#include <asm/io.h>
+
+#define HVCS_ARCH_VERSION "1.0.0"
+
+MODULE_AUTHOR("Ryan S. Arnold <rsa@us.ibm.com>");
+MODULE_DESCRIPTION("IBM hvcs ppc64 API");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(HVCS_ARCH_VERSION);
+
+/*
+ * Convert arch specific return codes into relevant errnos. The hvcs
+ * functions aren't performance sensitive, so this conversion isn't an
+ * issue.
+ */
+static int hvcs_convert(long to_convert)
+{
+ switch (to_convert) {
+ case H_SUCCESS:
+ return 0;
+ case H_PARAMETER:
+ return -EINVAL;
+ case H_HARDWARE:
+ return -EIO;
+ case H_BUSY:
+ case H_LONG_BUSY_ORDER_1_MSEC:
+ case H_LONG_BUSY_ORDER_10_MSEC:
+ case H_LONG_BUSY_ORDER_100_MSEC:
+ case H_LONG_BUSY_ORDER_1_SEC:
+ case H_LONG_BUSY_ORDER_10_SEC:
+ case H_LONG_BUSY_ORDER_100_SEC:
+ return -EBUSY;
+ case H_FUNCTION: /* fall through */
+ default:
+ return -EPERM;
+ }
+}
+
+/**
+ * hvcs_free_partner_info - free pi allocated by hvcs_get_partner_info
+ * @head: list_head pointer for an allocated list of partner info structs to
+ * free.
+ *
+ * This function is used to free the partner info list that was returned by
+ * calling hvcs_get_partner_info().
+ */
+int hvcs_free_partner_info(struct list_head *head)
+{
+ struct hvcs_partner_info *pi;
+ struct list_head *element;
+
+ if (!head)
+ return -EINVAL;
+
+ while (!list_empty(head)) {
+ element = head->next;
+ pi = list_entry(element, struct hvcs_partner_info, node);
+ list_del(element);
+ kfree(pi);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(hvcs_free_partner_info);
+
+/* Helper function for hvcs_get_partner_info */
+static int hvcs_next_partner(uint32_t unit_address,
+ unsigned long last_p_partition_ID,
+ unsigned long last_p_unit_address, unsigned long *pi_buff)
+{
+ long retval;
+ retval = plpar_hcall_norets(H_VTERM_PARTNER_INFO, unit_address,
+ last_p_partition_ID,
+ last_p_unit_address, virt_to_phys(pi_buff));
+ return hvcs_convert(retval);
+}
+
+/**
+ * hvcs_get_partner_info - Get all of the partner info for a vty-server adapter
+ * @unit_address: The unit_address of the vty-server adapter for which this
+ * function is fetching partner info.
+ * @head: An initialized list_head pointer to an empty list, used to return
+ * the list of partner info fetched from the hypervisor to the caller.
+ * @pi_buff: A page sized buffer pre-allocated prior to calling this function
+ * that is to be used by firmware as an iterator to keep track
+ * of the partner info retrieval.
+ *
+ * This function returns zero on success, including when there is no partner
+ * info to report; a converted errno is returned only on failure.
+ *
+ * The pi_buff is pre-allocated prior to calling this function because this
+ * function may be called with a spin_lock held and a page-sized GFP_ATOMIC
+ * allocation is best avoided there.
+ *
+ * The first long of this buffer is used to store a partner unit address.
+ * The second long is used to store a partner partition ID and starting at
+ * pi_buff[2] is the 79 character Converged Location Code (a different size
+ * than the unsigned longs, hence the casting mumbo jumbo you see later).
+ *
+ * Invocation of this function should always be followed by an invocation of
+ * hvcs_free_partner_info() using a pointer to the SAME list head instance
+ * that was passed as a parameter to this function.
+ */
+int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
+ unsigned long *pi_buff)
+{
+ /*
+ * Dealt with as longs because of the hcall interface even though the
+ * values are uint32_t.
+ */
+ unsigned long last_p_partition_ID;
+ unsigned long last_p_unit_address;
+ struct hvcs_partner_info *next_partner_info = NULL;
+ int more = 1;
+ int retval;
+
+ /* invalid parameters; check before the buffer is touched */
+ if (!head || !pi_buff)
+ return -EINVAL;
+
+ memset(pi_buff, 0x00, PAGE_SIZE);
+ last_p_partition_ID = last_p_unit_address = ~0UL;
+ INIT_LIST_HEAD(head);
+
+ do {
+ retval = hvcs_next_partner(unit_address, last_p_partition_ID,
+ last_p_unit_address, pi_buff);
+ if (retval) {
+ /*
+ * Don't indicate that we've failed if we have
+ * any list elements.
+ */
+ if (!list_empty(head))
+ return 0;
+ return retval;
+ }
+
+ last_p_partition_ID = be64_to_cpu(pi_buff[0]);
+ last_p_unit_address = be64_to_cpu(pi_buff[1]);
+
+ /* This indicates that there are no further partners */
+ if (last_p_partition_ID == ~0UL
+ && last_p_unit_address == ~0UL)
+ break;
+
+ /* This is a very small struct and will be freed soon in
+ * hvcs_free_partner_info(). */
+ next_partner_info = kmalloc(sizeof(struct hvcs_partner_info),
+ GFP_ATOMIC);
+
+ if (!next_partner_info) {
+ printk(KERN_WARNING "HVCONSOLE: kmalloc() failed to"
+ " allocate partner info struct.\n");
+ hvcs_free_partner_info(head);
+ return -ENOMEM;
+ }
+
+ next_partner_info->unit_address
+ = (unsigned int)last_p_unit_address;
+ next_partner_info->partition_ID
+ = (unsigned int)last_p_partition_ID;
+
+ /* copy the Null-term char too */
+ strlcpy(&next_partner_info->location_code[0],
+ (char *)&pi_buff[2],
+ sizeof(next_partner_info->location_code));
+
+ list_add_tail(&(next_partner_info->node), head);
+ next_partner_info = NULL;
+
+ } while (more);
+
+ return 0;
+}
+EXPORT_SYMBOL(hvcs_get_partner_info);
+
+/**
+ * hvcs_register_connection - establish a connection between this vty-server and
+ * a vty.
+ * @unit_address: The unit address of the vty-server adapter that is to
+ * establish a connection.
+ * @p_partition_ID: The partition ID of the vty adapter that is to be connected.
+ * @p_unit_address: The unit address of the vty adapter to which the vty-server
+ * is to be connected.
+ *
+ * If this function is called once and -EINVAL is returned it may
+ * indicate that the partner info needs to be refreshed for the
+ * target unit address at which point the caller must invoke
+ * hvcs_get_partner_info() and then call this function again. If,
+ * for a second time, -EINVAL is returned then it indicates that
+ * there is probably already a partner connection registered to a
+ * different vty-server adapter. It is also possible that a second
+ * -EINVAL may indicate that one of the parms is not valid, for
+ * instance if the link was removed between the vty-server adapter
+ * and the vty adapter that you are trying to open. Don't shoot the
+ * messenger. Firmware implemented it this way.
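+ *
+ * An illustrative open sequence (error handling omitted):
+ *
+ *   hvcs_get_partner_info(unit_address, &head, pi_buff);
+ *   pi = list_first_entry(&head, struct hvcs_partner_info, node);
+ *   hvcs_register_connection(unit_address, pi->partition_ID,
+ *                            pi->unit_address);
+ *   hvcs_free_partner_info(&head);
+ *   ...
+ *   hvcs_free_connection(unit_address);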
+ */ +int hvcs_register_connection( uint32_t unit_address, + uint32_t p_partition_ID, uint32_t p_unit_address) +{ + long retval; + retval = plpar_hcall_norets(H_REGISTER_VTERM, unit_address, + p_partition_ID, p_unit_address); + return hvcs_convert(retval); +} +EXPORT_SYMBOL(hvcs_register_connection); + +/** + * hvcs_free_connection - free the connection between a vty-server and vty + * @unit_address: The unit address of the vty-server that is to have its + * connection severed. + * + * This function is used to free the partner connection between a vty-server + * adapter and a vty adapter. + * + * If -EBUSY is returned continue to call this function until 0 is returned. + */ +int hvcs_free_connection(uint32_t unit_address) +{ + long retval; + retval = plpar_hcall_norets(H_FREE_VTERM, unit_address); + return hvcs_convert(retval); +} +EXPORT_SYMBOL(hvcs_free_connection); diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c new file mode 100644 index 000000000..0240c4ff8 --- /dev/null +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -0,0 +1,165 @@ +/* + * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/list.h> +#include <linux/notifier.h> + +#include <asm/machdep.h> +#include <asm/rtas.h> +#include <asm/irq.h> +#include <asm/io_event_irq.h> + +#include "pseries.h" + +/* + * IO event interrupt is a mechanism provided by RTAS to return + * information about hardware error and non-error events. Device + * drivers can register their event handlers to receive events. + * Device drivers are expected to use atomic_notifier_chain_register() + * and atomic_notifier_chain_unregister() to register and unregister + * their event handlers. Since multiple IO event types and scopes + * share an IO event interrupt, the event handlers are called one + * by one until the IO event is claimed by one of the handlers. + * The event handlers are expected to return NOTIFY_OK if the + * event is handled by the event handler or NOTIFY_DONE if the + * event does not belong to the handler. + * + * Usage: + * + * Notifier function: + * #include <asm/io_event_irq.h> + * int event_handler(struct notifier_block *nb, unsigned long val, void *data) { + * p = (struct pseries_io_event_sect_data *) data; + * if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE; + * : + * : + * return NOTIFY_OK; + * } + * struct notifier_block event_nb = { + * .notifier_call = event_handler, + * } + * + * Registration: + * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb); + * + * Unregistration: + * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb); + */ + +ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list); +EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list); + +static int ioei_check_exception_token; + +static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; + +/** + * Find the data portion of an IO Event section from event log. + * @elog: RTAS error/event log. + * + * Return: + * pointer to a valid IO event section data. NULL if not found. 
+ */
+static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
+{
+ struct pseries_errorlog *sect;
+
+ /* We should only ever get called for io-event interrupts, but if
+ * we do get called for another type then something went wrong so
+ * make some noise about it.
+ * RTAS_TYPE_IO only exists in extended event log version 6 or later.
+ * No need to check event log version.
+ */
+ if (unlikely(rtas_error_type(elog) != RTAS_TYPE_IO)) {
+ printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d\n",
+ rtas_error_type(elog));
+ return NULL;
+ }
+
+ sect = get_pseries_errorlog(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
+ if (unlikely(!sect)) {
+ printk_once(KERN_WARNING "io_event_irq: RTAS extended event "
+ "log does not contain an IO Event section. "
+ "Could be a bug in system firmware!\n");
+ return NULL;
+ }
+ return (struct pseries_io_event *) &sect->data;
+}
+
+/*
+ * PAPR:
+ * - check-exception returns the first found error or event and clears that
+ * error or event so it is reported once.
+ * - Each interrupt returns one event. If a platform chooses to report
+ * multiple events through a single interrupt, it must ensure that the
+ * interrupt remains asserted until check-exception has been used to
+ * process all outstanding events for that interrupt.
+ *
+ * Implementation notes:
+ * - Events must be processed in the order they are returned. Hence,
+ * sequential in nature.
+ * - The owner of an event is determined by combinations of scope,
+ * event type, and sub-type. There is no easy way to pre-sort clients
+ * by scope or event type alone. For example, a Torrent ISR route change
+ * event is reported with scope 0x00 (Not Applicable) rather than
+ * 0x3B (Torrent-hub). It is better to let the clients identify
+ * who owns the event.
+ */
+
+static irqreturn_t ioei_interrupt(int irq, void *dev_id)
+{
+ struct pseries_io_event *event;
+ int rtas_rc;
+
+ for (;;) {
+ rtas_rc = rtas_call(ioei_check_exception_token, 6, 1, NULL,
+ RTAS_VECTOR_EXTERNAL_INTERRUPT,
+ virq_to_hw(irq),
+ RTAS_IO_EVENTS, 1 /* Time Critical */,
+ __pa(ioei_rtas_buf),
+ RTAS_DATA_BUF_SIZE);
+ if (rtas_rc != 0)
+ break;
+
+ event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf);
+ if (!event)
+ continue;
+
+ atomic_notifier_call_chain(&pseries_ioei_notifier_list,
+ 0, event);
+ }
+ return IRQ_HANDLED;
+}
+
+static int __init ioei_init(void)
+{
+ struct device_node *np;
+
+ ioei_check_exception_token = rtas_token("check-exception");
+ if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE)
+ return -ENODEV;
+
+ np = of_find_node_by_path("/event-sources/ibm,io-events");
+ if (np) {
+ request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT");
+ pr_info("IBM I/O event interrupts enabled\n");
+ of_node_put(np);
+ } else {
+ return -ENODEV;
+ }
+ return 0;
+}
+machine_subsys_initcall(pseries, ioei_init);
+
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
new file mode 100644
index 000000000..61d5a17f4
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -0,0 +1,1345 @@
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup:
+ *
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
+ *
+ * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
+ * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/crash_dump.h> +#include <linux/memory.h> +#include <linux/of.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/iommu.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/tce.h> +#include <asm/ppc-pci.h> +#include <asm/udbg.h> +#include <asm/mmzone.h> +#include <asm/plpar_wrappers.h> + +#include "pseries.h" + +static void tce_invalidate_pSeries_sw(struct iommu_table *tbl, + __be64 *startp, __be64 *endp) +{ + u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index; + unsigned long start, end, inc; + + start = __pa(startp); + end = __pa(endp); + inc = L1_CACHE_BYTES; /* invalidate a cacheline of TCEs at a time */ + + /* If this is non-zero, change the format. We shift the + * address and or in the magic from the device tree. 
*/ + if (tbl->it_busno) { + start <<= 12; + end <<= 12; + inc <<= 12; + start |= tbl->it_busno; + end |= tbl->it_busno; + } + + end |= inc - 1; /* round up end to be different than start */ + + mb(); /* Make sure TCEs in memory are written */ + while (start <= end) { + out_be64(invalidate, start); + start += inc; + } +} + +static int tce_build_pSeries(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + u64 proto_tce; + __be64 *tcep, *tces; + u64 rpn; + + proto_tce = TCE_PCI_READ; // Read allowed + + if (direction != DMA_TO_DEVICE) + proto_tce |= TCE_PCI_WRITE; + + tces = tcep = ((__be64 *)tbl->it_base) + index; + + while (npages--) { + /* can't move this out since we might cross MEMBLOCK boundary */ + rpn = __pa(uaddr) >> TCE_SHIFT; + *tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); + + uaddr += TCE_PAGE_SIZE; + tcep++; + } + + if (tbl->it_type & TCE_PCI_SWINV_CREATE) + tce_invalidate_pSeries_sw(tbl, tces, tcep - 1); + return 0; +} + + +static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) +{ + __be64 *tcep, *tces; + + tces = tcep = ((__be64 *)tbl->it_base) + index; + + while (npages--) + *(tcep++) = 0; + + if (tbl->it_type & TCE_PCI_SWINV_FREE) + tce_invalidate_pSeries_sw(tbl, tces, tcep - 1); +} + +static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) +{ + __be64 *tcep; + + tcep = ((__be64 *)tbl->it_base) + index; + + return be64_to_cpu(*tcep); +} + +static void tce_free_pSeriesLP(struct iommu_table*, long, long); +static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); + +static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + u64 rc = 0; + u64 proto_tce, tce; + u64 rpn; + int ret = 0; + long tcenum_start = tcenum, npages_start = npages; + + rpn = __pa(uaddr) >> TCE_SHIFT; + proto_tce = TCE_PCI_READ; + if (direction != DMA_TO_DEVICE) + proto_tce |= TCE_PCI_WRITE; + + while (npages--) { + tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; + rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; + tce_free_pSeriesLP(tbl, tcenum_start, + (npages_start - (npages + 1))); + break; + } + + if (rc && printk_ratelimit()) { + printk("tce_build_pSeriesLP: plpar_tce_put failed. 
rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%llx\n", (u64)tcenum); + printk("\ttce val = 0x%llx\n", tce ); + dump_stack(); + } + + tcenum++; + rpn++; + } + return ret; +} + +static DEFINE_PER_CPU(__be64 *, tce_page); + +static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + u64 rc = 0; + u64 proto_tce; + __be64 *tcep; + u64 rpn; + long l, limit; + long tcenum_start = tcenum, npages_start = npages; + int ret = 0; + unsigned long flags; + + if (npages == 1) { + return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + direction, attrs); + } + + local_irq_save(flags); /* to protect tcep and the page behind it */ + + tcep = __this_cpu_read(tce_page); + + /* This is safe to do since interrupts are off when we're called + * from iommu_alloc{,_sg}() + */ + if (!tcep) { + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + /* If allocation fails, fall back to the loop implementation */ + if (!tcep) { + local_irq_restore(flags); + return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + direction, attrs); + } + __this_cpu_write(tce_page, tcep); + } + + rpn = __pa(uaddr) >> TCE_SHIFT; + proto_tce = TCE_PCI_READ; + if (direction != DMA_TO_DEVICE) + proto_tce |= TCE_PCI_WRITE; + + /* We can map max one pageful of TCEs at a time */ + do { + /* + * Set up the page with TCE data, looping through and setting + * the values. + */ + limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); + + for (l = 0; l < limit; l++) { + tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); + rpn++; + } + + rc = plpar_tce_put_indirect((u64)tbl->it_index, + (u64)tcenum << 12, + (u64)__pa(tcep), + limit); + + npages -= limit; + tcenum += limit; + } while (npages > 0 && !rc); + + local_irq_restore(flags); + + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; + tce_freemulti_pSeriesLP(tbl, tcenum_start, + (npages_start - (npages + limit))); + return ret; + } + + if (rc && printk_ratelimit()) { + printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%llx\n", (u64)npages); + printk("\ttce[0] val = 0x%llx\n", tcep[0]); + dump_stack(); + } + return ret; +} + +static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +{ + u64 rc; + + while (npages--) { + rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); + + if (rc && printk_ratelimit()) { + printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%llx\n", (u64)tcenum); + dump_stack(); + } + + tcenum++; + } +} + + +static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +{ + u64 rc; + + rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); + + if (rc && printk_ratelimit()) { + printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); + printk("\trc = %lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%llx\n", (u64)npages); + dump_stack(); + } +} + +static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) +{ + u64 rc; + unsigned long tce_ret; + + rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret); + + if (rc && printk_ratelimit()) { + printk("tce_get_pSeriesLP: plpar_tce_get failed. 
rc=%lld\n", rc);
+ printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
+ printk("\ttcenum = 0x%llx\n", (u64)tcenum);
+ dump_stack();
+ }
+
+ return tce_ret;
+}
+
+/* this is compatible with cells for the device tree property */
+struct dynamic_dma_window_prop {
+ __be32 liobn; /* tce table number */
+ __be64 dma_base; /* address hi,lo */
+ __be32 tce_shift; /* ilog2(tce_page_size) */
+ __be32 window_shift; /* ilog2(tce_window_size) */
+};
+
+struct direct_window {
+ struct device_node *device;
+ const struct dynamic_dma_window_prop *prop;
+ struct list_head list;
+};
+
+/* Dynamic DMA Window support */
+struct ddw_query_response {
+ u32 windows_available;
+ u32 largest_available_block;
+ u32 page_size;
+ u32 migration_capable;
+};
+
+struct ddw_create_response {
+ u32 liobn;
+ u32 addr_hi;
+ u32 addr_lo;
+};
+
+static LIST_HEAD(direct_window_list);
+/* prevents races between memory on/offline and window creation */
+static DEFINE_SPINLOCK(direct_window_list_lock);
+/* protects initializing window twice for same device */
+static DEFINE_MUTEX(direct_window_init_mutex);
+#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
+
+static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
+ unsigned long num_pfn, const void *arg)
+{
+ const struct dynamic_dma_window_prop *maprange = arg;
+ int rc;
+ u64 tce_size, num_tce, dma_offset, next;
+ u32 tce_shift;
+ long limit;
+
+ tce_shift = be32_to_cpu(maprange->tce_shift);
+ tce_size = 1ULL << tce_shift;
+ next = start_pfn << PAGE_SHIFT;
+ num_tce = num_pfn << PAGE_SHIFT;
+
+ /* round back to the beginning of the tce page size */
+ num_tce += next & (tce_size - 1);
+ next &= ~(tce_size - 1);
+
+ /* convert to number of tces */
+ num_tce |= tce_size - 1;
+ num_tce >>= tce_shift;
+
+ do {
+ /*
+ * Set up the page with TCE data, looping through and setting
+ * the values.
+ */
+ limit = min_t(long, num_tce, 512);
+ dma_offset = next + be64_to_cpu(maprange->dma_base);
+
+ rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
+ dma_offset,
+ 0, limit);
+ next += limit * tce_size;
+ num_tce -= limit;
+ } while (num_tce > 0 && !rc);
+
+ return rc;
+}
+
+static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+ unsigned long num_pfn, const void *arg)
+{
+ const struct dynamic_dma_window_prop *maprange = arg;
+ u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn;
+ __be64 *tcep;
+ u32 tce_shift;
+ u64 rc = 0;
+ long l, limit;
+
+ local_irq_disable();	/* to protect tcep and the page behind it */
+ tcep = __this_cpu_read(tce_page);
+
+ if (!tcep) {
+ tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
+ if (!tcep) {
+ local_irq_enable();
+ return -ENOMEM;
+ }
+ __this_cpu_write(tce_page, tcep);
+ }
+
+ proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
+
+ liobn = (u64)be32_to_cpu(maprange->liobn);
+ tce_shift = be32_to_cpu(maprange->tce_shift);
+ tce_size = 1ULL << tce_shift;
+ next = start_pfn << PAGE_SHIFT;
+ num_tce = num_pfn << PAGE_SHIFT;
+
+ /* round back to the beginning of the tce page size */
+ num_tce += next & (tce_size - 1);
+ next &= ~(tce_size - 1);
+
+ /* convert to number of tces */
+ num_tce |= tce_size - 1;
+ num_tce >>= tce_shift;
+
+ /* We can map max one pageful of TCEs at a time */
+ do {
+ /*
+ * Set up the page with TCE data, looping through and setting
+ * the values.
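The pfn-range arithmetic in the two helpers above is worth checking in isolation. A standalone sketch, assuming 4K kernel pages and a hypothetical 64K TCE page size (not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12 /* assumed 4K kernel pages for this sketch */

/* Mirror the pfn-range to (aligned start, tce count) conversion. */
static void range_to_tces(uint64_t start_pfn, uint64_t num_pfn,
			  uint32_t tce_shift)
{
	uint64_t tce_size = 1ULL << tce_shift;
	uint64_t next = start_pfn << PAGE_SHIFT;
	uint64_t num_tce = num_pfn << PAGE_SHIFT;

	/* round back to the beginning of the tce page size */
	num_tce += next & (tce_size - 1);
	next &= ~(tce_size - 1);

	/* convert to a number of tces */
	num_tce |= tce_size - 1;
	num_tce >>= tce_shift;

	printf("start 0x%llx, %llu tces\n",
	       (unsigned long long)next, (unsigned long long)num_tce);
}

int main(void)
{
	/* 32 4K pfns from pfn 0x100: two 64K TCE pages */
	range_to_tces(0x100, 0x20, 16);
	return 0;
}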
+ */ + limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE); + dma_offset = next + be64_to_cpu(maprange->dma_base); + + for (l = 0; l < limit; l++) { + tcep[l] = cpu_to_be64(proto_tce | next); + next += tce_size; + } + + rc = plpar_tce_put_indirect(liobn, + dma_offset, + (u64)__pa(tcep), + limit); + + num_tce -= limit; + } while (num_tce > 0 && !rc); + + /* error cleanup: caller will clear whole range */ + + local_irq_enable(); + return rc; +} + +static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn, + unsigned long num_pfn, void *arg) +{ + return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg); +} + + +#ifdef CONFIG_PCI +static void iommu_table_setparms(struct pci_controller *phb, + struct device_node *dn, + struct iommu_table *tbl) +{ + struct device_node *node; + const unsigned long *basep, *sw_inval; + const u32 *sizep; + + node = phb->dn; + + basep = of_get_property(node, "linux,tce-base", NULL); + sizep = of_get_property(node, "linux,tce-size", NULL); + if (basep == NULL || sizep == NULL) { + printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has " + "missing tce entries !\n", dn->full_name); + return; + } + + tbl->it_base = (unsigned long)__va(*basep); + + if (!is_kdump_kernel()) + memset((void *)tbl->it_base, 0, *sizep); + + tbl->it_busno = phb->bus->number; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + + /* Units of tce entries */ + tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift; + + /* Test if we are going over 2GB of DMA space */ + if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { + udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + } + + phb->dma_window_base_cur += phb->dma_window_size; + + /* Set the tce table size - measured in entries */ + tbl->it_size = phb->dma_window_size >> tbl->it_page_shift; + + tbl->it_index = 0; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; + + sw_inval = of_get_property(node, "linux,tce-sw-invalidate-info", NULL); + if (sw_inval) { + /* + * This property contains information on how to + * invalidate the TCE entry. The first property is + * the base MMIO address used to invalidate entries. + * The second property tells us the format of the TCE + * invalidate (whether it needs to be shifted) and + * some magic routing info to add to our invalidate + * command. + */ + tbl->it_index = (unsigned long) ioremap(sw_inval[0], 8); + tbl->it_busno = sw_inval[1]; /* overload this with magic */ + tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE; + } +} + +/* + * iommu_table_setparms_lpar + * + * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. 
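The it_offset/it_size bookkeeping in these setparms routines is a plain units conversion from bytes to TCE entries. A standalone sketch with hypothetical window numbers (illustrative only):

#include <stdint.h>
#include <stdio.h>

#define IOMMU_PAGE_SHIFT_4K 12

int main(void)
{
	/* hypothetical 128MB window starting at 128MB */
	uint64_t window_base = 0x8000000ull;
	uint64_t window_size = 0x8000000ull;

	/* same conversion as it_offset/it_size: bytes -> 4K entries */
	printf("it_offset=%llu it_size=%llu entries\n",
	       (unsigned long long)(window_base >> IOMMU_PAGE_SHIFT_4K),
	       (unsigned long long)(window_size >> IOMMU_PAGE_SHIFT_4K));
	return 0;
}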
+ */ +static void iommu_table_setparms_lpar(struct pci_controller *phb, + struct device_node *dn, + struct iommu_table *tbl, + const __be32 *dma_window) +{ + unsigned long offset, size; + + of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size); + + tbl->it_busno = phb->bus->number; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->it_base = 0; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; + tbl->it_offset = offset >> tbl->it_page_shift; + tbl->it_size = size >> tbl->it_page_shift; +} + +static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) +{ + struct device_node *dn; + struct iommu_table *tbl; + struct device_node *isa_dn, *isa_dn_orig; + struct device_node *tmp; + struct pci_dn *pci; + int children; + + dn = pci_bus_to_OF_node(bus); + + pr_debug("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name); + + if (bus->self) { + /* This is not a root bus, any setup will be done for the + * device-side of the bridge in iommu_dev_setup_pSeries(). + */ + return; + } + pci = PCI_DN(dn); + + /* Check if the ISA bus on the system is under + * this PHB. + */ + isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); + + while (isa_dn && isa_dn != dn) + isa_dn = isa_dn->parent; + + of_node_put(isa_dn_orig); + + /* Count number of direct PCI children of the PHB. */ + for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) + children++; + + pr_debug("Children: %d\n", children); + + /* Calculate amount of DMA window per slot. Each window must be + * a power of two (due to pci_alloc_consistent requirements). + * + * Keep 256MB aside for PHBs with ISA. + */ + + if (!isa_dn) { + /* No ISA/IDE - just set window size and return */ + pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ + + while (pci->phb->dma_window_size * children > 0x80000000ul) + pci->phb->dma_window_size >>= 1; + pr_debug("No ISA/IDE, window size is 0x%llx\n", + pci->phb->dma_window_size); + pci->phb->dma_window_base_cur = 0; + + return; + } + + /* If we have ISA, then we probably have an IDE + * controller too. Allocate a 128MB table but + * skip the first 128MB to avoid stepping on ISA + * space. 
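The per-slot sizing on either side of this comment reduces to halving a power-of-two window until all children fit under a cap. A standalone sketch, assuming 64-bit unsigned long (not part of the patch):

#include <stdio.h>

/* Halve a power-of-two window until every child slot fits under the cap. */
static unsigned long window_per_slot(unsigned long children, unsigned long cap)
{
	unsigned long size = 0x80000000ul; /* start from the full 2GB */

	while (size * children > cap)
		size >>= 1;
	return size;
}

int main(void)
{
	/* three children under a 2GB cap -> 512MB windows each */
	printf("0x%lx\n", window_per_slot(3, 0x80000000ul));
	return 0;
}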
+ */ + pci->phb->dma_window_size = 0x8000000ul; + pci->phb->dma_window_base_cur = 0x8000000ul; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + pci->phb->node); + + iommu_table_setparms(pci->phb, dn, tbl); + pci->iommu_table = iommu_init_table(tbl, pci->phb->node); + iommu_register_group(tbl, pci_domain_nr(bus), 0); + + /* Divide the rest (1.75GB) among the children */ + pci->phb->dma_window_size = 0x80000000ul; + while (pci->phb->dma_window_size * children > 0x70000000ul) + pci->phb->dma_window_size >>= 1; + + pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); +} + + +static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) +{ + struct iommu_table *tbl; + struct device_node *dn, *pdn; + struct pci_dn *ppci; + const __be32 *dma_window = NULL; + + dn = pci_bus_to_OF_node(bus); + + pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n", + dn->full_name); + + /* Find nearest ibm,dma-window, walking up the device tree */ + for (pdn = dn; pdn != NULL; pdn = pdn->parent) { + dma_window = of_get_property(pdn, "ibm,dma-window", NULL); + if (dma_window != NULL) + break; + } + + if (dma_window == NULL) { + pr_debug(" no ibm,dma-window property !\n"); + return; + } + + ppci = PCI_DN(pdn); + + pr_debug(" parent is %s, iommu_table: 0x%p\n", + pdn->full_name, ppci->iommu_table); + + if (!ppci->iommu_table) { + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + ppci->phb->node); + iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); + ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); + iommu_register_group(tbl, pci_domain_nr(bus), 0); + pr_debug(" created table: %p\n", ppci->iommu_table); + } +} + + +static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) +{ + struct device_node *dn; + struct iommu_table *tbl; + + pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); + + dn = dev->dev.of_node; + + /* If we're the direct child of a root bus, then we need to allocate + * an iommu table ourselves. The bus setup code should have setup + * the window sizes already. + */ + if (!dev->bus->self) { + struct pci_controller *phb = PCI_DN(dn)->phb; + + pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + phb->node); + iommu_table_setparms(phb, dn, tbl); + PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); + iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); + set_iommu_table_base_and_group(&dev->dev, + PCI_DN(dn)->iommu_table); + return; + } + + /* If this device is further down the bus tree, search upwards until + * an already allocated iommu table is found and use that. 
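That upward search is the same parent-walk pattern used throughout this file. A toy standalone sketch of the shape (the node type here is hypothetical, not the kernel's struct device_node):

#include <stddef.h>
#include <stdio.h>

/* Toy stand-in for a device-tree node; illustrative only. */
struct toy_node {
	struct toy_node *parent;
	int has_iommu_table;
};

/* Walk towards the root until a node with a table is found. */
static struct toy_node *find_table(struct toy_node *dn)
{
	while (dn && !dn->has_iommu_table)
		dn = dn->parent;
	return dn;
}

int main(void)
{
	struct toy_node phb = { NULL, 1 };
	struct toy_node bridge = { &phb, 0 };
	struct toy_node dev = { &bridge, 0 };

	printf("%s\n", find_table(&dev) == &phb ? "found at PHB" : "not found");
	return 0;
}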
+ */ + + while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL) + dn = dn->parent; + + if (dn && PCI_DN(dn)) + set_iommu_table_base_and_group(&dev->dev, + PCI_DN(dn)->iommu_table); + else + printk(KERN_WARNING "iommu: Device %s has no iommu table\n", + pci_name(dev)); +} + +static int __read_mostly disable_ddw; + +static int __init disable_ddw_setup(char *str) +{ + disable_ddw = 1; + printk(KERN_INFO "ppc iommu: disabling ddw.\n"); + + return 0; +} + +early_param("disable_ddw", disable_ddw_setup); + +static void remove_ddw(struct device_node *np, bool remove_prop) +{ + struct dynamic_dma_window_prop *dwp; + struct property *win64; + u32 ddw_avail[3]; + u64 liobn; + int ret = 0; + + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", + &ddw_avail[0], 3); + + win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); + if (!win64) + return; + + if (ret || win64->length < sizeof(*dwp)) + goto delprop; + + dwp = win64->value; + liobn = (u64)be32_to_cpu(dwp->liobn); + + /* clear the whole window, note the arg is in kernel pages */ + ret = tce_clearrange_multi_pSeriesLP(0, + 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); + if (ret) + pr_warning("%s failed to clear tces in window.\n", + np->full_name); + else + pr_debug("%s successfully cleared tces in window.\n", + np->full_name); + + ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); + if (ret) + pr_warning("%s: failed to remove direct window: rtas returned " + "%d to ibm,remove-pe-dma-window(%x) %llx\n", + np->full_name, ret, ddw_avail[2], liobn); + else + pr_debug("%s: successfully removed direct window: rtas returned " + "%d to ibm,remove-pe-dma-window(%x) %llx\n", + np->full_name, ret, ddw_avail[2], liobn); + +delprop: + if (remove_prop) + ret = of_remove_property(np, win64); + if (ret) + pr_warning("%s: failed to remove direct window property: %d\n", + np->full_name, ret); +} + +static u64 find_existing_ddw(struct device_node *pdn) +{ + struct direct_window *window; + const struct dynamic_dma_window_prop *direct64; + u64 dma_addr = 0; + + spin_lock(&direct_window_list_lock); + /* check if we already created a window and dupe that config if so */ + list_for_each_entry(window, &direct_window_list, list) { + if (window->device == pdn) { + direct64 = window->prop; + dma_addr = be64_to_cpu(direct64->dma_base); + break; + } + } + spin_unlock(&direct_window_list_lock); + + return dma_addr; +} + +static int find_existing_ddw_windows(void) +{ + int len; + struct device_node *pdn; + struct direct_window *window; + const struct dynamic_dma_window_prop *direct64; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + for_each_node_with_property(pdn, DIRECT64_PROPNAME) { + direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); + if (!direct64) + continue; + + window = kzalloc(sizeof(*window), GFP_KERNEL); + if (!window || len < sizeof(struct dynamic_dma_window_prop)) { + kfree(window); + remove_ddw(pdn, true); + continue; + } + + window->device = pdn; + window->prop = direct64; + spin_lock(&direct_window_list_lock); + list_add(&window->list, &direct_window_list); + spin_unlock(&direct_window_list_lock); + } + + return 0; +} +machine_arch_initcall(pseries, find_existing_ddw_windows); + +static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, + struct ddw_query_response *query) +{ + struct eeh_dev *edev; + u32 cfg_addr; + u64 buid; + int ret; + + /* + * Get the config address and phb buid of the PE window. + * Rely on eeh to retrieve this for us. 
+ * Retrieve them from the pci device, not the node with the + * dma-window property + */ + edev = pci_dev_to_eeh_dev(dev); + cfg_addr = edev->config_addr; + if (edev->pe_config_addr) + cfg_addr = edev->pe_config_addr; + buid = edev->phb->buid; + + ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, + cfg_addr, BUID_HI(buid), BUID_LO(buid)); + dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" + " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid), + BUID_LO(buid), ret); + return ret; +} + +static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, + struct ddw_create_response *create, int page_shift, + int window_shift) +{ + struct eeh_dev *edev; + u32 cfg_addr; + u64 buid; + int ret; + + /* + * Get the config address and phb buid of the PE window. + * Rely on eeh to retrieve this for us. + * Retrieve them from the pci device, not the node with the + * dma-window property + */ + edev = pci_dev_to_eeh_dev(dev); + cfg_addr = edev->config_addr; + if (edev->pe_config_addr) + cfg_addr = edev->pe_config_addr; + buid = edev->phb->buid; + + do { + /* extra outputs are LIOBN and dma-addr (hi, lo) */ + ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, + cfg_addr, BUID_HI(buid), BUID_LO(buid), + page_shift, window_shift); + } while (rtas_busy_delay(ret)); + dev_info(&dev->dev, + "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " + "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1], + cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, + window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); + + return ret; +} + +struct failed_ddw_pdn { + struct device_node *pdn; + struct list_head list; +}; + +static LIST_HEAD(failed_ddw_pdn_list); + +/* + * If the PE supports dynamic dma windows, and there is space for a table + * that can map all pages in a linear offset, then setup such a table, + * and record the dma-offset in the struct device. + * + * dev: the pci device we are checking + * pdn: the parent pe node with the ibm,dma_window property + * Future: also check if we can remap the base window for our base page size + * + * returns the dma offset for use by dma_set_mask + */ +static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) +{ + int len, ret; + struct ddw_query_response query; + struct ddw_create_response create; + int page_shift; + u64 dma_addr, max_addr; + struct device_node *dn; + u32 ddw_avail[3]; + struct direct_window *window; + struct property *win64; + struct dynamic_dma_window_prop *ddwprop; + struct failed_ddw_pdn *fpdn; + + mutex_lock(&direct_window_init_mutex); + + dma_addr = find_existing_ddw(pdn); + if (dma_addr != 0) + goto out_unlock; + + /* + * If we already went through this for a previous function of + * the same device and failed, we don't want to muck with the + * DMA window again, as it will race with in-flight operations + * and can lead to EEHs. The above mutex protects access to the + * list. + */ + list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { + if (!strcmp(fpdn->pdn->full_name, pdn->full_name)) + goto out_unlock; + } + + /* + * the ibm,ddw-applicable property holds the tokens for: + * ibm,query-pe-dma-window + * ibm,create-pe-dma-window + * ibm,remove-pe-dma-window + * for the given node in that order. + * the property is actually in the parent, not the PE + */ + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", + &ddw_avail[0], 3); + if (ret) + goto out_failed; + + /* + * Query if there is a second window of size to map the + * whole partition. 
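create_ddw above retries around rtas_busy_delay(). The shape of that loop, as a standalone sketch with a faked RTAS call and a hypothetical busy status value (not the real RTAS status codes):

#include <stdio.h>

#define RTAS_BUSY 9000 /* hypothetical busy status, for this sketch only */

static int fake_rtas_call(void)
{
	static int calls;

	return ++calls < 3 ? RTAS_BUSY : 0; /* succeed on the third try */
}

/* Stand-in for rtas_busy_delay(): back off, return nonzero to retry. */
static int busy_delay(int status)
{
	return status == RTAS_BUSY;
}

int main(void)
{
	int ret;

	do {
		ret = fake_rtas_call();
	} while (busy_delay(ret));

	printf("final status %d\n", ret);
	return 0;
}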
Query returns number of windows, largest
+ * block assigned to PE (partition endpoint), and two bitmasks
+ * of page sizes: supported and supported for migrate-dma.
+ */
+ dn = pci_device_to_OF_node(dev);
+ ret = query_ddw(dev, ddw_avail, &query);
+ if (ret != 0)
+ goto out_failed;
+
+ if (query.windows_available == 0) {
+ /*
+ * no additional windows are available for this device.
+ * We might be able to reallocate the existing window,
+ * trading in for a larger page size.
+ */
+ dev_dbg(&dev->dev, "no free dynamic windows");
+ goto out_failed;
+ }
+ if (query.page_size & 4) {
+ page_shift = 24; /* 16MB */
+ } else if (query.page_size & 2) {
+ page_shift = 16; /* 64kB */
+ } else if (query.page_size & 1) {
+ page_shift = 12; /* 4kB */
+ } else {
+ dev_dbg(&dev->dev, "no supported direct page size in mask %x",
+ query.page_size);
+ goto out_failed;
+ }
+ /* verify the window * number of ptes will map the partition */
+ /* check largest block * page size > max memory hotplug addr */
+ max_addr = memory_hotplug_max();
+ if (query.largest_available_block < (max_addr >> page_shift)) {
+ dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
+ "%llu-sized pages\n", max_addr, query.largest_available_block,
+ 1ULL << page_shift);
+ goto out_failed;
+ }
+ len = order_base_2(max_addr);
+ win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
+ if (!win64) {
+ dev_info(&dev->dev,
+ "couldn't allocate property for 64bit dma window\n");
+ goto out_failed;
+ }
+ win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
+ win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
+ win64->length = sizeof(*ddwprop);
+ if (!win64->name || !win64->value) {
+ dev_info(&dev->dev,
+ "couldn't allocate property name and value\n");
+ goto out_free_prop;
+ }
+
+ ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
+ if (ret != 0)
+ goto out_free_prop;
+
+ ddwprop->liobn = cpu_to_be32(create.liobn);
+ ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) |
+ create.addr_lo);
+ ddwprop->tce_shift = cpu_to_be32(page_shift);
+ ddwprop->window_shift = cpu_to_be32(len);
+
+ dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %s\n",
+ create.liobn, dn->full_name);
+
+ window = kzalloc(sizeof(*window), GFP_KERNEL);
+ if (!window)
+ goto out_clear_window;
+
+ ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
+ win64->value, tce_setrange_multi_pSeriesLP_walk);
+ if (ret) {
+ dev_info(&dev->dev, "failed to map direct window for %s: %d\n",
+ dn->full_name, ret);
+ goto out_free_window;
+ }
+
+ ret = of_add_property(pdn, win64);
+ if (ret) {
+ dev_err(&dev->dev, "unable to add dma window property for %s: %d",
+ pdn->full_name, ret);
+ goto out_free_window;
+ }
+
+ window->device = pdn;
+ window->prop = ddwprop;
+ spin_lock(&direct_window_list_lock);
+ list_add(&window->list, &direct_window_list);
+ spin_unlock(&direct_window_list_lock);
+
+ dma_addr = be64_to_cpu(ddwprop->dma_base);
+ goto out_unlock;
+
+out_free_window:
+ kfree(window);
+
+out_clear_window:
+ remove_ddw(pdn, true);
+
+out_free_prop:
+ kfree(win64->name);
+ kfree(win64->value);
+ kfree(win64);
+
+out_failed:
+
+ fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
+ if (!fpdn)
+ goto out_unlock;
+ fpdn->pdn = pdn;
+ list_add(&fpdn->list, &failed_ddw_pdn_list);
+
+out_unlock:
+ mutex_unlock(&direct_window_init_mutex);
+ return dma_addr;
+}
+
+static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
+{
+ struct device_node *pdn, *dn;
+ struct iommu_table *tbl;
+ const __be32 *dma_window = NULL;
+ struct pci_dn *pci;
+
+
pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); + + /* dev setup for LPAR is a little tricky, since the device tree might + * contain the dma-window properties per-device and not necessarily + * for the bus. So we need to search upwards in the tree until we + * either hit a dma-window property, OR find a parent with a table + * already allocated. + */ + dn = pci_device_to_OF_node(dev); + pr_debug(" node is %s\n", dn->full_name); + + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; + pdn = pdn->parent) { + dma_window = of_get_property(pdn, "ibm,dma-window", NULL); + if (dma_window) + break; + } + + if (!pdn || !PCI_DN(pdn)) { + printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " + "no DMA window found for pci dev=%s dn=%s\n", + pci_name(dev), of_node_full_name(dn)); + return; + } + pr_debug(" parent is %s\n", pdn->full_name); + + pci = PCI_DN(pdn); + if (!pci->iommu_table) { + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + pci->phb->node); + iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); + pci->iommu_table = iommu_init_table(tbl, pci->phb->node); + iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); + pr_debug(" created table: %p\n", pci->iommu_table); + } else { + pr_debug(" found DMA window, table: %p\n", pci->iommu_table); + } + + set_iommu_table_base_and_group(&dev->dev, pci->iommu_table); +} + +static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) +{ + bool ddw_enabled = false; + struct device_node *pdn, *dn; + struct pci_dev *pdev; + const __be32 *dma_window = NULL; + u64 dma_offset; + + if (!dev->dma_mask) + return -EIO; + + if (!dev_is_pci(dev)) + goto check_mask; + + pdev = to_pci_dev(dev); + + /* only attempt to use a new window if 64-bit DMA is requested */ + if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) { + dn = pci_device_to_OF_node(pdev); + dev_dbg(dev, "node is %s\n", dn->full_name); + + /* + * the device tree might contain the dma-window properties + * per-device and not necessarily for the bus. So we need to + * search upwards in the tree until we either hit a dma-window + * property, OR find a parent with a table already allocated. 
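The mask handling that follows boils down to one decision. A standalone sketch of just that choice, with the usual DMA_BIT_MASK definition inlined (illustrative only):

#include <stdint.h>
#include <stdio.h>

#define DMA_BIT_MASK(n) ((n) == 64 ? ~0ULL : (1ULL << (n)) - 1)

/* The core of the dma_set_mask decision, reduced to a pure function. */
static const char *dma_path(uint64_t mask, int ddw_disabled)
{
	if (!ddw_disabled && mask == DMA_BIT_MASK(64))
		return "attempt 64-bit direct window (DDW)";
	return "32-bit TCE iommu window";
}

int main(void)
{
	printf("%s\n", dma_path(DMA_BIT_MASK(64), 0));
	printf("%s\n", dma_path(DMA_BIT_MASK(32), 0));
	return 0;
}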
+ */ + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; + pdn = pdn->parent) { + dma_window = of_get_property(pdn, "ibm,dma-window", NULL); + if (dma_window) + break; + } + if (pdn && PCI_DN(pdn)) { + dma_offset = enable_ddw(pdev, pdn); + if (dma_offset != 0) { + dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset); + set_dma_offset(dev, dma_offset); + set_dma_ops(dev, &dma_direct_ops); + ddw_enabled = true; + } + } + } + + /* fall back on iommu ops, restore table pointer with ops */ + if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) { + dev_info(dev, "Restoring 32-bit DMA via iommu\n"); + set_dma_ops(dev, &dma_iommu_ops); + pci_dma_dev_setup_pSeriesLP(pdev); + } + +check_mask: + if (!dma_supported(dev, dma_mask)) + return -EIO; + + *dev->dma_mask = dma_mask; + return 0; +} + +static u64 dma_get_required_mask_pSeriesLP(struct device *dev) +{ + if (!dev->dma_mask) + return 0; + + if (!disable_ddw && dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + struct device_node *dn; + + dn = pci_device_to_OF_node(pdev); + + /* search upwards for ibm,dma-window */ + for (; dn && PCI_DN(dn) && !PCI_DN(dn)->iommu_table; + dn = dn->parent) + if (of_get_property(dn, "ibm,dma-window", NULL)) + break; + /* if there is a ibm,ddw-applicable property require 64 bits */ + if (dn && PCI_DN(dn) && + of_get_property(dn, "ibm,ddw-applicable", NULL)) + return DMA_BIT_MASK(64); + } + + return dma_iommu_ops.get_required_mask(dev); +} + +#else /* CONFIG_PCI */ +#define pci_dma_bus_setup_pSeries NULL +#define pci_dma_dev_setup_pSeries NULL +#define pci_dma_bus_setup_pSeriesLP NULL +#define pci_dma_dev_setup_pSeriesLP NULL +#define dma_set_mask_pSeriesLP NULL +#define dma_get_required_mask_pSeriesLP NULL +#endif /* !CONFIG_PCI */ + +static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct direct_window *window; + struct memory_notify *arg = data; + int ret = 0; + + switch (action) { + case MEM_GOING_ONLINE: + spin_lock(&direct_window_list_lock); + list_for_each_entry(window, &direct_window_list, list) { + ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, + arg->nr_pages, window->prop); + /* XXX log error */ + } + spin_unlock(&direct_window_list_lock); + break; + case MEM_CANCEL_ONLINE: + case MEM_OFFLINE: + spin_lock(&direct_window_list_lock); + list_for_each_entry(window, &direct_window_list, list) { + ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, + arg->nr_pages, window->prop); + /* XXX log error */ + } + spin_unlock(&direct_window_list_lock); + break; + default: + break; + } + if (ret && action != MEM_CANCEL_ONLINE) + return NOTIFY_BAD; + + return NOTIFY_OK; +} + +static struct notifier_block iommu_mem_nb = { + .notifier_call = iommu_mem_notifier, +}; + +static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + int err = NOTIFY_OK; + struct of_reconfig_data *rd = data; + struct device_node *np = rd->dn; + struct pci_dn *pci = PCI_DN(np); + struct direct_window *window; + + switch (action) { + case OF_RECONFIG_DETACH_NODE: + /* + * Removing the property will invoke the reconfig + * notifier again, which causes dead-lock on the + * read-write semaphore of the notifier chain. So + * we have to remove the property when releasing + * the device node. 
+ */ + remove_ddw(np, false); + if (pci && pci->iommu_table) + iommu_free_table(pci->iommu_table, np->full_name); + + spin_lock(&direct_window_list_lock); + list_for_each_entry(window, &direct_window_list, list) { + if (window->device == np) { + list_del(&window->list); + kfree(window); + break; + } + } + spin_unlock(&direct_window_list_lock); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block iommu_reconfig_nb = { + .notifier_call = iommu_reconfig_notifier, +}; + +/* These are called very early. */ +void iommu_init_early_pSeries(void) +{ + if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) + return; + + if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_MULTITCE)) { + ppc_md.tce_build = tce_buildmulti_pSeriesLP; + ppc_md.tce_free = tce_freemulti_pSeriesLP; + } else { + ppc_md.tce_build = tce_build_pSeriesLP; + ppc_md.tce_free = tce_free_pSeriesLP; + } + ppc_md.tce_get = tce_get_pSeriesLP; + pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; + pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; + ppc_md.dma_set_mask = dma_set_mask_pSeriesLP; + ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP; + } else { + ppc_md.tce_build = tce_build_pSeries; + ppc_md.tce_free = tce_free_pSeries; + ppc_md.tce_get = tce_get_pseries; + pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; + pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; + } + + + of_reconfig_notifier_register(&iommu_reconfig_nb); + register_memory_notifier(&iommu_mem_nb); + + set_pci_dma_ops(&dma_iommu_ops); +} + +static int __init disable_multitce(char *str) +{ + if (strcmp(str, "off") == 0 && + firmware_has_feature(FW_FEATURE_LPAR) && + firmware_has_feature(FW_FEATURE_MULTITCE)) { + printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); + ppc_md.tce_build = tce_build_pSeriesLP; + ppc_md.tce_free = tce_free_pSeriesLP; + powerpc_firmware_features &= ~FW_FEATURE_MULTITCE; + } + return 1; +} + +__setup("multitce=", disable_multitce); + +machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init); diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c new file mode 100644 index 000000000..13fa95b3a --- /dev/null +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -0,0 +1,76 @@ +/* + * Copyright 2006 Michael Ellerman, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/interrupt.h> + +#include <asm/machdep.h> +#include <asm/page.h> +#include <asm/firmware.h> +#include <asm/kexec.h> +#include <asm/mpic.h> +#include <asm/xics.h> +#include <asm/smp.h> +#include <asm/plpar_wrappers.h> + +#include "pseries.h" + +static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) +{ + /* Don't risk a hypervisor call if we're crashing */ + if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { + int ret; + int cpu = smp_processor_id(); + int hwcpu = hard_smp_processor_id(); + + if (get_lppaca()->dtl_enable_mask) { + ret = unregister_dtl(hwcpu); + if (ret) { + pr_err("WARNING: DTL deregistration for cpu " + "%d (hw %d) failed with %d\n", + cpu, hwcpu, ret); + } + } + + ret = unregister_slb_shadow(hwcpu); + if (ret) { + pr_err("WARNING: SLB shadow buffer deregistration " + "for cpu %d (hw %d) failed with %d\n", + cpu, hwcpu, ret); + } + + ret = unregister_vpa(hwcpu); + if (ret) { + pr_err("WARNING: VPA deregistration for cpu %d " + "(hw %d) failed with %d\n", cpu, hwcpu, ret); + } + } +} + +static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary) +{ + pseries_kexec_cpu_down(crash_shutdown, secondary); + mpic_teardown_this_cpu(secondary); +} + +void __init setup_kexec_cpu_down_mpic(void) +{ + ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_mpic; +} + +static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary) +{ + pseries_kexec_cpu_down(crash_shutdown, secondary); + xics_kexec_teardown_cpu(secondary); +} + +void __init setup_kexec_cpu_down_xics(void) +{ + ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_xics; +} diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c new file mode 100644 index 000000000..b7a67e3d2 --- /dev/null +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -0,0 +1,796 @@ +/* + * pSeries_lpar.c + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * pSeries LPAR support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Enables debugging of low-level hash table routines - careful! 
*/ +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/dma-mapping.h> +#include <linux/console.h> +#include <linux/export.h> +#include <linux/jump_label.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/machdep.h> +#include <asm/mmu_context.h> +#include <asm/iommu.h> +#include <asm/tlbflush.h> +#include <asm/tlb.h> +#include <asm/prom.h> +#include <asm/cputable.h> +#include <asm/udbg.h> +#include <asm/smp.h> +#include <asm/trace.h> +#include <asm/firmware.h> +#include <asm/plpar_wrappers.h> +#include <asm/kexec.h> +#include <asm/fadump.h> + +#include "pseries.h" + +/* Flag bits for H_BULK_REMOVE */ +#define HBR_REQUEST 0x4000000000000000UL +#define HBR_RESPONSE 0x8000000000000000UL +#define HBR_END 0xc000000000000000UL +#define HBR_AVPN 0x0200000000000000UL +#define HBR_ANDCOND 0x0100000000000000UL + + +/* in hvCall.S */ +EXPORT_SYMBOL(plpar_hcall); +EXPORT_SYMBOL(plpar_hcall9); +EXPORT_SYMBOL(plpar_hcall_norets); + +void vpa_init(int cpu) +{ + int hwcpu = get_hard_smp_processor_id(cpu); + unsigned long addr; + long ret; + struct paca_struct *pp; + struct dtl_entry *dtl; + + /* + * The spec says it "may be problematic" if CPU x registers the VPA of + * CPU y. We should never do that, but wail if we ever do. + */ + WARN_ON(cpu != smp_processor_id()); + + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + lppaca_of(cpu).vmxregs_in_use = 1; + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + lppaca_of(cpu).ebb_regs_in_use = 1; + + addr = __pa(&lppaca_of(cpu)); + ret = register_vpa(hwcpu, addr); + + if (ret) { + pr_err("WARNING: VPA registration for cpu %d (hw %d) of area " + "%lx failed with %ld\n", cpu, hwcpu, addr, ret); + return; + } + /* + * PAPR says this feature is SLB-Buffer but firmware never + * reports that. All SPLPAR support SLB shadow buffer. + */ + addr = __pa(paca[cpu].slb_shadow_ptr); + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + ret = register_slb_shadow(hwcpu, addr); + if (ret) + pr_err("WARNING: SLB shadow buffer registration for " + "cpu %d (hw %d) of area %lx failed with %ld\n", + cpu, hwcpu, addr, ret); + } + + /* + * Register dispatch trace log, if one has been allocated. 
+ */ + pp = &paca[cpu]; + dtl = pp->dispatch_log; + if (dtl) { + pp->dtl_ridx = 0; + pp->dtl_curr = dtl; + lppaca_of(cpu).dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES); + ret = register_dtl(hwcpu, __pa(dtl)); + if (ret) + pr_err("WARNING: DTL registration of cpu %d (hw %d) " + "failed with %ld\n", smp_processor_id(), + hwcpu, ret); + lppaca_of(cpu).dtl_enable_mask = 2; + } +} + +static long pSeries_lpar_hpte_insert(unsigned long hpte_group, + unsigned long vpn, unsigned long pa, + unsigned long rflags, unsigned long vflags, + int psize, int apsize, int ssize) +{ + unsigned long lpar_rc; + unsigned long flags; + unsigned long slot; + unsigned long hpte_v, hpte_r; + + if (!(vflags & HPTE_V_BOLTED)) + pr_devel("hpte_insert(group=%lx, vpn=%016lx, " + "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n", + hpte_group, vpn, pa, rflags, vflags, psize); + + hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; + + if (!(vflags & HPTE_V_BOLTED)) + pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); + + /* Now fill in the actual HPTE */ + /* Set CEC cookie to 0 */ + /* Zero page = 0 */ + /* I-cache Invalidate = 0 */ + /* I-cache synchronize = 0 */ + /* Exact = 0 */ + flags = 0; + + /* Make pHyp happy */ + if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU)) + hpte_r &= ~HPTE_R_M; + + if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) + flags |= H_COALESCE_CAND; + + lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); + if (unlikely(lpar_rc == H_PTEG_FULL)) { + if (!(vflags & HPTE_V_BOLTED)) + pr_devel(" full\n"); + return -1; + } + + /* + * Since we try and ioremap PHBs we don't own, the pte insert + * will fail. However we must catch the failure in hash_page + * or we will loop forever, so return -2 in this case. + */ + if (unlikely(lpar_rc != H_SUCCESS)) { + if (!(vflags & HPTE_V_BOLTED)) + pr_devel(" lpar err %ld\n", lpar_rc); + return -2; + } + if (!(vflags & HPTE_V_BOLTED)) + pr_devel(" -> slot: %lu\n", slot & 7); + + /* Because of iSeries, we have to pass down the secondary + * bucket bit here as well + */ + return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); +} + +static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); + +static long pSeries_lpar_hpte_remove(unsigned long hpte_group) +{ + unsigned long slot_offset; + unsigned long lpar_rc; + int i; + unsigned long dummy1, dummy2; + + /* pick a random slot to start at */ + slot_offset = mftb() & 0x7; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + + /* don't remove a bolted entry */ + lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, + (0x1UL << 4), &dummy1, &dummy2); + if (lpar_rc == H_SUCCESS) + return i; + + /* + * The test for adjunct partition is performed before the + * ANDCOND test. H_RESOURCE may be returned, so we need to + * check for that as well. 
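The value pSeries_lpar_hpte_insert returns above packs the slot within the group together with the secondary-bucket bit. A standalone sketch decoding it (not part of the patch):

#include <stdio.h>

/* Decode the slot value pSeries_lpar_hpte_insert returns to its caller. */
static void decode_insert_ret(long ret)
{
	printf("slot %ld in the %s bucket\n", ret & 7,
	       (ret & 8) ? "secondary" : "primary");
}

int main(void)
{
	decode_insert_ret(5);     /* primary bucket, slot 5 */
	decode_insert_ret(8 | 3); /* secondary bucket, slot 3 */
	return 0;
}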
+ */ + BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE); + + slot_offset++; + slot_offset &= 0x7; + } + + return -1; +} + +static void pSeries_lpar_hptab_clear(void) +{ + unsigned long size_bytes = 1UL << ppc64_pft_size; + unsigned long hpte_count = size_bytes >> 4; + struct { + unsigned long pteh; + unsigned long ptel; + } ptes[4]; + long lpar_rc; + unsigned long i, j; + + /* Read in batches of 4, + * invalidate only valid entries not in the VRMA + * hpte_count will be a multiple of 4 + */ + for (i = 0; i < hpte_count; i += 4) { + lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes); + if (lpar_rc != H_SUCCESS) + continue; + for (j = 0; j < 4; j++){ + if ((ptes[j].pteh & HPTE_V_VRMA_MASK) == + HPTE_V_VRMA_MASK) + continue; + if (ptes[j].pteh & HPTE_V_VALID) + plpar_pte_remove_raw(0, i + j, 0, + &(ptes[j].pteh), &(ptes[j].ptel)); + } + } + +#ifdef __LITTLE_ENDIAN__ + /* + * Reset exceptions to big endian. + * + * FIXME this is a hack for kexec, we need to reset the exception + * endian before starting the new kernel and this is a convenient place + * to do it. + * + * This is also called on boot when a fadump happens. In that case we + * must not change the exception endian mode. + */ + if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active()) { + long rc; + + rc = pseries_big_endian_exceptions(); + /* + * At this point it is unlikely panic() will get anything + * out to the user, but at least this will stop us from + * continuing on further and creating an even more + * difficult to debug situation. + * + * There is a known problem when kdump'ing, if cpus are offline + * the above call will fail. Rather than panicking again, keep + * going and hope the kdump kernel is also little endian, which + * it usually is. + */ + if (rc && !kdump_in_progress()) + panic("Could not enable big endian exceptions"); + } +#endif +} + +/* + * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and + * the low 3 bits of flags happen to line up. So no transform is needed. + * We can probably optimize here and assume the high bits of newpp are + * already zero. For now I am paranoid. 
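hpte_remove above probes all eight slots of the group, starting at a pseudo-random offset and wrapping. The probe shape as a standalone sketch (the predicate is hypothetical):

#include <stdio.h>

#define HPTES_PER_GROUP 8

/* Probe all eight slots of a group from a pseudo-random start. */
static int probe_group(int start, int (*removable)(int slot))
{
	int i, offset = start & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (removable(offset))
			return offset;
		offset = (offset + 1) & 0x7; /* wrap within the group */
	}
	return -1;
}

static int only_slot_five(int slot)
{
	return slot == 5;
}

int main(void)
{
	printf("removed slot %d\n", probe_group(3, only_slot_five));
	return 0;
}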
+ */ +static long pSeries_lpar_hpte_updatepp(unsigned long slot, + unsigned long newpp, + unsigned long vpn, + int psize, int apsize, + int ssize, unsigned long inv_flags) +{ + unsigned long lpar_rc; + unsigned long flags = (newpp & 7) | H_AVPN; + unsigned long want_v; + + want_v = hpte_encode_avpn(vpn, psize, ssize); + + pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", + want_v, slot, flags, psize); + + lpar_rc = plpar_pte_protect(flags, slot, want_v); + + if (lpar_rc == H_NOT_FOUND) { + pr_devel("not found !\n"); + return -1; + } + + pr_devel("ok\n"); + + BUG_ON(lpar_rc != H_SUCCESS); + + return 0; +} + +static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) +{ + unsigned long dword0; + unsigned long lpar_rc; + unsigned long dummy_word1; + unsigned long flags; + + /* Read 1 pte at a time */ + /* Do not need RPN to logical page translation */ + /* No cross CEC PFT access */ + flags = 0; + + lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1); + + BUG_ON(lpar_rc != H_SUCCESS); + + return dword0; +} + +static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize) +{ + unsigned long hash; + unsigned long i; + long slot; + unsigned long want_v, hpte_v; + + hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); + + /* Bolted entries are always in the primary group */ + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hpte_v = pSeries_lpar_hpte_getword0(slot); + + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) + /* HPTE matches */ + return slot; + ++slot; + } + + return -1; +} + +static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, + unsigned long ea, + int psize, int ssize) +{ + unsigned long vpn; + unsigned long lpar_rc, slot, vsid, flags; + + vsid = get_kernel_vsid(ea, ssize); + vpn = hpt_vpn(ea, vsid, ssize); + + slot = pSeries_lpar_hpte_find(vpn, psize, ssize); + BUG_ON(slot == -1); + + flags = newpp & 7; + lpar_rc = plpar_pte_protect(flags, slot, 0); + + BUG_ON(lpar_rc != H_SUCCESS); +} + +static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, + int psize, int apsize, + int ssize, int local) +{ + unsigned long want_v; + unsigned long lpar_rc; + unsigned long dummy1, dummy2; + + pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n", + slot, vpn, psize, local); + + want_v = hpte_encode_avpn(vpn, psize, ssize); + lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); + if (lpar_rc == H_NOT_FOUND) + return; + + BUG_ON(lpar_rc != H_SUCCESS); +} + +/* + * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need + * to make sure that we avoid bouncing the hypervisor tlbie lock. 
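The bulk-remove paths that follow pack up to four (flags|slot, avpn) pairs per hcall, terminating a partial batch with HBR_END. A standalone sketch of that packing, reusing the HBR_* values defined earlier in this file (hypothetical slot and avpn inputs):

#include <stdint.h>
#include <stdio.h>

#define HBR_REQUEST 0x4000000000000000ULL
#define HBR_AVPN    0x0200000000000000ULL
#define HBR_END     0xc000000000000000ULL

/* Pack (slot, avpn) pairs the way the flush paths feed H_BULK_REMOVE. */
static int pack_bulk_remove(uint64_t *param, const uint64_t *slot,
			    const uint64_t *avpn, int n)
{
	int i, pix = 0;

	for (i = 0; i < n && pix < 8; i++) {
		param[pix++] = HBR_REQUEST | HBR_AVPN | slot[i];
		param[pix++] = avpn[i];
	}
	if (pix < 8)
		param[pix++] = HBR_END; /* terminate a partial batch */
	return pix;
}

int main(void)
{
	uint64_t param[8];
	uint64_t slot[2] = { 0x10, 0x11 }, avpn[2] = { 0x1, 0x2 };
	int n = pack_bulk_remove(param, slot, avpn, 2);

	printf("%d words, param[0]=0x%llx\n", n,
	       (unsigned long long)param[0]);
	return 0;
}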
+ */
+#define PPC64_HUGE_HPTE_BATCH 12
+
+static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
+ unsigned long *vpn, int count,
+ int psize, int ssize)
+{
+ unsigned long param[8];
+ int i = 0, pix = 0, rc;
+ unsigned long flags = 0;
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+ if (lock_tlbie)
+ spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+ for (i = 0; i < count; i++) {
+
+ if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+ pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0,
+ ssize, 0);
+ } else {
+ param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
+ param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
+ pix += 2;
+ if (pix == 8) {
+ rc = plpar_hcall9(H_BULK_REMOVE, param,
+ param[0], param[1], param[2],
+ param[3], param[4], param[5],
+ param[6], param[7]);
+ BUG_ON(rc != H_SUCCESS);
+ pix = 0;
+ }
+ }
+ }
+ if (pix) {
+ param[pix] = HBR_END;
+ rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
+ param[2], param[3], param[4], param[5],
+ param[6], param[7]);
+ BUG_ON(rc != H_SUCCESS);
+ }
+
+ if (lock_tlbie)
+ spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+}
+
+static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
+ unsigned long addr,
+ unsigned char *hpte_slot_array,
+ int psize, int ssize, int local)
+{
+ int i, index = 0;
+ unsigned long s_addr = addr;
+ unsigned int max_hpte_count, valid;
+ unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
+ unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
+ unsigned long shift, hidx, vpn = 0, hash, slot;
+
+ shift = mmu_psize_defs[psize].shift;
+ max_hpte_count = 1U << (PMD_SHIFT - shift);
+
+ for (i = 0; i < max_hpte_count; i++) {
+ valid = hpte_valid(hpte_slot_array, i);
+ if (!valid)
+ continue;
+ hidx = hpte_hash_index(hpte_slot_array, i);
+
+ /* get the vpn */
+ addr = s_addr + (i * (1ul << shift));
+ vpn = hpt_vpn(addr, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+
+ slot_array[index] = slot;
+ vpn_array[index] = vpn;
+ if (index == PPC64_HUGE_HPTE_BATCH - 1) {
+ /*
+ * Now do a bulk invalidate
+ */
+ __pSeries_lpar_hugepage_invalidate(slot_array,
+ vpn_array,
+ PPC64_HUGE_HPTE_BATCH,
+ psize, ssize);
+ index = 0;
+ } else
+ index++;
+ }
+ if (index)
+ __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
+ index, psize, ssize);
+}
+
+static void pSeries_lpar_hpte_removebolted(unsigned long ea,
+ int psize, int ssize)
+{
+ unsigned long vpn;
+ unsigned long slot, vsid;
+
+ vsid = get_kernel_vsid(ea, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
+
+ slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
+ BUG_ON(slot == -1);
+ /*
+ * lpar doesn't use the passed actual page size
+ */
+ pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
+}
+
+/*
+ * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
+ * lock.
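The hugepage path above accumulates PPC64_HUGE_HPTE_BATCH entries and flushes, with a final partial flush at the end. The control flow as a standalone sketch (the collected entries are elided):

#include <stdio.h>

#define BATCH 12 /* mirrors PPC64_HUGE_HPTE_BATCH */

static void flush(int n)
{
	printf("flush %d entries\n", n);
}

/* The accumulate-then-flush shape of the hugepage invalidate. */
int main(void)
{
	int i, index = 0, total = 30;

	for (i = 0; i < total; i++) {
		/* ...record slot/vpn for entry i at position 'index'... */
		if (index == BATCH - 1) {
			flush(BATCH);
			index = 0;
		} else {
			index++;
		}
	}
	if (index)
		flush(index); /* final partial batch */
	return 0;
}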
+ */ +static void pSeries_lpar_flush_hash_range(unsigned long number, int local) +{ + unsigned long vpn; + unsigned long i, pix, rc; + unsigned long flags = 0; + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + unsigned long param[9]; + unsigned long hash, index, shift, hidx, slot; + real_pte_t pte; + int psize, ssize; + + if (lock_tlbie) + spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + + psize = batch->psize; + ssize = batch->ssize; + pix = 0; + for (i = 0; i < number; i++) { + vpn = batch->vpn[i]; + pte = batch->pte[i]; + pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { + hash = hpt_hash(vpn, shift, ssize); + hidx = __rpte_to_hidx(pte, index); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { + /* + * lpar doesn't use the passed actual page size + */ + pSeries_lpar_hpte_invalidate(slot, vpn, psize, + 0, ssize, local); + } else { + param[pix] = HBR_REQUEST | HBR_AVPN | slot; + param[pix+1] = hpte_encode_avpn(vpn, psize, + ssize); + pix += 2; + if (pix == 8) { + rc = plpar_hcall9(H_BULK_REMOVE, param, + param[0], param[1], param[2], + param[3], param[4], param[5], + param[6], param[7]); + BUG_ON(rc != H_SUCCESS); + pix = 0; + } + } + } pte_iterate_hashed_end(); + } + if (pix) { + param[pix] = HBR_END; + rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], + param[2], param[3], param[4], param[5], + param[6], param[7]); + BUG_ON(rc != H_SUCCESS); + } + + if (lock_tlbie) + spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); +} + +static int __init disable_bulk_remove(char *str) +{ + if (strcmp(str, "off") == 0 && + firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { + printk(KERN_INFO "Disabling BULK_REMOVE firmware feature"); + powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE; + } + return 1; +} + +__setup("bulk_remove=", disable_bulk_remove); + +void __init hpte_init_lpar(void) +{ + ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate; + ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp; + ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; + ppc_md.hpte_insert = pSeries_lpar_hpte_insert; + ppc_md.hpte_remove = pSeries_lpar_hpte_remove; + ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; + ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; + ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; + ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; +} + +#ifdef CONFIG_PPC_SMLPAR +#define CMO_FREE_HINT_DEFAULT 1 +static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT; + +static int __init cmo_free_hint(char *str) +{ + char *parm; + parm = strstrip(str); + + if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) { + printk(KERN_INFO "cmo_free_hint: CMO free page hinting is not active.\n"); + cmo_free_hint_flag = 0; + return 1; + } + + cmo_free_hint_flag = 1; + printk(KERN_INFO "cmo_free_hint: CMO free page hinting is active.\n"); + + if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0) + return 1; + + return 0; +} + +__setup("cmo_free_hint=", cmo_free_hint); + +static void pSeries_set_page_state(struct page *page, int order, + unsigned long state) +{ + int i, j; + unsigned long cmo_page_sz, addr; + + cmo_page_sz = cmo_get_page_size(); + addr = __pa((unsigned long)page_address(page)); + + for (i = 0; i < (1 << order); i++, addr += PAGE_SIZE) { + for (j = 0; j < PAGE_SIZE; j += cmo_page_sz) + 
plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0); + } +} + +void arch_free_page(struct page *page, int order) +{ + if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO)) + return; + + pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED); +} +EXPORT_SYMBOL(arch_free_page); + +#endif + +#ifdef CONFIG_TRACEPOINTS +#ifdef HAVE_JUMP_LABEL +struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; + +void hcall_tracepoint_regfunc(void) +{ + static_key_slow_inc(&hcall_tracepoint_key); +} + +void hcall_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&hcall_tracepoint_key); +} +#else +/* + * We optimise our hcall path by placing hcall_tracepoint_refcount + * directly in the TOC so we can check if the hcall tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long hcall_tracepoint_refcount; + +void hcall_tracepoint_regfunc(void) +{ + hcall_tracepoint_refcount++; +} + +void hcall_tracepoint_unregfunc(void) +{ + hcall_tracepoint_refcount--; +} +#endif + +/* + * Since the tracing code might execute hcalls we need to guard against + * recursion. One example of this are spinlocks calling H_YIELD on + * shared processor partitions. + */ +static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); + + +void __trace_hcall_entry(unsigned long opcode, unsigned long *args) +{ + unsigned long flags; + unsigned int *depth; + + /* + * We cannot call tracepoints inside RCU idle regions which + * means we must not trace H_CEDE. + */ + if (opcode == H_CEDE) + return; + + local_irq_save(flags); + + depth = this_cpu_ptr(&hcall_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + preempt_disable(); + trace_hcall_entry(opcode, args); + (*depth)--; + +out: + local_irq_restore(flags); +} + +void __trace_hcall_exit(long opcode, unsigned long retval, + unsigned long *retbuf) +{ + unsigned long flags; + unsigned int *depth; + + if (opcode == H_CEDE) + return; + + local_irq_save(flags); + + depth = this_cpu_ptr(&hcall_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + trace_hcall_exit(opcode, retval, retbuf); + preempt_enable(); + (*depth)--; + +out: + local_irq_restore(flags); +} +#endif + +/** + * h_get_mpp + * H_GET_MPP hcall returns info in 7 parms + */ +int h_get_mpp(struct hvcall_mpp_data *mpp_data) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_MPP, retbuf); + + mpp_data->entitled_mem = retbuf[0]; + mpp_data->mapped_mem = retbuf[1]; + + mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + mpp_data->pool_num = retbuf[2] & 0xffff; + + mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; + mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; + mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffffUL; + + mpp_data->pool_size = retbuf[4]; + mpp_data->loan_request = retbuf[5]; + mpp_data->backing_mem = retbuf[6]; + + return rc; +} +EXPORT_SYMBOL(h_get_mpp); + +int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; + + rc = plpar_hcall9(H_GET_MPP_X, retbuf); + + mpp_x_data->coalesced_bytes = retbuf[0]; + mpp_x_data->pool_coalesced_bytes = retbuf[1]; + mpp_x_data->pool_purr_cycles = retbuf[2]; + mpp_x_data->pool_spurr_cycles = retbuf[3]; + + return rc; +} diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c new file mode 100644 index 000000000..c9fecf09b --- /dev/null +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -0,0 +1,710 @@ +/* + * 
PowerPC64 LPAR Configuration Information Driver + * + * Dave Engebretsen engebret@us.ibm.com + * Copyright (c) 2003 Dave Engebretsen + * Will Schmidt willschm@us.ibm.com + * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation. + * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation. + * Nathan Lynch nathanl@austin.ibm.com + * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This driver creates a proc file at /proc/ppc64/lparcfg which contains + * keyword - value pairs that specify the configuration of the partition. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <asm/uaccess.h> +#include <asm/lppaca.h> +#include <asm/hvcall.h> +#include <asm/firmware.h> +#include <asm/rtas.h> +#include <asm/time.h> +#include <asm/prom.h> +#include <asm/vdso_datapage.h> +#include <asm/vio.h> +#include <asm/mmu.h> +#include <asm/machdep.h> + + +/* + * This isn't a module but we expose that to userspace + * via /proc so leave the definitions here + */ +#define MODULE_VERS "1.9" +#define MODULE_NAME "lparcfg" + +/* #define LPARCFG_DEBUG */ + +/* + * Track sum of all purrs across all processors. This is used to further + * calculate usage values by different applications + */ +static unsigned long get_purr(void) +{ + unsigned long sum_purr = 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_usage *cu; + + cu = &per_cpu(cpu_usage_array, cpu); + sum_purr += cu->current_tb; + } + return sum_purr; +} + +/* + * Methods used to fetch LPAR data when running on a pSeries platform. + */ + +struct hvcall_ppp_data { + u64 entitlement; + u64 unallocated_entitlement; + u16 group_num; + u16 pool_num; + u8 capped; + u8 weight; + u8 unallocated_weight; + u16 active_procs_in_pool; + u16 active_system_procs; + u16 phys_platform_procs; + u32 max_proc_cap_avail; + u32 entitled_proc_cap_avail; +}; + +/* + * H_GET_PPP hcall returns info in 4 parms. + * entitled_capacity,unallocated_capacity, + * aggregation, resource_capability). + * + * R4 = Entitled Processor Capacity Percentage. + * R5 = Unallocated Processor Capacity Percentage. + * R6 (AABBCCDDEEFFGGHH). + * XXXX - reserved (0) + * XXXX - reserved (0) + * XXXX - Group Number + * XXXX - Pool Number. + * R7 (IIJJKKLLMMNNOOPP). + * XX - reserved. (0) + * XX - bit 0-6 reserved (0). bit 7 is Capped indicator. + * XX - variable processor Capacity Weight + * XX - Unallocated Variable Processor Capacity Weight. + * XXXX - Active processors in Physical Processor Pool. + * XXXX - Processors active on platform. + * R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1 + * XXXX - Physical platform procs allocated to virtualization. + * XXXXXX - Max procs capacity % available to the partitions pool. + * XXXXXX - Entitled procs capacity % available to the + * partitions pool. 
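h_get_ppp below unpacks these packed registers with byte shifts and masks. A standalone sketch of the same extraction on a hypothetical R7 value (illustrative only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical R7 value laid out as IIJJKKLLMMNNOOPP above */
	uint64_t r7 = 0x0001f2f3000a0150ULL;

	printf("capped=%u weight=%u unallocated=%u pool=%u system=%u\n",
	       (unsigned)((r7 >> 6 * 8) & 0x01),
	       (unsigned)((r7 >> 5 * 8) & 0xff),
	       (unsigned)((r7 >> 4 * 8) & 0xff),
	       (unsigned)((r7 >> 2 * 8) & 0xffff),
	       (unsigned)(r7 & 0xffff));
	return 0;
}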
+ */ +static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) +{ + unsigned long rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_PPP, retbuf); + + ppp_data->entitlement = retbuf[0]; + ppp_data->unallocated_entitlement = retbuf[1]; + + ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + ppp_data->pool_num = retbuf[2] & 0xffff; + + ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01; + ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff; + ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff; + ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff; + ppp_data->active_system_procs = retbuf[3] & 0xffff; + + ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8; + ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff; + ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff; + + return rc; +} + +static unsigned h_pic(unsigned long *pool_idle_time, + unsigned long *num_procs) +{ + unsigned long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_PIC, retbuf); + + *pool_idle_time = retbuf[0]; + *num_procs = retbuf[1]; + + return rc; +} + +/* + * parse_ppp_data + * Parse out the data returned from h_get_ppp and h_pic + */ +static void parse_ppp_data(struct seq_file *m) +{ + struct hvcall_ppp_data ppp_data; + struct device_node *root; + const __be32 *perf_level; + int rc; + + rc = h_get_ppp(&ppp_data); + if (rc) + return; + + seq_printf(m, "partition_entitled_capacity=%lld\n", + ppp_data.entitlement); + seq_printf(m, "group=%d\n", ppp_data.group_num); + seq_printf(m, "system_active_processors=%d\n", + ppp_data.active_system_procs); + + /* pool related entries are appropriate for shared configs */ + if (lppaca_shared_proc(get_lppaca())) { + unsigned long pool_idle_time, pool_procs; + + seq_printf(m, "pool=%d\n", ppp_data.pool_num); + + /* report pool_capacity in percentage */ + seq_printf(m, "pool_capacity=%d\n", + ppp_data.active_procs_in_pool * 100); + + h_pic(&pool_idle_time, &pool_procs); + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + } + + seq_printf(m, "unallocated_capacity_weight=%d\n", + ppp_data.unallocated_weight); + seq_printf(m, "capacity_weight=%d\n", ppp_data.weight); + seq_printf(m, "capped=%d\n", ppp_data.capped); + seq_printf(m, "unallocated_capacity=%lld\n", + ppp_data.unallocated_entitlement); + + /* The last bits of information returned from h_get_ppp are only + * valid if the ibm,partition-performance-parameters-level + * property is >= 1. 
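+	 * The property is read from the device-tree root below; when it is
+	 * absent or reads 0, these extra fields are simply not printed.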
+ */ + root = of_find_node_by_path("/"); + if (root) { + perf_level = of_get_property(root, + "ibm,partition-performance-parameters-level", + NULL); + if (perf_level && (be32_to_cpup(perf_level) >= 1)) { + seq_printf(m, + "physical_procs_allocated_to_virtualization=%d\n", + ppp_data.phys_platform_procs); + seq_printf(m, "max_proc_capacity_available=%d\n", + ppp_data.max_proc_cap_avail); + seq_printf(m, "entitled_proc_capacity_available=%d\n", + ppp_data.entitled_proc_cap_avail); + } + + of_node_put(root); + } +} + +/** + * parse_mpp_data + * Parse out data returned from h_get_mpp + */ +static void parse_mpp_data(struct seq_file *m) +{ + struct hvcall_mpp_data mpp_data; + int rc; + + rc = h_get_mpp(&mpp_data); + if (rc) + return; + + seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem); + + if (mpp_data.mapped_mem != -1) + seq_printf(m, "mapped_entitled_memory=%ld\n", + mpp_data.mapped_mem); + + seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num); + seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num); + + seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight); + seq_printf(m, "unallocated_entitled_memory_weight=%d\n", + mpp_data.unallocated_mem_weight); + seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n", + mpp_data.unallocated_entitlement); + + if (mpp_data.pool_size != -1) + seq_printf(m, "entitled_memory_pool_size=%ld bytes\n", + mpp_data.pool_size); + + seq_printf(m, "entitled_memory_loan_request=%ld\n", + mpp_data.loan_request); + + seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); +} + +/** + * parse_mpp_x_data + * Parse out data returned from h_get_mpp_x + */ +static void parse_mpp_x_data(struct seq_file *m) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (!firmware_has_feature(FW_FEATURE_XCMO)) + return; + if (h_get_mpp_x(&mpp_x_data)) + return; + + seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes); + + if (mpp_x_data.pool_coalesced_bytes) + seq_printf(m, "pool_coalesced_bytes=%ld\n", + mpp_x_data.pool_coalesced_bytes); + if (mpp_x_data.pool_purr_cycles) + seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles); + if (mpp_x_data.pool_spurr_cycles) + seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); +} + +#define SPLPAR_CHARACTERISTICS_TOKEN 20 +#define SPLPAR_MAXLENGTH 1026*(sizeof(char)) + +/* + * parse_system_parameter_string() + * Retrieve the potential_processors, max_entitled_capacity and friends + * through the get-system-parameter rtas call. Replace keyword strings as + * necessary. 
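+ *
+ * For example, the firmware keyword "MaxEntCap" is rewritten below to
+ * "partition_max_entitled_capacity" before the pair is printed.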
+ */ +static void parse_system_parameter_string(struct seq_file *m) +{ + int call_status; + + unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + if (!local_buffer) { + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", + __FILE__, __func__, __LINE__); + return; + } + + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); + call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + SPLPAR_CHARACTERISTICS_TOKEN, + __pa(rtas_data_buf), + RTAS_DATA_BUF_SIZE); + memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); + local_buffer[SPLPAR_MAXLENGTH - 1] = '\0'; + spin_unlock(&rtas_data_buf_lock); + + if (call_status != 0) { + printk(KERN_INFO + "%s %s Error calling get-system-parameter (0x%x)\n", + __FILE__, __func__, call_status); + } else { + int splpar_strlen; + int idx, w_idx; + char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + if (!workbuffer) { + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", + __FILE__, __func__, __LINE__); + kfree(local_buffer); + return; + } +#ifdef LPARCFG_DEBUG + printk(KERN_INFO "success calling get-system-parameter\n"); +#endif + splpar_strlen = local_buffer[0] * 256 + local_buffer[1]; + local_buffer += 2; /* step over strlen value */ + + w_idx = 0; + idx = 0; + while ((*local_buffer) && (idx < splpar_strlen)) { + workbuffer[w_idx++] = local_buffer[idx++]; + if ((local_buffer[idx] == ',') + || (local_buffer[idx] == '\0')) { + workbuffer[w_idx] = '\0'; + if (w_idx) { + /* avoid the empty string */ + seq_printf(m, "%s\n", workbuffer); + } + memset(workbuffer, 0, SPLPAR_MAXLENGTH); + idx++; /* skip the comma */ + w_idx = 0; + } else if (local_buffer[idx] == '=') { + /* code here to replace workbuffer contents + with different keyword strings */ + if (0 == strcmp(workbuffer, "MaxEntCap")) { + strcpy(workbuffer, + "partition_max_entitled_capacity"); + w_idx = strlen(workbuffer); + } + if (0 == strcmp(workbuffer, "MaxPlatProcs")) { + strcpy(workbuffer, + "system_potential_processors"); + w_idx = strlen(workbuffer); + } + } + } + kfree(workbuffer); + local_buffer -= 2; /* back up over strlen value */ + } + kfree(local_buffer); +} + +/* Return the number of processors in the system. + * This function reads through the device tree and counts + * the virtual processors, this does not include threads. 
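+ * (Each "cpu" device-tree node represents one virtual processor core,
+ * so e.g. a four-core SMT4 partition reports 4 here, not 16.)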
+ */ +static int lparcfg_count_active_processors(void) +{ + struct device_node *cpus_dn = NULL; + int count = 0; + + while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { +#ifdef LPARCFG_DEBUG + printk(KERN_ERR "cpus_dn %p\n", cpus_dn); +#endif + count++; + } + return count; +} + +static void pseries_cmo_data(struct seq_file *m) +{ + int cpu; + unsigned long cmo_faults = 0; + unsigned long cmo_fault_time = 0; + + seq_printf(m, "cmo_enabled=%d\n", firmware_has_feature(FW_FEATURE_CMO)); + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + for_each_possible_cpu(cpu) { + cmo_faults += be64_to_cpu(lppaca_of(cpu).cmo_faults); + cmo_fault_time += be64_to_cpu(lppaca_of(cpu).cmo_fault_time); + } + + seq_printf(m, "cmo_faults=%lu\n", cmo_faults); + seq_printf(m, "cmo_fault_time_usec=%lu\n", + cmo_fault_time / tb_ticks_per_usec); + seq_printf(m, "cmo_primary_psp=%d\n", cmo_get_primary_psp()); + seq_printf(m, "cmo_secondary_psp=%d\n", cmo_get_secondary_psp()); + seq_printf(m, "cmo_page_size=%lu\n", cmo_get_page_size()); +} + +static void splpar_dispatch_data(struct seq_file *m) +{ + int cpu; + unsigned long dispatches = 0; + unsigned long dispatch_dispersions = 0; + + for_each_possible_cpu(cpu) { + dispatches += be32_to_cpu(lppaca_of(cpu).yield_count); + dispatch_dispersions += + be32_to_cpu(lppaca_of(cpu).dispersion_count); + } + + seq_printf(m, "dispatches=%lu\n", dispatches); + seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions); +} + +static void parse_em_data(struct seq_file *m) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + if (firmware_has_feature(FW_FEATURE_LPAR) && + plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) + seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); +} + +static int pseries_lparcfg_data(struct seq_file *m, void *v) +{ + int partition_potential_processors; + int partition_active_processors; + struct device_node *rtas_node; + const __be32 *lrdrp = NULL; + + rtas_node = of_find_node_by_path("/rtas"); + if (rtas_node) + lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL); + + if (lrdrp == NULL) { + partition_potential_processors = vdso_data->processorCount; + } else { + partition_potential_processors = be32_to_cpup(lrdrp + 4); + } + of_node_put(rtas_node); + + partition_active_processors = lparcfg_count_active_processors(); + + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + /* this call handles the ibm,get-system-parameter contents */ + parse_system_parameter_string(m); + parse_ppp_data(m); + parse_mpp_data(m); + parse_mpp_x_data(m); + pseries_cmo_data(m); + splpar_dispatch_data(m); + + seq_printf(m, "purr=%ld\n", get_purr()); + } else { /* non SPLPAR case */ + + seq_printf(m, "system_active_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "system_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "partition_max_entitled_capacity=%d\n", + partition_potential_processors * 100); + + seq_printf(m, "partition_entitled_capacity=%d\n", + partition_active_processors * 100); + } + + seq_printf(m, "partition_active_processors=%d\n", + partition_active_processors); + + seq_printf(m, "partition_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "shared_processor_mode=%d\n", + lppaca_shared_proc(get_lppaca())); + + seq_printf(m, "slb_size=%d\n", mmu_slb_size); + + parse_em_data(m); + + return 0; +} + +static ssize_t update_ppp(u64 *entitlement, u8 *weight) +{ + struct hvcall_ppp_data ppp_data; + u8 new_weight; + u64 new_entitled; + ssize_t retval; + + /* Get our 
current parameters */ + retval = h_get_ppp(&ppp_data); + if (retval) + return retval; + + if (entitlement) { + new_weight = ppp_data.weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = ppp_data.entitlement; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %llu, current_weight = %u\n", + __func__, ppp_data.entitlement, ppp_data.weight); + + pr_debug("%s: new_entitled = %llu, new_weight = %u\n", + __func__, new_entitled, new_weight); + + retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight); + return retval; +} + +/** + * update_mpp + * + * Update the memory entitlement and weight for the partition. Caller must + * specify either a new entitlement or weight, not both, to be updated + * since the h_set_mpp call takes both entitlement and weight as parameters. + */ +static ssize_t update_mpp(u64 *entitlement, u8 *weight) +{ + struct hvcall_mpp_data mpp_data; + u64 new_entitled; + u8 new_weight; + ssize_t rc; + + if (entitlement) { + /* Check with vio to ensure the new memory entitlement + * can be handled. + */ + rc = vio_cmo_entitlement_update(*entitlement); + if (rc) + return rc; + } + + rc = h_get_mpp(&mpp_data); + if (rc) + return rc; + + if (entitlement) { + new_weight = mpp_data.mem_weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = mpp_data.entitled_mem; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + __func__, mpp_data.entitled_mem, mpp_data.mem_weight); + + pr_debug("%s: new_entitled = %llu, new_weight = %u\n", + __func__, new_entitled, new_weight); + + rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight); + return rc; +} + +/* + * Interface for changing system parameters (variable capacity weight + * and entitled capacity). Format of input is "param_name=value"; + * anything after value is ignored. Valid parameters at this time are + * "partition_entitled_capacity" and "capacity_weight". We use + * H_SET_PPP to alter parameters. + * + * This function should be invoked only on systems with + * FW_FEATURE_SPLPAR. 
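+ *
+ * Example usage through the proc file created by this driver, where
+ * 100 units of entitled capacity correspond to one physical processor:
+ *   echo "partition_entitled_capacity=100" > /proc/ppc64/lparcfg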
+ */ +static ssize_t lparcfg_write(struct file *file, const char __user * buf, + size_t count, loff_t * off) +{ + int kbuf_sz = 64; + char kbuf[kbuf_sz]; + char *tmp; + u64 new_entitled, *new_entitled_ptr = &new_entitled; + u8 new_weight, *new_weight_ptr = &new_weight; + ssize_t retval; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return -EINVAL; + + if (count > kbuf_sz) + return -EINVAL; + + if (copy_from_user(kbuf, buf, count)) + return -EFAULT; + + kbuf[count - 1] = '\0'; + tmp = strchr(kbuf, '='); + if (!tmp) + return -EINVAL; + + *tmp++ = '\0'; + + if (!strcmp(kbuf, "partition_entitled_capacity")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_ppp(new_entitled_ptr, NULL); + } else if (!strcmp(kbuf, "capacity_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_ppp(NULL, new_weight_ptr); + } else if (!strcmp(kbuf, "entitled_memory")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_mpp(new_entitled_ptr, NULL); + } else if (!strcmp(kbuf, "entitled_memory_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_mpp(NULL, new_weight_ptr); + } else + return -EINVAL; + + if (retval == H_SUCCESS || retval == H_CONSTRAINED) { + retval = count; + } else if (retval == H_BUSY) { + retval = -EBUSY; + } else if (retval == H_HARDWARE) { + retval = -EIO; + } else if (retval == H_PARAMETER) { + retval = -EINVAL; + } + + return retval; +} + +static int lparcfg_data(struct seq_file *m, void *v) +{ + struct device_node *rootdn; + const char *model = ""; + const char *system_id = ""; + const char *tmp; + const __be32 *lp_index_ptr; + unsigned int lp_index = 0; + + seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS); + + rootdn = of_find_node_by_path("/"); + if (rootdn) { + tmp = of_get_property(rootdn, "model", NULL); + if (tmp) + model = tmp; + tmp = of_get_property(rootdn, "system-id", NULL); + if (tmp) + system_id = tmp; + lp_index_ptr = of_get_property(rootdn, "ibm,partition-no", + NULL); + if (lp_index_ptr) + lp_index = be32_to_cpup(lp_index_ptr); + of_node_put(rootdn); + } + seq_printf(m, "serial_number=%s\n", system_id); + seq_printf(m, "system_type=%s\n", model); + seq_printf(m, "partition_id=%d\n", (int)lp_index); + + return pseries_lparcfg_data(m, v); +} + +static int lparcfg_open(struct inode *inode, struct file *file) +{ + return single_open(file, lparcfg_data, NULL); +} + +static const struct file_operations lparcfg_fops = { + .read = seq_read, + .write = lparcfg_write, + .open = lparcfg_open, + .release = single_release, + .llseek = seq_lseek, +}; + +static int __init lparcfg_init(void) +{ + umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + + /* Allow writing if we have FW_FEATURE_SPLPAR */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + mode |= S_IWUSR; + + if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) { + printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); + return -EIO; + } + return 0; +} +machine_device_initcall(pseries, lparcfg_init); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c new file mode 100644 index 000000000..ceb18d349 --- /dev/null +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -0,0 +1,370 @@ +/* + * Support for Partition Mobility/Migration + * + * Copyright (C) 
2010 Nathan Fontenot
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/smp.h>
+#include <linux/stat.h>
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include "pseries.h"
+
+static struct kobject *mobility_kobj;
+
+struct update_props_workarea {
+	__be32 phandle;
+	__be32 state;
+	__be64 reserved;
+	__be32 nprops;
+} __packed;
+
+#define NODE_ACTION_MASK	0xff000000
+#define NODE_COUNT_MASK		0x00ffffff
+
+#define DELETE_DT_NODE	0x01000000
+#define UPDATE_DT_NODE	0x02000000
+#define ADD_DT_NODE	0x03000000
+
+#define MIGRATION_SCOPE	(1)
+
+static int mobility_rtas_call(int token, char *buf, s32 scope)
+{
+	int rc;
+
+	spin_lock(&rtas_data_buf_lock);
+
+	memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE);
+	rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope);
+	memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+
+	spin_unlock(&rtas_data_buf_lock);
+	return rc;
+}
+
+static int delete_dt_node(__be32 phandle)
+{
+	struct device_node *dn;
+
+	dn = of_find_node_by_phandle(be32_to_cpu(phandle));
+	if (!dn)
+		return -ENOENT;
+
+	dlpar_detach_node(dn);
+	of_node_put(dn);
+	return 0;
+}
+
+static int update_dt_property(struct device_node *dn, struct property **prop,
+			      const char *name, u32 vd, char *value)
+{
+	struct property *new_prop = *prop;
+	int more = 0;
+
+	/* A negative 'vd' value indicates that only part of the new property
+	 * value is contained in the buffer and we need to call
+	 * ibm,update-properties again to get the rest of the value.
+	 *
+	 * A negative value is also the two's complement of the actual value.
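+	 *
+	 * For example, vd = 0xfffffff0 decodes below to a 16 byte chunk
+	 * with more of the value still to come (~0xfffffff0 + 1 == 0x10).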
+	 */
+	if (vd & 0x80000000) {
+		vd = ~vd + 1;
+		more = 1;
+	}
+
+	if (new_prop) {
+		/* partial property fixup */
+		char *new_data = kzalloc(new_prop->length + vd, GFP_KERNEL);
+		if (!new_data)
+			return -ENOMEM;
+
+		memcpy(new_data, new_prop->value, new_prop->length);
+		memcpy(new_data + new_prop->length, value, vd);
+
+		kfree(new_prop->value);
+		new_prop->value = new_data;
+		new_prop->length += vd;
+	} else {
+		new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
+		if (!new_prop)
+			return -ENOMEM;
+
+		new_prop->name = kstrdup(name, GFP_KERNEL);
+		if (!new_prop->name) {
+			kfree(new_prop);
+			return -ENOMEM;
+		}
+
+		new_prop->length = vd;
+		new_prop->value = kzalloc(new_prop->length, GFP_KERNEL);
+		if (!new_prop->value) {
+			kfree(new_prop->name);
+			kfree(new_prop);
+			return -ENOMEM;
+		}
+
+		memcpy(new_prop->value, value, vd);
+		*prop = new_prop;
+	}
+
+	if (!more) {
+		of_update_property(dn, new_prop);
+		*prop = NULL;
+	}
+
+	return 0;
+}
+
+static int update_dt_node(__be32 phandle, s32 scope)
+{
+	struct update_props_workarea *upwa;
+	struct device_node *dn;
+	struct property *prop = NULL;
+	int i, rc, rtas_rc;
+	char *prop_data;
+	char *rtas_buf;
+	int update_properties_token;
+	u32 nprops;
+	u32 vd;
+
+	update_properties_token = rtas_token("ibm,update-properties");
+	if (update_properties_token == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!rtas_buf)
+		return -ENOMEM;
+
+	dn = of_find_node_by_phandle(be32_to_cpu(phandle));
+	if (!dn) {
+		kfree(rtas_buf);
+		return -ENOENT;
+	}
+
+	upwa = (struct update_props_workarea *)&rtas_buf[0];
+	upwa->phandle = phandle;
+
+	do {
+		rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
+					scope);
+		if (rtas_rc < 0)
+			break;
+
+		prop_data = rtas_buf + sizeof(*upwa);
+		nprops = be32_to_cpu(upwa->nprops);
+
+		/* On the first call to ibm,update-properties for a node the
+		 * first property value descriptor contains an empty
+		 * property name, the property value length encoded as u32,
+		 * and the property value is the node path being updated.
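+		 * The block below steps over that descriptor: skip the
+		 * empty name byte, read the u32 length vd, then advance
+		 * past the path itself before parsing the real properties.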
+ */ + if (*prop_data == 0) { + prop_data++; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += vd + sizeof(vd); + nprops--; + } + + for (i = 0; i < nprops; i++) { + char *prop_name; + + prop_name = prop_data; + prop_data += strlen(prop_name) + 1; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += sizeof(vd); + + switch (vd) { + case 0x00000000: + /* name only property, nothing to do */ + break; + + case 0x80000000: + prop = of_find_property(dn, prop_name, NULL); + of_remove_property(dn, prop); + prop = NULL; + break; + + default: + rc = update_dt_property(dn, &prop, prop_name, + vd, prop_data); + if (rc) { + printk(KERN_ERR "Could not update %s" + " property\n", prop_name); + } + + prop_data += vd; + } + } + } while (rtas_rc == 1); + + of_node_put(dn); + kfree(rtas_buf); + return 0; +} + +static int add_dt_node(__be32 parent_phandle, __be32 drc_index) +{ + struct device_node *dn; + struct device_node *parent_dn; + int rc; + + parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle)); + if (!parent_dn) + return -ENOENT; + + dn = dlpar_configure_connector(drc_index, parent_dn); + if (!dn) + return -ENOENT; + + rc = dlpar_attach_node(dn); + if (rc) + dlpar_free_cc_nodes(dn); + + of_node_put(parent_dn); + return rc; +} + +int pseries_devicetree_update(s32 scope) +{ + char *rtas_buf; + __be32 *data; + int update_nodes_token; + int rc; + + update_nodes_token = rtas_token("ibm,update-nodes"); + if (update_nodes_token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!rtas_buf) + return -ENOMEM; + + do { + rc = mobility_rtas_call(update_nodes_token, rtas_buf, scope); + if (rc && rc != 1) + break; + + data = (__be32 *)rtas_buf + 4; + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { + int i; + u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK; + u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; + + data++; + + for (i = 0; i < node_count; i++) { + __be32 phandle = *data++; + __be32 drc_index; + + switch (action) { + case DELETE_DT_NODE: + delete_dt_node(phandle); + break; + case UPDATE_DT_NODE: + update_dt_node(phandle, scope); + break; + case ADD_DT_NODE: + drc_index = *data++; + add_dt_node(phandle, drc_index); + break; + } + } + } + } while (rc == 1); + + kfree(rtas_buf); + return rc; +} + +void post_mobility_fixup(void) +{ + int rc; + int activate_fw_token; + + activate_fw_token = rtas_token("ibm,activate-firmware"); + if (activate_fw_token == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "Could not make post-mobility " + "activate-fw call.\n"); + return; + } + + do { + rc = rtas_call(activate_fw_token, 0, 1, NULL); + } while (rtas_busy_delay(rc)); + + if (rc) + printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc); + + rc = pseries_devicetree_update(MIGRATION_SCOPE); + if (rc) + printk(KERN_ERR "Post-mobility device tree update " + "failed: %d\n", rc); + + return; +} + +static ssize_t migrate_store(struct class *class, struct class_attribute *attr, + const char *buf, size_t count) +{ + u64 streamid; + int rc; + + rc = kstrtou64(buf, 0, &streamid); + if (rc) + return rc; + + do { + rc = rtas_ibm_suspend_me(streamid); + if (rc == -EAGAIN) + ssleep(1); + } while (rc == -EAGAIN); + + if (rc) + return rc; + + post_mobility_fixup(); + return count; +} + +/* + * Used by drmgr to determine the kernel behavior of the migration interface. + * + * Version 1: Performs all PAPR requirements for migration including + * firmware activation and device tree update. 
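+ *
+ * For example, drmgr initiates a migration by writing the stream id
+ * (value below is illustrative) to the sysfs file created here:
+ *   echo 0x1000a1 > /sys/kernel/mobility/migration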
+ */ +#define MIGRATION_API_VERSION 1 + +static CLASS_ATTR(migration, S_IWUSR, NULL, migrate_store); +static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION)); + +static int __init mobility_sysfs_init(void) +{ + int rc; + + mobility_kobj = kobject_create_and_add("mobility", kernel_kobj); + if (!mobility_kobj) + return -ENOMEM; + + rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr); + if (rc) + pr_err("mobility: unable to create migration sysfs file (%d)\n", rc); + + rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr); + if (rc) + pr_err("mobility: unable to create api_version sysfs file (%d)\n", rc); + + return 0; +} +machine_device_initcall(pseries, mobility_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c new file mode 100644 index 000000000..c8d24f9a6 --- /dev/null +++ b/arch/powerpc/platforms/pseries/msi.c @@ -0,0 +1,528 @@ +/* + * Copyright 2006 Jake Moilanen <moilanen@austin.ibm.com>, IBM Corp. + * Copyright 2006-2007 Michael Ellerman, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + * + */ + +#include <linux/device.h> +#include <linux/irq.h> +#include <linux/msi.h> + +#include <asm/rtas.h> +#include <asm/hw_irq.h> +#include <asm/ppc-pci.h> +#include <asm/machdep.h> + +static int query_token, change_token; + +#define RTAS_QUERY_FN 0 +#define RTAS_CHANGE_FN 1 +#define RTAS_RESET_FN 2 +#define RTAS_CHANGE_MSI_FN 3 +#define RTAS_CHANGE_MSIX_FN 4 +#define RTAS_CHANGE_32MSI_FN 5 + +/* RTAS Helpers */ + +static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs) +{ + u32 addr, seq_num, rtas_ret[3]; + unsigned long buid; + int rc; + + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + + seq_num = 1; + do { + if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN || + func == RTAS_CHANGE_32MSI_FN) + rc = rtas_call(change_token, 6, 4, rtas_ret, addr, + BUID_HI(buid), BUID_LO(buid), + func, num_irqs, seq_num); + else + rc = rtas_call(change_token, 6, 3, rtas_ret, addr, + BUID_HI(buid), BUID_LO(buid), + func, num_irqs, seq_num); + + seq_num = rtas_ret[1]; + } while (rtas_busy_delay(rc)); + + /* + * If the RTAS call succeeded, return the number of irqs allocated. + * If not, make sure we return a negative error code. 
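+	 * (rtas_ret[0] carries the irq count when rc == 0; positive RTAS
+	 * error codes are negated so callers can simply test for rc < 0.)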
+ */ + if (rc == 0) + rc = rtas_ret[0]; + else if (rc > 0) + rc = -rc; + + pr_debug("rtas_msi: ibm,change_msi(func=%d,num=%d), got %d rc = %d\n", + func, num_irqs, rtas_ret[0], rc); + + return rc; +} + +static void rtas_disable_msi(struct pci_dev *pdev) +{ + struct pci_dn *pdn; + + pdn = pci_get_pdn(pdev); + if (!pdn) + return; + + /* + * disabling MSI with the explicit interface also disables MSI-X + */ + if (rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, 0) != 0) { + /* + * may have failed because explicit interface is not + * present + */ + if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0) { + pr_debug("rtas_msi: Setting MSIs to 0 failed!\n"); + } + } +} + +static int rtas_query_irq_number(struct pci_dn *pdn, int offset) +{ + u32 addr, rtas_ret[2]; + unsigned long buid; + int rc; + + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + + do { + rc = rtas_call(query_token, 4, 3, rtas_ret, addr, + BUID_HI(buid), BUID_LO(buid), offset); + } while (rtas_busy_delay(rc)); + + if (rc) { + pr_debug("rtas_msi: error (%d) querying source number\n", rc); + return rc; + } + + return rtas_ret[0]; +} + +static void rtas_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct msi_desc *entry; + + list_for_each_entry(entry, &pdev->msi_list, list) { + if (entry->irq == NO_IRQ) + continue; + + irq_set_msi_desc(entry->irq, NULL); + irq_dispose_mapping(entry->irq); + } + + rtas_disable_msi(pdev); +} + +static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) +{ + struct device_node *dn; + struct pci_dn *pdn; + const __be32 *p; + u32 req_msi; + + pdn = pci_get_pdn(pdev); + if (!pdn) + return -ENODEV; + + dn = pdn->node; + + p = of_get_property(dn, prop_name, NULL); + if (!p) { + pr_debug("rtas_msi: No %s on %s\n", prop_name, dn->full_name); + return -ENOENT; + } + + req_msi = be32_to_cpup(p); + if (req_msi < nvec) { + pr_debug("rtas_msi: %s requests < %d MSIs\n", prop_name, nvec); + + if (req_msi == 0) /* Be paranoid */ + return -ENOSPC; + + return req_msi; + } + + return 0; +} + +static int check_req_msi(struct pci_dev *pdev, int nvec) +{ + return check_req(pdev, nvec, "ibm,req#msi"); +} + +static int check_req_msix(struct pci_dev *pdev, int nvec) +{ + return check_req(pdev, nvec, "ibm,req#msi-x"); +} + +/* Quota calculation */ + +static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) +{ + struct device_node *dn; + const __be32 *p; + + dn = of_node_get(pci_device_to_OF_node(dev)); + while (dn) { + p = of_get_property(dn, "ibm,pe-total-#msi", NULL); + if (p) { + pr_debug("rtas_msi: found prop on dn %s\n", + dn->full_name); + *total = be32_to_cpup(p); + return dn; + } + + dn = of_get_next_parent(dn); + } + + return NULL; +} + +static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) +{ + struct device_node *dn; + struct pci_dn *pdn; + struct eeh_dev *edev; + + /* Found our PE and assume 8 at that point. */ + + dn = pci_device_to_OF_node(dev); + if (!dn) + return NULL; + + /* Get the top level device in the PE */ + edev = pdn_to_eeh_dev(PCI_DN(dn)); + if (edev->pe) + edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list); + pdn = eeh_dev_to_pdn(edev); + dn = pdn ? 
pdn->node : NULL; + if (!dn) + return NULL; + + /* We actually want the parent */ + dn = of_get_parent(dn); + if (!dn) + return NULL; + + /* Hardcode of 8 for old firmwares */ + *total = 8; + pr_debug("rtas_msi: using PE dn %s\n", dn->full_name); + + return dn; +} + +struct msi_counts { + struct device_node *requestor; + int num_devices; + int request; + int quota; + int spare; + int over_quota; +}; + +static void *count_non_bridge_devices(struct device_node *dn, void *data) +{ + struct msi_counts *counts = data; + const __be32 *p; + u32 class; + + pr_debug("rtas_msi: counting %s\n", dn->full_name); + + p = of_get_property(dn, "class-code", NULL); + class = p ? be32_to_cpup(p) : 0; + + if ((class >> 8) != PCI_CLASS_BRIDGE_PCI) + counts->num_devices++; + + return NULL; +} + +static void *count_spare_msis(struct device_node *dn, void *data) +{ + struct msi_counts *counts = data; + const __be32 *p; + int req; + + if (dn == counts->requestor) + req = counts->request; + else { + /* We don't know if a driver will try to use MSI or MSI-X, + * so we just have to punt and use the larger of the two. */ + req = 0; + p = of_get_property(dn, "ibm,req#msi", NULL); + if (p) + req = be32_to_cpup(p); + + p = of_get_property(dn, "ibm,req#msi-x", NULL); + if (p) + req = max(req, (int)be32_to_cpup(p)); + } + + if (req < counts->quota) + counts->spare += counts->quota - req; + else if (req > counts->quota) + counts->over_quota++; + + return NULL; +} + +static int msi_quota_for_device(struct pci_dev *dev, int request) +{ + struct device_node *pe_dn; + struct msi_counts counts; + int total; + + pr_debug("rtas_msi: calc quota for %s, request %d\n", pci_name(dev), + request); + + pe_dn = find_pe_total_msi(dev, &total); + if (!pe_dn) + pe_dn = find_pe_dn(dev, &total); + + if (!pe_dn) { + pr_err("rtas_msi: couldn't find PE for %s\n", pci_name(dev)); + goto out; + } + + pr_debug("rtas_msi: found PE %s\n", pe_dn->full_name); + + memset(&counts, 0, sizeof(struct msi_counts)); + + /* Work out how many devices we have below this PE */ + traverse_pci_devices(pe_dn, count_non_bridge_devices, &counts); + + if (counts.num_devices == 0) { + pr_err("rtas_msi: found 0 devices under PE for %s\n", + pci_name(dev)); + goto out; + } + + counts.quota = total / counts.num_devices; + if (request <= counts.quota) + goto out; + + /* else, we have some more calculating to do */ + counts.requestor = pci_device_to_OF_node(dev); + counts.request = request; + traverse_pci_devices(pe_dn, count_spare_msis, &counts); + + /* If the quota isn't an integer multiple of the total, we can + * use the remainder as spare MSIs for anyone that wants them. */ + counts.spare += total % counts.num_devices; + + /* Divide any spare by the number of over-quota requestors */ + if (counts.over_quota) + counts.quota += counts.spare / counts.over_quota; + + /* And finally clamp the request to the possibly adjusted quota */ + request = min(counts.quota, request); + + pr_debug("rtas_msi: request clamped to quota %d\n", request); +out: + of_node_put(pe_dn); + + return request; +} + +static int check_msix_entries(struct pci_dev *pdev) +{ + struct msi_desc *entry; + int expected; + + /* There's no way for us to express to firmware that we want + * a discontiguous, or non-zero based, range of MSI-X entries. + * So we must reject such requests. 
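+	 * For example, a request for entries {0, 2, 3} is rejected below
+	 * because the zero-based sequence is missing entry 1.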
*/
+
+	expected = 0;
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		if (entry->msi_attrib.entry_nr != expected) {
+			pr_debug("rtas_msi: bad MSI-X entries.\n");
+			return -EINVAL;
+		}
+		expected++;
+	}
+
+	return 0;
+}
+
+static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev)
+{
+	u32 addr_hi, addr_lo;
+
+	/*
+	 * We should only get in here for IODA1 configs. This is based on the
+	 * fact that we are using RTAS for MSIs, we don't have the 32 bit MSI
+	 * RTAS support, and we are in a PCIe Gen2 slot.
+	 */
+	dev_info(&pdev->dev,
+		 "rtas_msi: No 32 bit MSI firmware support, forcing 32 bit MSI\n");
+	pci_read_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, &addr_hi);
+	addr_lo = 0xffff0000 | ((addr_hi >> (48 - 32)) << 4);
+	pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, addr_lo);
+	pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0);
+}
+
+static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type)
+{
+	struct pci_dn *pdn;
+	int hwirq, virq, i, quota, rc;
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	int nvec = nvec_in;
+	int use_32bit_msi_hack = 0;
+
+	if (type == PCI_CAP_ID_MSIX)
+		rc = check_req_msix(pdev, nvec);
+	else
+		rc = check_req_msi(pdev, nvec);
+
+	if (rc)
+		return rc;
+
+	quota = msi_quota_for_device(pdev, nvec);
+
+	if (quota && quota < nvec)
+		return quota;
+
+	if (type == PCI_CAP_ID_MSIX && check_msix_entries(pdev))
+		return -EINVAL;
+
+	/*
+	 * Firmware currently refuses any allocation that is not a power of
+	 * two, so we round up if the quota will allow it.
+	 */
+	if (type == PCI_CAP_ID_MSIX) {
+		int m = roundup_pow_of_two(nvec);
+		quota = msi_quota_for_device(pdev, m);
+
+		if (quota >= m)
+			nvec = m;
+	}
+
+	pdn = pci_get_pdn(pdev);
+
+	/*
+	 * Try the new, more explicit firmware interface; if that fails, fall
+	 * back to the old interface. The old interface is known to never
+	 * return MSI-Xs.
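+	 *
+	 * For plain MSI the resulting order below is: RTAS_CHANGE_32MSI_FN
+	 * (only when the device is limited to 32 bit MSI), then
+	 * RTAS_CHANGE_MSI_FN, then the legacy RTAS_CHANGE_FN.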
+ */ +again: + if (type == PCI_CAP_ID_MSI) { + if (pdev->no_64bit_msi) { + rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec); + if (rc < 0) { + /* + * We only want to run the 32 bit MSI hack below if + * the max bus speed is Gen2 speed + */ + if (pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT) + return rc; + + use_32bit_msi_hack = 1; + } + } else + rc = -1; + + if (rc < 0) + rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec); + + if (rc < 0) { + pr_debug("rtas_msi: trying the old firmware call.\n"); + rc = rtas_change_msi(pdn, RTAS_CHANGE_FN, nvec); + } + + if (use_32bit_msi_hack && rc > 0) + rtas_hack_32bit_msi_gen2(pdev); + } else + rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); + + if (rc != nvec) { + if (nvec != nvec_in) { + nvec = nvec_in; + goto again; + } + pr_debug("rtas_msi: rtas_change_msi() failed\n"); + return rc; + } + + i = 0; + list_for_each_entry(entry, &pdev->msi_list, list) { + hwirq = rtas_query_irq_number(pdn, i++); + if (hwirq < 0) { + pr_debug("rtas_msi: error (%d) getting hwirq\n", rc); + return hwirq; + } + + virq = irq_create_mapping(NULL, hwirq); + + if (virq == NO_IRQ) { + pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); + return -ENOSPC; + } + + dev_dbg(&pdev->dev, "rtas_msi: allocated virq %d\n", virq); + irq_set_msi_desc(virq, entry); + + /* Read config space back so we can restore after reset */ + __pci_read_msi_msg(entry, &msg); + entry->msg = msg; + } + + return 0; +} + +static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) +{ + /* No LSI -> leave MSIs (if any) configured */ + if (pdev->irq == NO_IRQ) { + dev_dbg(&pdev->dev, "rtas_msi: no LSI, nothing to do.\n"); + return; + } + + /* No MSI -> MSIs can't have been assigned by fw, leave LSI */ + if (check_req_msi(pdev, 1) && check_req_msix(pdev, 1)) { + dev_dbg(&pdev->dev, "rtas_msi: no req#msi/x, nothing to do.\n"); + return; + } + + dev_dbg(&pdev->dev, "rtas_msi: disabling existing MSI.\n"); + rtas_disable_msi(pdev); +} + +static int rtas_msi_init(void) +{ + query_token = rtas_token("ibm,query-interrupt-source-number"); + change_token = rtas_token("ibm,change-msi"); + + if ((query_token == RTAS_UNKNOWN_SERVICE) || + (change_token == RTAS_UNKNOWN_SERVICE)) { + pr_debug("rtas_msi: no RTAS tokens, no MSI support.\n"); + return -1; + } + + pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n"); + + WARN_ON(ppc_md.setup_msi_irqs); + ppc_md.setup_msi_irqs = rtas_setup_msi_irqs; + ppc_md.teardown_msi_irqs = rtas_teardown_msi_irqs; + + WARN_ON(ppc_md.pci_irq_fixup); + ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; + + return 0; +} +machine_arch_initcall(pseries, rtas_msi_init); diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c new file mode 100644 index 000000000..9f8184175 --- /dev/null +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -0,0 +1,249 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * /dev/nvram driver for PPC64 + * + * This perhaps should live in drivers/char + */ + + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/kmsg_dump.h> +#include <linux/pstore.h> +#include <linux/ctype.h> +#include <asm/uaccess.h> +#include <asm/nvram.h> +#include <asm/rtas.h> +#include <asm/prom.h> +#include <asm/machdep.h> + +/* Max bytes to read/write in one go */ +#define NVRW_CNT 0x20 + +static unsigned int nvram_size; +static int nvram_fetch, nvram_store; +static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ +static DEFINE_SPINLOCK(nvram_lock); + +/* See clobbering_unread_rtas_event() */ +#define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */ +static time64_t last_unread_rtas_event; /* timestamp */ + +#ifdef CONFIG_PSTORE +time64_t last_rtas_event; +#endif + +static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + int done; + unsigned long flags; + char *p = buf; + + + if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + memcpy(p, nvram_buf, len); + + p += len; + i += len; + } + + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + int done; + unsigned long flags; + const char *p = buf; + + if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + memcpy(nvram_buf, p, len); + + if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + p += len; + i += len; + } + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_get_size(void) +{ + return nvram_size ? nvram_size : -ENODEV; +} + +/* nvram_write_error_log + * + * We need to buffer the error logs into nvram to ensure that we have + * the failure information to decode. + */ +int nvram_write_error_log(char * buff, int length, + unsigned int err_type, unsigned int error_log_cnt) +{ + int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, + err_type, error_log_cnt); + if (!rc) { + last_unread_rtas_event = ktime_get_real_seconds(); +#ifdef CONFIG_PSTORE + last_rtas_event = ktime_get_real_seconds(); +#endif + } + + return rc; +} + +/* nvram_read_error_log + * + * Reads nvram for error log for at most 'length' + */ +int nvram_read_error_log(char *buff, int length, + unsigned int *err_type, unsigned int *error_log_cnt) +{ + return nvram_read_partition(&rtas_log_partition, buff, length, + err_type, error_log_cnt); +} + +/* This doesn't actually zero anything, but it sets the event_logged + * word to tell that this event is safely in syslog. 
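+ * (Concretely, the code below rewrites the first word of the rtas log
+ * partition with ERR_FLAG_ALREADY_LOGGED via ppc_md.nvram_write.)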
+ */ +int nvram_clear_error_log(void) +{ + loff_t tmp_index; + int clear_word = ERR_FLAG_ALREADY_LOGGED; + int rc; + + if (rtas_log_partition.index == -1) + return -1; + + tmp_index = rtas_log_partition.index; + + rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc); + return rc; + } + last_unread_rtas_event = 0; + + return 0; +} + +/* + * Are we using the ibm,rtas-log for oops/panic reports? And if so, + * would logging this oops/panic overwrite an RTAS event that rtas_errd + * hasn't had a chance to read and process? Return 1 if so, else 0. + * + * We assume that if rtas_errd hasn't read the RTAS event in + * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. + */ +int clobbering_unread_rtas_event(void) +{ + return (oops_log_partition.index == rtas_log_partition.index + && last_unread_rtas_event + && ktime_get_real_seconds() - last_unread_rtas_event <= + NVRAM_RTAS_READ_TIMEOUT); +} + +static int __init pseries_nvram_init_log_partitions(void) +{ + int rc; + + /* Scan nvram for partitions */ + nvram_scan_partitions(); + + rc = nvram_init_os_partition(&rtas_log_partition); + nvram_init_oops_partition(rc == 0); + return 0; +} +machine_arch_initcall(pseries, pseries_nvram_init_log_partitions); + +int __init pSeries_nvram_init(void) +{ + struct device_node *nvram; + const __be32 *nbytes_p; + unsigned int proplen; + + nvram = of_find_node_by_type(NULL, "nvram"); + if (nvram == NULL) + return -ENODEV; + + nbytes_p = of_get_property(nvram, "#bytes", &proplen); + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) { + of_node_put(nvram); + return -EIO; + } + + nvram_size = be32_to_cpup(nbytes_p); + + nvram_fetch = rtas_token("nvram-fetch"); + nvram_store = rtas_token("nvram-store"); + printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); + of_node_put(nvram); + + ppc_md.nvram_read = pSeries_nvram_read; + ppc_md.nvram_write = pSeries_nvram_write; + ppc_md.nvram_size = pSeries_nvram_get_size; + + return 0; +} + diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h new file mode 100644 index 000000000..08672d913 --- /dev/null +++ b/arch/powerpc/platforms/pseries/offline_states.h @@ -0,0 +1,37 @@ +#ifndef _OFFLINE_STATES_H_ +#define _OFFLINE_STATES_H_ + +/* Cpu offline states go here */ +enum cpu_state_vals { + CPU_STATE_OFFLINE, + CPU_STATE_INACTIVE, + CPU_STATE_ONLINE, + CPU_MAX_OFFLINE_STATES +}; + +#ifdef CONFIG_HOTPLUG_CPU +extern enum cpu_state_vals get_cpu_current_state(int cpu); +extern void set_cpu_current_state(int cpu, enum cpu_state_vals state); +extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state); +extern void set_default_offline_state(int cpu); +#else +static inline enum cpu_state_vals get_cpu_current_state(int cpu) +{ + return CPU_STATE_ONLINE; +} + +static inline void set_cpu_current_state(int cpu, enum cpu_state_vals state) +{ +} + +static inline void set_preferred_offline_state(int cpu, enum cpu_state_vals state) +{ +} + +static inline void set_default_offline_state(int cpu) +{ +} +#endif + +extern enum cpu_state_vals get_preferred_offline_state(int cpu); +#endif diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c new file mode 100644 index 000000000..fe16a5070 --- /dev/null +++ b/arch/powerpc/platforms/pseries/pci.c @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2001 Dave Engebretsen, IBM Corporation + * Copyright (C) 2003 Anton Blanchard 
<anton@au.ibm.com>, IBM + * + * pSeries specific routines for PCI. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include <asm/eeh.h> +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <asm/ppc-pci.h> +#include "pseries.h" + +#if 0 +void pcibios_name_device(struct pci_dev *dev) +{ + struct device_node *dn; + + /* + * Add IBM loc code (slot) as a prefix to the device names for service + */ + dn = pci_device_to_OF_node(dev); + if (dn) { + const char *loc_code = of_get_property(dn, "ibm,loc-code", + NULL); + if (loc_code) { + int loc_len = strlen(loc_code); + if (loc_len < sizeof(dev->dev.name)) { + memmove(dev->dev.name+loc_len+1, dev->dev.name, + sizeof(dev->dev.name)-loc_len-1); + memcpy(dev->dev.name, loc_code, loc_len); + dev->dev.name[loc_len] = ' '; + dev->dev.name[sizeof(dev->dev.name)-1] = '\0'; + } + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); +#endif + +static void __init pSeries_request_regions(void) +{ + if (!isa_io_base) + return; + + request_region(0x20,0x20,"pic1"); + request_region(0xa0,0x20,"pic2"); + request_region(0x00,0x20,"dma1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xc0,0x20,"dma2"); +} + +void __init pSeries_final_fixup(void) +{ + pSeries_request_regions(); + + eeh_addr_cache_build(); +} + +/* + * Assume the winbond 82c105 is the IDE controller on a + * p610/p615/p630. We should probably be more careful in case + * someone tries to plug in a similar adapter. 
+ */
+static void fixup_winbond_82c105(struct pci_dev* dev)
+{
+	int i;
+	unsigned int reg;
+
+	if (!machine_is(pseries))
+		return;
+
+	printk("Using INTC for W82c105 IDE controller.\n");
+	pci_read_config_dword(dev, 0x40, &reg);
+	/* Enable LEGIRQ to use INTC instead of ISA interrupts */
+	pci_write_config_dword(dev, 0x40, reg | (1<<11));
+
+	for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) {
+		/* zap the 2nd function of the winbond chip */
+		if (dev->resource[i].flags & IORESOURCE_IO
+		    && dev->bus->number == 0 && dev->devfn == 0x81)
+			dev->resource[i].flags &= ~IORESOURCE_IO;
+		if (dev->resource[i].start == 0 && dev->resource[i].end) {
+			dev->resource[i].flags = 0;
+			dev->resource[i].end = 0;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
+			 fixup_winbond_82c105);
+
+int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	struct device_node *dn, *pdn;
+	struct pci_bus *bus;
+	u32 pcie_link_speed_stats[2];
+	int rc;
+
+	bus = bridge->bus;
+
+	dn = pcibios_get_phb_of_node(bus);
+	if (!dn)
+		return 0;
+
+	for (pdn = dn; pdn != NULL; pdn = of_get_next_parent(pdn)) {
+		rc = of_property_read_u32_array(pdn,
+				"ibm,pcie-link-speed-stats",
+				&pcie_link_speed_stats[0], 2);
+		if (!rc)
+			break;
+	}
+
+	of_node_put(pdn);
+
+	if (rc) {
+		pr_debug("no ibm,pcie-link-speed-stats property\n");
+		return 0;
+	}
+
+	switch (pcie_link_speed_stats[0]) {
+	case 0x01:
+		bus->max_bus_speed = PCIE_SPEED_2_5GT;
+		break;
+	case 0x02:
+		bus->max_bus_speed = PCIE_SPEED_5_0GT;
+		break;
+	case 0x04:
+		bus->max_bus_speed = PCIE_SPEED_8_0GT;
+		break;
+	default:
+		bus->max_bus_speed = PCI_SPEED_UNKNOWN;
+		break;
+	}
+
+	switch (pcie_link_speed_stats[1]) {
+	case 0x01:
+		bus->cur_bus_speed = PCIE_SPEED_2_5GT;
+		break;
+	case 0x02:
+		bus->cur_bus_speed = PCIE_SPEED_5_0GT;
+		break;
+	case 0x04:
+		bus->cur_bus_speed = PCIE_SPEED_8_0GT;
+		break;
+	default:
+		bus->cur_bus_speed = PCI_SPEED_UNKNOWN;
+		break;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
new file mode 100644
index 000000000..5d4a3df59
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -0,0 +1,146 @@
+/*
+ * PCI Dynamic LPAR, PCI Hot Plug and PCI EEH recovery code
+ * for RPA-compliant PPC64 platform.
+ * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
+ * Copyright (C) 2005 International Business Machines
+ *
+ * Updates, 2005, John Rose <johnrose@austin.ibm.com>
+ * Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>
+
+#include "pseries.h"
+
+static struct pci_bus *
+find_bus_among_children(struct pci_bus *bus,
+			struct device_node *dn)
+{
+	struct pci_bus *child = NULL;
+	struct pci_bus *tmp;
+	struct device_node *busdn;
+
+	busdn = pci_bus_to_OF_node(bus);
+	if (busdn == dn)
+		return bus;
+
+	list_for_each_entry(tmp, &bus->children, node) {
+		child = find_bus_among_children(tmp, dn);
+		if (child)
+			break;
+	}
+	return child;
+}
+
+struct pci_bus *
+pcibios_find_pci_bus(struct device_node *dn)
+{
+	struct pci_dn *pdn = dn->data;
+
+	if (!pdn || !pdn->phb || !pdn->phb->bus)
+		return NULL;
+
+	return find_bus_among_children(pdn->phb->bus, dn);
+}
+EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
+
+struct pci_controller *init_phb_dynamic(struct device_node *dn)
+{
+	struct pci_controller *phb;
+
+	pr_debug("PCI: Initializing new hotplug PHB %s\n", dn->full_name);
+
+	phb = pcibios_alloc_controller(dn);
+	if (!phb)
+		return NULL;
+	rtas_setup_phb(phb);
+	pci_process_bridge_OF_ranges(phb, dn, 0);
+	phb->controller_ops = pseries_pci_controller_ops;
+
+	pci_devs_phb_init_dynamic(phb);
+
+	/* Create EEH devices for the PHB */
+	eeh_dev_phb_init_dynamic(phb);
+
+	if (dn->child)
+		eeh_add_device_tree_early(PCI_DN(dn));
+
+	pcibios_scan_phb(phb);
+	pcibios_finish_adding_to_bus(phb->bus);
+
+	return phb;
+}
+EXPORT_SYMBOL_GPL(init_phb_dynamic);
+
+/* RPA-specific bits for removing PHBs */
+int remove_phb_dynamic(struct pci_controller *phb)
+{
+	struct pci_bus *b = phb->bus;
+	struct resource *res;
+	int rc, i;
+
+	pr_debug("PCI: Removing PHB %04x:%02x...\n",
+		 pci_domain_nr(b), b->number);
+
+	/* We cannot remove a root bus that has children */
+	if (!(list_empty(&b->children) && list_empty(&b->devices)))
+		return -EBUSY;
+
+	/* We -know- there aren't any child devices anymore at this stage
+	 * and thus, we can safely unmap the IO space as it's not in use
+	 */
+	res = &phb->io_resource;
+	if (res->flags & IORESOURCE_IO) {
+		rc = pcibios_unmap_io_space(b);
+		if (rc) {
+			printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+			       __func__, b->name);
+			return 1;
+		}
+	}
+
+	/* Remove the PCI bus and unregister the bridge device from sysfs */
+	phb->bus = NULL;
+	pci_remove_bus(b);
+	device_unregister(b->bridge);
+
+	/* Now release the IO resource */
+	if (res->flags & IORESOURCE_IO)
+		release_resource(res);
+
+	/* Release memory resources */
+	for (i = 0; i < 3; ++i) {
+		res = &phb->mem_resources[i];
+		if (!(res->flags & IORESOURCE_MEM))
+			continue;
+		release_resource(res);
+	}
+
+	/* Free pci_controller data structure */
+	pcibios_free_controller(phb);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(remove_phb_dynamic);
diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c
new file mode 100644
index 000000000..c26eadde4
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/power.c
@@ -0,0 +1,82 @@
+/*
+ * Interface for power-management for ppc64 compliant platform
+ *
+ * Manish Ahuja <mahuja@us.ibm.com>
+ *
+ * Feb 2007
+ *
+ * Copyright (C) 2007 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kobject.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <asm/machdep.h> + +unsigned long rtas_poweron_auto; /* default and normal state is 0 */ + +static ssize_t auto_poweron_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", rtas_poweron_auto); +} + +static ssize_t auto_poweron_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int ret; + unsigned long ups_restart; + ret = sscanf(buf, "%lu", &ups_restart); + + if ((ret == 1) && ((ups_restart == 1) || (ups_restart == 0))){ + rtas_poweron_auto = ups_restart; + return n; + } + return -EINVAL; +} + +static struct kobj_attribute auto_poweron_attr = + __ATTR(auto_poweron, 0644, auto_poweron_show, auto_poweron_store); + +#ifndef CONFIG_PM +struct kobject *power_kobj; + +static struct attribute *g[] = { + &auto_poweron_attr.attr, + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = g, +}; + +static int __init pm_init(void) +{ + power_kobj = kobject_create_and_add("power", NULL); + if (!power_kobj) + return -ENOMEM; + return sysfs_create_group(power_kobj, &attr_group); +} +machine_core_initcall(pseries, pm_init); +#else +static int __init apo_pm_init(void) +{ + return (sysfs_create_file(power_kobj, &auto_poweron_attr.attr)); +} +machine_device_initcall(pseries, apo_pm_init); +#endif diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h new file mode 100644 index 000000000..8411c2729 --- /dev/null +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -0,0 +1,84 @@ +/* + * Copyright 2006 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _PSERIES_PSERIES_H +#define _PSERIES_PSERIES_H + +#include <linux/interrupt.h> +#include <asm/rtas.h> + +struct device_node; + +extern void request_event_sources_irqs(struct device_node *np, + irq_handler_t handler, const char *name); + +#include <linux/of.h> + +extern void __init fw_hypertas_feature_init(const char *hypertas, + unsigned long len); +extern void __init fw_vec5_feature_init(const char *hypertas, + unsigned long len); + +struct pt_regs; + +extern int pSeries_system_reset_exception(struct pt_regs *regs); +extern int pSeries_machine_check_exception(struct pt_regs *regs); + +#ifdef CONFIG_SMP +extern void smp_init_pseries_mpic(void); +extern void smp_init_pseries_xics(void); +#else +static inline void smp_init_pseries_mpic(void) { }; +static inline void smp_init_pseries_xics(void) { }; +#endif + +#ifdef CONFIG_KEXEC +extern void setup_kexec_cpu_down_xics(void); +extern void setup_kexec_cpu_down_mpic(void); +#else +static inline void setup_kexec_cpu_down_xics(void) { } +static inline void setup_kexec_cpu_down_mpic(void) { } +#endif + +extern void pSeries_final_fixup(void); + +/* Poweron flag used for enabling auto ups restart */ +extern unsigned long rtas_poweron_auto; + +/* Provided by HVC VIO */ +extern void hvc_vio_init_early(void); + +/* Dynamic logical Partitioning/Mobility */ +extern void dlpar_free_cc_nodes(struct device_node *); +extern void dlpar_free_cc_property(struct property *); +extern struct device_node *dlpar_configure_connector(__be32, + struct device_node *); +extern int dlpar_attach_node(struct device_node *); +extern int dlpar_detach_node(struct device_node *); +extern int dlpar_acquire_drc(u32 drc_index); +extern int dlpar_release_drc(u32 drc_index); + +#ifdef CONFIG_MEMORY_HOTPLUG +int dlpar_memory(struct pseries_hp_errorlog *hp_elog); +#else +static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog) +{ + return -EOPNOTSUPP; +} +#endif + +/* PCI root bridge prepare function override for pseries */ +struct pci_host_bridge; +int pseries_root_bridge_prepare(struct pci_host_bridge *bridge); + +extern struct pci_controller_ops pseries_pci_controller_ops; + +unsigned long pseries_memory_block_size(void); + +#endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c new file mode 100644 index 000000000..92767791f --- /dev/null +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -0,0 +1,290 @@ +/* + * POWER platform energy management driver + * Copyright (C) 2010 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This pseries platform device driver provides access to + * platform energy management capabilities. 
+ */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/device.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <asm/cputhreads.h> +#include <asm/page.h> +#include <asm/hvcall.h> +#include <asm/firmware.h> + + +#define MODULE_VERS "1.0" +#define MODULE_NAME "pseries_energy" + +/* Driver flags */ + +static int sysfs_entries; + +/* Helper routines */ + +/* Helper Routines to convert between drc_index to cpu numbers */ + +static u32 cpu_to_drc_index(int cpu) +{ + struct device_node *dn = NULL; + const int *indexes; + int i; + int rc = 1; + u32 ret = 0; + + dn = of_find_node_by_path("/cpus"); + if (dn == NULL) + goto err; + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); + if (indexes == NULL) + goto err_of_node_put; + /* Convert logical cpu number to core number */ + i = cpu_core_index_of_thread(cpu); + /* + * The first element indexes[0] is the number of drc_indexes + * returned in the list. Hence i+1 will get the drc_index + * corresponding to core number i. + */ + WARN_ON(i > indexes[0]); + ret = indexes[i + 1]; + rc = 0; + +err_of_node_put: + of_node_put(dn); +err: + if (rc) + printk(KERN_WARNING "cpu_to_drc_index(%d) failed", cpu); + return ret; +} + +static int drc_index_to_cpu(u32 drc_index) +{ + struct device_node *dn = NULL; + const int *indexes; + int i, cpu = 0; + int rc = 1; + + dn = of_find_node_by_path("/cpus"); + if (dn == NULL) + goto err; + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); + if (indexes == NULL) + goto err_of_node_put; + /* + * First element in the array is the number of drc_indexes + * returned. Search through the list to find the matching + * drc_index and get the core number + */ + for (i = 0; i < indexes[0]; i++) { + if (indexes[i + 1] == drc_index) + break; + } + /* Convert core number to logical cpu number */ + cpu = cpu_first_thread_of_core(i); + rc = 0; + +err_of_node_put: + of_node_put(dn); +err: + if (rc) + printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index); + return cpu; +} + +/* + * pseries hypervisor call H_BEST_ENERGY provides hints to OS on + * preferred logical cpus to activate or deactivate for optimized + * energy consumption. 
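+ *
+ * As read from the code below (an interpretation of this driver, not of
+ * the PAPR text): in list mode the hypervisor fills a caller-supplied
+ * zeroed page with pairs whose second word is a drc index
+ * (buf_page[2*i+1]) and returns the pair count in retbuf[0]; in
+ * single-cpu mode the hint for one drc index comes back in the upper
+ * 32 bits of retbuf[1].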
+ */ + +#define FLAGS_MODE1 0x004E200000080E01UL +#define FLAGS_MODE2 0x004E200000080401UL +#define FLAGS_ACTIVATE 0x100 + +static ssize_t get_best_energy_list(char *page, int activate) +{ + int rc, cnt, i, cpu; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + unsigned long flags = 0; + u32 *buf_page; + char *s = page; + + buf_page = (u32 *) get_zeroed_page(GFP_KERNEL); + if (!buf_page) + return -ENOMEM; + + flags = FLAGS_MODE1; + if (activate) + flags |= FLAGS_ACTIVATE; + + rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, 0, __pa(buf_page), + 0, 0, 0, 0, 0, 0); + if (rc != H_SUCCESS) { + free_page((unsigned long) buf_page); + return -EINVAL; + } + + cnt = retbuf[0]; + for (i = 0; i < cnt; i++) { + cpu = drc_index_to_cpu(buf_page[2*i+1]); + if ((cpu_online(cpu) && !activate) || + (!cpu_online(cpu) && activate)) + s += sprintf(s, "%d,", cpu); + } + if (s > page) { /* Something to show */ + s--; /* Suppress last comma */ + s += sprintf(s, "\n"); + } + + free_page((unsigned long) buf_page); + return s-page; +} + +static ssize_t get_best_energy_data(struct device *dev, + char *page, int activate) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + unsigned long flags = 0; + + flags = FLAGS_MODE2; + if (activate) + flags |= FLAGS_ACTIVATE; + + rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, + cpu_to_drc_index(dev->id), + 0, 0, 0, 0, 0, 0, 0); + + if (rc != H_SUCCESS) + return -EINVAL; + + return sprintf(page, "%lu\n", retbuf[1] >> 32); +} + +/* Wrapper functions */ + +static ssize_t cpu_activate_hint_list_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + return get_best_energy_list(page, 1); +} + +static ssize_t cpu_deactivate_hint_list_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + return get_best_energy_list(page, 0); +} + +static ssize_t percpu_activate_hint_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + return get_best_energy_data(dev, page, 1); +} + +static ssize_t percpu_deactivate_hint_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + return get_best_energy_data(dev, page, 0); +} + +/* + * Create sysfs interface: + * /sys/devices/system/cpu/pseries_activate_hint_list + * /sys/devices/system/cpu/pseries_deactivate_hint_list + * Comma separated list of cpus to activate or deactivate + * /sys/devices/system/cpu/cpuN/pseries_activate_hint + * /sys/devices/system/cpu/cpuN/pseries_deactivate_hint + * Per-cpu value of the hint + */ + +struct device_attribute attr_cpu_activate_hint_list = + __ATTR(pseries_activate_hint_list, 0444, + cpu_activate_hint_list_show, NULL); + +struct device_attribute attr_cpu_deactivate_hint_list = + __ATTR(pseries_deactivate_hint_list, 0444, + cpu_deactivate_hint_list_show, NULL); + +struct device_attribute attr_percpu_activate_hint = + __ATTR(pseries_activate_hint, 0444, + percpu_activate_hint_show, NULL); + +struct device_attribute attr_percpu_deactivate_hint = + __ATTR(pseries_deactivate_hint, 0444, + percpu_deactivate_hint_show, NULL); + +static int __init pseries_energy_init(void) +{ + int cpu, err; + struct device *cpu_dev; + + if (!firmware_has_feature(FW_FEATURE_BEST_ENERGY)) { + printk(KERN_INFO "Hypercall H_BEST_ENERGY not supported\n"); + return 0; + } + /* Create the sysfs files */ + err = device_create_file(cpu_subsys.dev_root, + &attr_cpu_activate_hint_list); + if (!err) + err = device_create_file(cpu_subsys.dev_root, + &attr_cpu_deactivate_hint_list); + + if (err) + return err; + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); 
+ err = device_create_file(cpu_dev, + &attr_percpu_activate_hint); + if (err) + break; + err = device_create_file(cpu_dev, + &attr_percpu_deactivate_hint); + if (err) + break; + } + + if (err) + return err; + + sysfs_entries = 1; /* Removed entries on cleanup */ + return 0; + +} + +static void __exit pseries_energy_cleanup(void) +{ + int cpu; + struct device *cpu_dev; + + if (!sysfs_entries) + return; + + /* Remove the sysfs files */ + device_remove_file(cpu_subsys.dev_root, &attr_cpu_activate_hint_list); + device_remove_file(cpu_subsys.dev_root, &attr_cpu_deactivate_hint_list); + + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + sysfs_remove_file(&cpu_dev->kobj, + &attr_percpu_activate_hint.attr); + sysfs_remove_file(&cpu_dev->kobj, + &attr_percpu_deactivate_hint.attr); + } +} + +module_init(pseries_energy_init); +module_exit(pseries_energy_cleanup); +MODULE_DESCRIPTION("Driver for pSeries platform energy management"); +MODULE_AUTHOR("Vaidyanathan Srinivasan"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c new file mode 100644 index 000000000..02e4a1745 --- /dev/null +++ b/arch/powerpc/platforms/pseries/ras.c @@ -0,0 +1,416 @@ +/* + * Copyright (C) 2001 Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/of.h> +#include <linux/fs.h> +#include <linux/reboot.h> + +#include <asm/machdep.h> +#include <asm/rtas.h> +#include <asm/firmware.h> + +#include "pseries.h" + +static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; +static DEFINE_SPINLOCK(ras_log_buf_lock); + +static char global_mce_data_buf[RTAS_ERROR_LOG_MAX]; +static DEFINE_PER_CPU(__u64, mce_data_buf); + +static int ras_check_exception_token; + +#define EPOW_SENSOR_TOKEN 9 +#define EPOW_SENSOR_INDEX 0 + +static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); +static irqreturn_t ras_error_interrupt(int irq, void *dev_id); + + +/* + * Initialize handlers for the set of interrupts caused by hardware errors + * and power system events. 
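+ *
+ * Both handlers are wired up from the /event-sources device-tree nodes
+ * ("internal-errors" and "epow-events"); request_event_sources_irqs()
+ * requests one instance of the given handler per interrupt specifier
+ * listed in the node.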
+ */ +static int __init init_ras_IRQ(void) +{ + struct device_node *np; + + ras_check_exception_token = rtas_token("check-exception"); + + /* Internal Errors */ + np = of_find_node_by_path("/event-sources/internal-errors"); + if (np != NULL) { + request_event_sources_irqs(np, ras_error_interrupt, + "RAS_ERROR"); + of_node_put(np); + } + + /* EPOW Events */ + np = of_find_node_by_path("/event-sources/epow-events"); + if (np != NULL) { + request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW"); + of_node_put(np); + } + + return 0; +} +machine_subsys_initcall(pseries, init_ras_IRQ); + +#define EPOW_SHUTDOWN_NORMAL 1 +#define EPOW_SHUTDOWN_ON_UPS 2 +#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3 +#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4 + +static void handle_system_shutdown(char event_modifier) +{ + switch (event_modifier) { + case EPOW_SHUTDOWN_NORMAL: + pr_emerg("Firmware initiated power off"); + orderly_poweroff(true); + break; + + case EPOW_SHUTDOWN_ON_UPS: + pr_emerg("Loss of power reported by firmware, system is " + "running on UPS/battery"); + pr_emerg("Check RTAS error log for details"); + orderly_poweroff(true); + break; + + case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: + pr_emerg("Loss of system critical functions reported by " + "firmware"); + pr_emerg("Check RTAS error log for details"); + orderly_poweroff(true); + break; + + case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: + pr_emerg("Ambient temperature too high reported by firmware"); + pr_emerg("Check RTAS error log for details"); + orderly_poweroff(true); + break; + + default: + pr_err("Unknown power/cooling shutdown event (modifier %d)", + event_modifier); + } +} + +struct epow_errorlog { + unsigned char sensor_value; + unsigned char event_modifier; + unsigned char extended_modifier; + unsigned char reserved; + unsigned char platform_reason; +}; + +#define EPOW_RESET 0 +#define EPOW_WARN_COOLING 1 +#define EPOW_WARN_POWER 2 +#define EPOW_SYSTEM_SHUTDOWN 3 +#define EPOW_SYSTEM_HALT 4 +#define EPOW_MAIN_ENCLOSURE 5 +#define EPOW_POWER_OFF 7 + +static void rtas_parse_epow_errlog(struct rtas_error_log *log) +{ + struct pseries_errorlog *pseries_log; + struct epow_errorlog *epow_log; + char action_code; + char modifier; + + pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); + if (pseries_log == NULL) + return; + + epow_log = (struct epow_errorlog *)pseries_log->data; + action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ + modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ + + switch (action_code) { + case EPOW_RESET: + pr_err("Non critical power or cooling issue cleared"); + break; + + case EPOW_WARN_COOLING: + pr_err("Non critical cooling issue reported by firmware"); + pr_err("Check RTAS error log for details"); + break; + + case EPOW_WARN_POWER: + pr_err("Non critical power issue reported by firmware"); + pr_err("Check RTAS error log for details"); + break; + + case EPOW_SYSTEM_SHUTDOWN: + handle_system_shutdown(epow_log->event_modifier); + break; + + case EPOW_SYSTEM_HALT: + pr_emerg("Firmware initiated power off"); + orderly_poweroff(true); + break; + + case EPOW_MAIN_ENCLOSURE: + case EPOW_POWER_OFF: + pr_emerg("Critical power/cooling issue reported by firmware"); + pr_emerg("Check RTAS error log for details"); + pr_emerg("Immediate power off"); + emergency_sync(); + kernel_power_off(); + break; + + default: + pr_err("Unknown power/cooling event (action code %d)", + action_code); + } +} + +/* Handle environmental and power warning (EPOW) interrupts. 
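+ *
+ * The handler reads the EPOW sensor to decide whether the event is time
+ * critical, pulls the full error log in via check-exception, and hands
+ * it to rtas_parse_epow_errlog() above.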
*/ +static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) +{ + int status; + int state; + int critical; + + status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); + + if (state > 3) + critical = 1; /* Time Critical */ + else + critical = 0; + + spin_lock(&ras_log_buf_lock); + + status = rtas_call(ras_check_exception_token, 6, 1, NULL, + RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), + RTAS_EPOW_WARNING, + critical, __pa(&ras_log_buf), + rtas_get_error_log_max()); + + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); + + rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf); + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; +} + +/* + * Handle hardware error interrupts. + * + * RTAS check-exception is called to collect data on the exception. If + * the error is deemed recoverable, we log a warning and return. + * For nonrecoverable errors, an error is logged and we stop all processing + * as quickly as possible in order to prevent propagation of the failure. + */ +static irqreturn_t ras_error_interrupt(int irq, void *dev_id) +{ + struct rtas_error_log *rtas_elog; + int status; + int fatal; + + spin_lock(&ras_log_buf_lock); + + status = rtas_call(ras_check_exception_token, 6, 1, NULL, + RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), + RTAS_INTERNAL_ERROR, 1 /* Time Critical */, + __pa(&ras_log_buf), + rtas_get_error_log_max()); + + rtas_elog = (struct rtas_error_log *)ras_log_buf; + + if (status == 0 && + rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC) + fatal = 1; + else + fatal = 0; + + /* format and print the extended information */ + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); + + if (fatal) { + pr_emerg("Fatal hardware error reported by firmware"); + pr_emerg("Check RTAS error log for details"); + pr_emerg("Immediate power off"); + emergency_sync(); + kernel_power_off(); + } else { + pr_err("Recoverable hardware error reported by firmware"); + } + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; +} + +/* + * Some versions of FWNMI place the buffer inside the 4kB page starting at + * 0x7000. Other versions place it inside the rtas buffer. We check both. + */ +#define VALID_FWNMI_BUFFER(A) \ + ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ + (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) + +/* + * Get the error information for errors coming through the + * FWNMI vectors. The pt_regs' r3 will be updated to reflect + * the actual r3 if possible, and a ptr to the error log entry + * will be returned if found. + * + * If the RTAS error is not of the extended type, then we put it in a per + * cpu 64bit buffer. If it is the extended type we use global_mce_data_buf. + * + * The global_mce_data_buf does not have any locks or protection around it, + * if a second machine check comes in, or a system reset is done + * before we have logged the error, then we will get corruption in the + * error log. This is preferable over holding off on calling + * ibm,nmi-interlock which would result in us checkstopping if a + * second machine check did come in. 
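+ *
+ * Layout of the save area as consumed below: savep[0] holds the
+ * interrupted r3, and the RTAS error log itself starts at savep[1].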
+ */
+static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
+{
+	unsigned long *savep;
+	struct rtas_error_log *h, *errhdr = NULL;
+
+	/* Mask top two bits */
+	regs->gpr[3] &= ~(0x3UL << 62);
+
+	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
+		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
+		return NULL;
+	}
+
+	savep = __va(regs->gpr[3]);
+	regs->gpr[3] = savep[0];	/* restore original r3 */
+
+	/* If it isn't an extended log we can use the per cpu 64bit buffer */
+	h = (struct rtas_error_log *)&savep[1];
+	if (!rtas_error_extended(h)) {
+		memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64));
+		errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf);
+	} else {
+		int len, error_log_length;
+
+		error_log_length = 8 + rtas_error_extended_log_length(h);
+		/* Never copy more than global_mce_data_buf can hold */
+		len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
+		memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
+		memcpy(global_mce_data_buf, h, len);
+		errhdr = (struct rtas_error_log *)global_mce_data_buf;
+	}
+
+	return errhdr;
+}
+
+/* Call this when done with the data returned by fwnmi_get_errinfo.
+ * It will release the saved data area for other CPUs in the
+ * partition to receive FWNMI errors.
+ */
+static void fwnmi_release_errinfo(void)
+{
+	int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
+	if (ret != 0)
+		printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
+}
+
+int pSeries_system_reset_exception(struct pt_regs *regs)
+{
+	if (fwnmi_active) {
+		struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
+		if (errhdr) {
+			/* XXX Should look at FWNMI information */
+		}
+		fwnmi_release_errinfo();
+	}
+	return 0; /* need to perform reset */
+}
+
+/*
+ * See if we can recover from a machine check exception.
+ * This is only called on power4 (or above) and only via
+ * the Firmware Non-Maskable Interrupts (fwnmi) handler
+ * which provides the error analysis for us.
+ *
+ * Return 1 if corrected (or delivered a signal).
+ * Return 0 if there is nothing we can do.
+ */
+static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
+{
+	int recovered = 0;
+	int disposition = rtas_error_disposition(err);
+
+	if (!(regs->msr & MSR_RI)) {
+		/* If MSR_RI isn't set, we cannot recover */
+		recovered = 0;
+
+	} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
+		/* Platform corrected itself */
+		recovered = 1;
+
+	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
+		/* Platform corrected itself but could be degraded */
+		printk(KERN_ERR "MCE: limited recovery, system may "
+		       "be degraded\n");
+		recovered = 1;
+
+	} else if (user_mode(regs) && !is_global_init(current) &&
+		   rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {
+
+		/*
+		 * If we received a synchronous error when in userspace
+		 * kill the task. Firmware may report details of the failure
+		 * asynchronously, so we can't rely on the target and type
+		 * fields being valid here.
+		 */
+		printk(KERN_ERR "MCE: uncorrectable error, killing task "
+		       "%s:%d\n", current->comm, current->pid);
+
+		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+		recovered = 1;
+	}
+
+	log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
+
+	return recovered;
+}
+
+/*
+ * Handle a machine check.
+ *
+ * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
+ * should be present. If so the handler which called us tells us if the
+ * error was recovered (never true if RI=0).
+ *
+ * On hardware prior to Power 4 these exceptions were asynchronous which
+ * means we can't tell exactly where it occurred and so we can't recover.
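+ *
+ * Returns 1 if the error was recovered (or a signal was delivered),
+ * 0 if nothing could be done; see recover_mce() above.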
+ */ +int pSeries_machine_check_exception(struct pt_regs *regs) +{ + struct rtas_error_log *errp; + + if (fwnmi_active) { + errp = fwnmi_get_errinfo(regs); + fwnmi_release_errinfo(); + if (errp && recover_mce(regs, errp)) + return 1; + } + + return 0; +} diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c new file mode 100644 index 000000000..0f319521e --- /dev/null +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -0,0 +1,455 @@ +/* + * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI + * Hotplug and Dynamic Logical Partitioning on RPA platforms). + * + * Copyright (C) 2005 Nathan Lynch + * Copyright (C) 2005 IBM Corporation + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/notifier.h> +#include <linux/proc_fs.h> +#include <linux/slab.h> +#include <linux/of.h> + +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/uaccess.h> +#include <asm/mmu.h> + +/** + * derive_parent - basically like dirname(1) + * @path: the full_name of a node to be added to the tree + * + * Returns the node which should be the parent of the node + * described by path. E.g., for path = "/foo/bar", returns + * the node with full_name = "/foo". + */ +static struct device_node *derive_parent(const char *path) +{ + struct device_node *parent = NULL; + char *parent_path = "/"; + size_t parent_path_len = strrchr(path, '/') - path + 1; + + /* reject if path is "/" */ + if (!strcmp(path, "/")) + return ERR_PTR(-EINVAL); + + if (strrchr(path, '/') != path) { + parent_path = kmalloc(parent_path_len, GFP_KERNEL); + if (!parent_path) + return ERR_PTR(-ENOMEM); + strlcpy(parent_path, path, parent_path_len); + } + parent = of_find_node_by_path(parent_path); + if (!parent) + return ERR_PTR(-EINVAL); + if (strcmp(parent_path, "/")) + kfree(parent_path); + return parent; +} + +static int pSeries_reconfig_add_node(const char *path, struct property *proplist) +{ + struct device_node *np; + int err = -ENOMEM; + + np = kzalloc(sizeof(*np), GFP_KERNEL); + if (!np) + goto out_err; + + np->full_name = kstrdup(path, GFP_KERNEL); + if (!np->full_name) + goto out_err; + + np->properties = proplist; + of_node_set_flag(np, OF_DYNAMIC); + of_node_init(np); + + np->parent = derive_parent(path); + if (IS_ERR(np->parent)) { + err = PTR_ERR(np->parent); + goto out_err; + } + + err = of_attach_node(np); + if (err) { + printk(KERN_ERR "Failed to add device node %s\n", path); + goto out_err; + } + + of_node_put(np->parent); + + return 0; + +out_err: + if (np) { + of_node_put(np->parent); + kfree(np->full_name); + kfree(np); + } + return err; +} + +static int pSeries_reconfig_remove_node(struct device_node *np) +{ + struct device_node *parent, *child; + + parent = of_get_parent(np); + if (!parent) + return -EINVAL; + + if ((child = of_get_next_child(np, NULL))) { + of_node_put(child); + of_node_put(parent); + return -EBUSY; + } + + of_detach_node(np); + of_node_put(parent); + of_node_put(np); /* Must decrement the refcount */ + return 0; +} + +/* + * /proc/powerpc/ofdt - yucky binary interface for adding and removing + * OF device nodes. Should be deprecated as soon as we get an + * in-kernel wrapper for the RTAS ibm,configure-connector call. 
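+ *
+ * Illustrative usage (node and property values here are examples only;
+ * the grammar comes from parse_next_property() below, where each
+ * property is "<name> <length> <value>" and <value> is exactly
+ * <length> raw bytes):
+ *
+ *   echo -n "add_node /example@1 name 7 example" > /proc/powerpc/ofdt
+ *   echo -n "remove_node /example@1" > /proc/powerpc/ofdt
+ *
+ * The trailing newline echo would normally append must be suppressed,
+ * or the last value fails the delimiter check.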
+ */ + +static void release_prop_list(const struct property *prop) +{ + struct property *next; + for (; prop; prop = next) { + next = prop->next; + kfree(prop->name); + kfree(prop->value); + kfree(prop); + } + +} + +/** + * parse_next_property - process the next property from raw input buffer + * @buf: input buffer, must be nul-terminated + * @end: end of the input buffer + 1, for validation + * @name: return value; set to property name in buf + * @length: return value; set to length of value + * @value: return value; set to the property value in buf + * + * Note that the caller must make copies of the name and value returned, + * this function does no allocation or copying of the data. Return value + * is set to the next name in buf, or NULL on error. + */ +static char * parse_next_property(char *buf, char *end, char **name, int *length, + unsigned char **value) +{ + char *tmp; + + *name = buf; + + tmp = strchr(buf, ' '); + if (!tmp) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + *tmp = '\0'; + + if (++tmp >= end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + + /* now we're on the length */ + *length = -1; + *length = simple_strtoul(tmp, &tmp, 10); + if (*length == -1) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + if (*tmp != ' ' || ++tmp >= end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + + /* now we're on the value */ + *value = tmp; + tmp += *length; + if (tmp > end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + else if (tmp < end && *tmp != ' ' && *tmp != '\0') { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + tmp++; + + /* and now we should be on the next name, or the end */ + return tmp; +} + +static struct property *new_property(const char *name, const int length, + const unsigned char *value, struct property *last) +{ + struct property *new = kzalloc(sizeof(*new), GFP_KERNEL); + + if (!new) + return NULL; + + if (!(new->name = kstrdup(name, GFP_KERNEL))) + goto cleanup; + if (!(new->value = kmalloc(length + 1, GFP_KERNEL))) + goto cleanup; + + memcpy(new->value, value, length); + *(((char *)new->value) + length) = 0; + new->length = length; + new->next = last; + return new; + +cleanup: + kfree(new->name); + kfree(new->value); + kfree(new); + return NULL; +} + +static int do_add_node(char *buf, size_t bufsize) +{ + char *path, *end, *name; + struct device_node *np; + struct property *prop = NULL; + unsigned char* value; + int length, rv = 0; + + end = buf + bufsize; + path = buf; + buf = strchr(buf, ' '); + if (!buf) + return -EINVAL; + *buf = '\0'; + buf++; + + if ((np = of_find_node_by_path(path))) { + of_node_put(np); + return -EINVAL; + } + + /* rv = build_prop_list(tmp, bufsize - (tmp - buf), &proplist); */ + while (buf < end && + (buf = parse_next_property(buf, end, &name, &length, &value))) { + struct property *last = prop; + + prop = new_property(name, length, value, last); + if (!prop) { + rv = -ENOMEM; + prop = last; + goto out; + } + } + if (!buf) { + rv = -EINVAL; + goto out; + } + + rv = pSeries_reconfig_add_node(path, prop); + +out: + if (rv) + release_prop_list(prop); + return rv; +} + +static int do_remove_node(char *buf) +{ + struct device_node *node; + int rv = -ENODEV; + + if ((node = 
of_find_node_by_path(buf))) + rv = pSeries_reconfig_remove_node(node); + + of_node_put(node); + return rv; +} + +static char *parse_node(char *buf, size_t bufsize, struct device_node **npp) +{ + char *handle_str; + phandle handle; + *npp = NULL; + + handle_str = buf; + + buf = strchr(buf, ' '); + if (!buf) + return NULL; + *buf = '\0'; + buf++; + + handle = simple_strtoul(handle_str, NULL, 0); + + *npp = of_find_node_by_phandle(handle); + return buf; +} + +static int do_add_property(char *buf, size_t bufsize) +{ + struct property *prop = NULL; + struct device_node *np; + unsigned char *value; + char *name, *end; + int length; + end = buf + bufsize; + buf = parse_node(buf, bufsize, &np); + + if (!np) + return -ENODEV; + + if (parse_next_property(buf, end, &name, &length, &value) == NULL) + return -EINVAL; + + prop = new_property(name, length, value, NULL); + if (!prop) + return -ENOMEM; + + of_add_property(np, prop); + + return 0; +} + +static int do_remove_property(char *buf, size_t bufsize) +{ + struct device_node *np; + char *tmp; + struct property *prop; + buf = parse_node(buf, bufsize, &np); + + if (!np) + return -ENODEV; + + tmp = strchr(buf,' '); + if (tmp) + *tmp = '\0'; + + if (strlen(buf) == 0) + return -EINVAL; + + prop = of_find_property(np, buf, NULL); + + return of_remove_property(np, prop); +} + +static int do_update_property(char *buf, size_t bufsize) +{ + struct device_node *np; + unsigned char *value; + char *name, *end, *next_prop; + int length; + struct property *newprop; + buf = parse_node(buf, bufsize, &np); + end = buf + bufsize; + + if (!np) + return -ENODEV; + + next_prop = parse_next_property(buf, end, &name, &length, &value); + if (!next_prop) + return -EINVAL; + + if (!strlen(name)) + return -ENODEV; + + newprop = new_property(name, length, value, NULL); + if (!newprop) + return -ENOMEM; + + if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size")) + slb_set_size(*(int *)value); + + return of_update_property(np, newprop); +} + +/** + * ofdt_write - perform operations on the Open Firmware device tree + * + * @file: not used + * @buf: command and arguments + * @count: size of the command buffer + * @off: not used + * + * Operations supported at this time are addition and removal of + * whole nodes along with their properties. Operations on individual + * properties are not implemented (yet). + */ +static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count, + loff_t *off) +{ + int rv = 0; + char *kbuf; + char *tmp; + + if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) { + rv = -ENOMEM; + goto out; + } + if (copy_from_user(kbuf, buf, count)) { + rv = -EFAULT; + goto out; + } + + kbuf[count] = '\0'; + + tmp = strchr(kbuf, ' '); + if (!tmp) { + rv = -EINVAL; + goto out; + } + *tmp = '\0'; + tmp++; + + if (!strcmp(kbuf, "add_node")) + rv = do_add_node(tmp, count - (tmp - kbuf)); + else if (!strcmp(kbuf, "remove_node")) + rv = do_remove_node(tmp); + else if (!strcmp(kbuf, "add_property")) + rv = do_add_property(tmp, count - (tmp - kbuf)); + else if (!strcmp(kbuf, "remove_property")) + rv = do_remove_property(tmp, count - (tmp - kbuf)); + else if (!strcmp(kbuf, "update_property")) + rv = do_update_property(tmp, count - (tmp - kbuf)); + else + rv = -EINVAL; +out: + kfree(kbuf); + return rv ? 
rv : count; +} + +static const struct file_operations ofdt_fops = { + .write = ofdt_write, + .llseek = noop_llseek, +}; + +/* create /proc/powerpc/ofdt write-only by root */ +static int proc_ppc64_create_ofdt(void) +{ + struct proc_dir_entry *ent; + + ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops); + if (ent) + proc_set_size(ent, 0); + + return 0; +} +machine_device_initcall(pseries, proc_ppc64_create_ofdt); diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c new file mode 100644 index 000000000..e09608770 --- /dev/null +++ b/arch/powerpc/platforms/pseries/rng.c @@ -0,0 +1,45 @@ +/* + * Copyright 2013, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "pseries-rng: " fmt + +#include <linux/kernel.h> +#include <linux/of.h> +#include <asm/archrandom.h> +#include <asm/machdep.h> +#include <asm/plpar_wrappers.h> + + +static int pseries_get_random_long(unsigned long *v) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + if (plpar_hcall(H_RANDOM, retbuf) == H_SUCCESS) { + *v = retbuf[0]; + return 1; + } + + return 0; +} + +static __init int rng_init(void) +{ + struct device_node *dn; + + dn = of_find_compatible_node(NULL, NULL, "ibm,random"); + if (!dn) + return -ENODEV; + + pr_info("Registering arch random hook.\n"); + + ppc_md.get_random_long = pseries_get_random_long; + + return 0; +} +machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c new file mode 100644 index 000000000..b502ab61a --- /dev/null +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -0,0 +1,200 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com> + * + * When ppc64 hardware fails the service processor dumps internal state + * of the system. After a reboot the operating system can access a dump + * of this data using this driver. A dump exists if the device-tree + * /chosen/ibm,scan-log-data property exists. + * + * This driver exports /proc/powerpc/scan-log-dump which can be read. + * The driver supports only sequential reads. + * + * The driver looks at a write to the driver for the single word "reset". + * If given, the driver will reset the scanlog so the platform can free it. 
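+ *
+ * Illustrative usage (output file name is an example only):
+ *
+ *   dd if=/proc/powerpc/scan-log-dump of=scanlog.bin bs=4k
+ *   echo reset > /proc/powerpc/scan-log-dump
+ *
+ * Note that scanlog_read() below rejects reads smaller than 1024 bytes,
+ * so a large block size is required.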
+ */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <asm/uaccess.h> +#include <asm/rtas.h> +#include <asm/prom.h> + +#define MODULE_VERS "1.0" +#define MODULE_NAME "scanlog" + +/* Status returns from ibm,scan-log-dump */ +#define SCANLOG_COMPLETE 0 +#define SCANLOG_HWERROR -1 +#define SCANLOG_CONTINUE 1 + + +static unsigned int ibm_scan_log_dump; /* RTAS token */ +static unsigned int *scanlog_buffer; /* The data buffer */ + +static ssize_t scanlog_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int *data = scanlog_buffer; + int status; + unsigned long len, off; + unsigned int wait_time; + + if (count > RTAS_DATA_BUF_SIZE) + count = RTAS_DATA_BUF_SIZE; + + if (count < 1024) { + /* This is the min supported by this RTAS call. Rather + * than do all the buffering we insist the user code handle + * larger reads. As long as cp works... :) + */ + printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count); + return -EINVAL; + } + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + for (;;) { + wait_time = 500; /* default wait if no data */ + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE); + status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, + (u32) __pa(rtas_data_buf), (u32) count); + memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); + spin_unlock(&rtas_data_buf_lock); + + pr_debug("scanlog: status=%d, data[0]=%x, data[1]=%x, " \ + "data[2]=%x\n", status, data[0], data[1], data[2]); + switch (status) { + case SCANLOG_COMPLETE: + pr_debug("scanlog: hit eof\n"); + return 0; + case SCANLOG_HWERROR: + pr_debug("scanlog: hardware error reading data\n"); + return -EIO; + case SCANLOG_CONTINUE: + /* We may or may not have data yet */ + len = data[1]; + off = data[2]; + if (len > 0) { + if (copy_to_user(buf, ((char *)data)+off, len)) + return -EFAULT; + return len; + } + /* Break to sleep default time */ + break; + default: + /* Assume extended busy */ + wait_time = rtas_busy_delay_time(status); + if (!wait_time) { + printk(KERN_ERR "scanlog: unknown error " \ + "from rtas: %d\n", status); + return -EIO; + } + } + /* Apparently no data yet. Wait and try again. 
*/ + msleep_interruptible(wait_time); + } + /*NOTREACHED*/ +} + +static ssize_t scanlog_write(struct file * file, const char __user * buf, + size_t count, loff_t *ppos) +{ + char stkbuf[20]; + int status; + + if (count > 19) count = 19; + if (copy_from_user (stkbuf, buf, count)) { + return -EFAULT; + } + stkbuf[count] = 0; + + if (buf) { + if (strncmp(stkbuf, "reset", 5) == 0) { + pr_debug("scanlog: reset scanlog\n"); + status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); + pr_debug("scanlog: rtas returns %d\n", status); + } + } + return count; +} + +static int scanlog_open(struct inode * inode, struct file * file) +{ + unsigned int *data = scanlog_buffer; + + if (data[0] != 0) { + /* This imperfect test stops a second copy of the + * data (or a reset while data is being copied) + */ + return -EBUSY; + } + + data[0] = 0; /* re-init so we restart the scan */ + + return 0; +} + +static int scanlog_release(struct inode * inode, struct file * file) +{ + unsigned int *data = scanlog_buffer; + + data[0] = 0; + return 0; +} + +const struct file_operations scanlog_fops = { + .owner = THIS_MODULE, + .read = scanlog_read, + .write = scanlog_write, + .open = scanlog_open, + .release = scanlog_release, + .llseek = noop_llseek, +}; + +static int __init scanlog_init(void) +{ + struct proc_dir_entry *ent; + int err = -ENOMEM; + + ibm_scan_log_dump = rtas_token("ibm,scan-log-dump"); + if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + /* Ideally we could allocate a buffer < 4G */ + scanlog_buffer = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!scanlog_buffer) + goto err; + + ent = proc_create("powerpc/rtas/scan-log-dump", S_IRUSR, NULL, + &scanlog_fops); + if (!ent) + goto err; + return 0; +err: + kfree(scanlog_buffer); + return err; +} + +static void __exit scanlog_cleanup(void) +{ + remove_proc_entry("powerpc/rtas/scan-log-dump", NULL); + kfree(scanlog_buffer); +} + +module_init(scanlog_init); +module_exit(scanlog_cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c new file mode 100644 index 000000000..df6a70419 --- /dev/null +++ b/arch/powerpc/platforms/pseries/setup.c @@ -0,0 +1,865 @@ +/* + * 64-bit pSeries and RS/6000 setup code. + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * bootup setup stuff.. 
+ */ + +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/user.h> +#include <linux/tty.h> +#include <linux/major.h> +#include <linux/interrupt.h> +#include <linux/reboot.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/console.h> +#include <linux/pci.h> +#include <linux/utsname.h> +#include <linux/adb.h> +#include <linux/export.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/of.h> +#include <linux/kexec.h> + +#include <asm/mmu.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/pci-bridge.h> +#include <asm/iommu.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/time.h> +#include <asm/nvram.h> +#include <asm/pmc.h> +#include <asm/mpic.h> +#include <asm/xics.h> +#include <asm/ppc-pci.h> +#include <asm/i8259.h> +#include <asm/udbg.h> +#include <asm/smp.h> +#include <asm/firmware.h> +#include <asm/eeh.h> +#include <asm/reg.h> +#include <asm/plpar_wrappers.h> + +#include "pseries.h" + +int CMO_PrPSP = -1; +int CMO_SecPSP = -1; +unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); +EXPORT_SYMBOL(CMO_PageSize); + +int fwnmi_active; /* TRUE if an FWNMI handler is present */ + +static struct device_node *pSeries_mpic_node; + +static void pSeries_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); +} + +/* Initialize firmware assisted non-maskable interrupts if + * the firmware supports this feature. + */ +static void __init fwnmi_init(void) +{ + unsigned long system_reset_addr, machine_check_addr; + + int ibm_nmi_register = rtas_token("ibm,nmi-register"); + if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) + return; + + /* If the kernel's not linked at zero we point the firmware at low + * addresses anyway, and use a trampoline to get to the real code. 
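+ *
+ * Subtracting PHYSICAL_START yields the address each entry point would
+ * have in a kernel linked at zero, which is where the low-memory
+ * trampolines live when the kernel is relocated.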
*/ + system_reset_addr = __pa(system_reset_fwnmi) - PHYSICAL_START; + machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START; + + if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr, + machine_check_addr)) + fwnmi_active = 1; +} + +static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq != NO_IRQ) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + +static void __init pseries_setup_i8259_cascade(void) +{ + struct device_node *np, *old, *found = NULL; + unsigned int cascade; + const u32 *addrp; + unsigned long intack = 0; + int naddr; + + for_each_node_by_type(np, "interrupt-controller") { + if (of_device_is_compatible(np, "chrp,iic")) { + found = np; + break; + } + } + + if (found == NULL) { + printk(KERN_DEBUG "pic: no ISA interrupt controller\n"); + return; + } + + cascade = irq_of_parse_and_map(found, 0); + if (cascade == NO_IRQ) { + printk(KERN_ERR "pic: failed to map cascade interrupt"); + return; + } + pr_debug("pic: cascade mapped to irq %d\n", cascade); + + for (old = of_node_get(found); old != NULL ; old = np) { + np = of_get_parent(old); + of_node_put(old); + if (np == NULL) + break; + if (strcmp(np->name, "pci") != 0) + continue; + addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL); + if (addrp == NULL) + continue; + naddr = of_n_addr_cells(np); + intack = addrp[naddr-1]; + if (naddr > 1) + intack |= ((unsigned long)addrp[naddr-2]) << 32; + } + if (intack) + printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack); + i8259_init(found, intack); + of_node_put(found); + irq_set_chained_handler(cascade, pseries_8259_cascade); +} + +static void __init pseries_mpic_init_IRQ(void) +{ + struct device_node *np; + const unsigned int *opprop; + unsigned long openpic_addr = 0; + int naddr, n, i, opplen; + struct mpic *mpic; + + np = of_find_node_by_path("/"); + naddr = of_n_addr_cells(np); + opprop = of_get_property(np, "platform-open-pic", &opplen); + if (opprop != NULL) { + openpic_addr = of_read_number(opprop, naddr); + printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); + } + of_node_put(np); + + BUG_ON(openpic_addr == 0); + + /* Setup the openpic driver */ + mpic = mpic_alloc(pSeries_mpic_node, openpic_addr, + MPIC_NO_RESET, 16, 0, " MPIC "); + BUG_ON(mpic == NULL); + + /* Add ISUs */ + opplen /= sizeof(u32); + for (n = 0, i = naddr; i < opplen; i += naddr, n++) { + unsigned long isuaddr = of_read_number(opprop + i, naddr); + mpic_assign_isu(mpic, n, isuaddr); + } + + /* Setup top-level get_irq */ + ppc_md.get_irq = mpic_get_irq; + + /* All ISUs are setup, complete initialization */ + mpic_init(mpic); + + /* Look for cascade */ + pseries_setup_i8259_cascade(); +} + +static void __init pseries_xics_init_IRQ(void) +{ + xics_init(); + pseries_setup_i8259_cascade(); +} + +static void pseries_lpar_enable_pmcs(void) +{ + unsigned long set, reset; + + set = 1UL << 63; + reset = 0; + plpar_hcall_norets(H_PERFMON, set, reset); +} + +static void __init pseries_discover_pic(void) +{ + struct device_node *np; + const char *typep; + + for_each_node_by_name(np, "interrupt-controller") { + typep = of_get_property(np, "compatible", NULL); + if (strstr(typep, "open-pic")) { + pSeries_mpic_node = of_node_get(np); + ppc_md.init_IRQ = pseries_mpic_init_IRQ; + setup_kexec_cpu_down_mpic(); + smp_init_pseries_mpic(); + return; + } else if (strstr(typep, "ppc-xicp")) { + ppc_md.init_IRQ = 
pseries_xics_init_IRQ;
+			setup_kexec_cpu_down_xics();
+			smp_init_pseries_xics();
+			return;
+		}
+	}
+	printk(KERN_ERR "pSeries_discover_pic: failed to recognize"
+	       " interrupt-controller\n");
+}
+
+static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct of_reconfig_data *rd = data;
+	struct device_node *np = rd->dn;
+	struct pci_dn *pci = NULL;
+	int err = NOTIFY_OK;
+
+	switch (action) {
+	case OF_RECONFIG_ATTACH_NODE:
+		pci = np->parent->data;
+		if (pci) {
+			update_dn_pci_info(np, pci->phb);
+
+			/* Create EEH device for the OF node */
+			eeh_dev_init(PCI_DN(np), pci->phb);
+		}
+		break;
+	default:
+		err = NOTIFY_DONE;
+		break;
+	}
+	return err;
+}
+
+static struct notifier_block pci_dn_reconfig_nb = {
+	.notifier_call = pci_dn_reconfig_notifier,
+};
+
+struct kmem_cache *dtl_cache;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/*
+ * Allocate space for the dispatch trace log for all possible cpus
+ * and register the buffers with the hypervisor. This is used for
+ * computing time stolen by the hypervisor.
+ */
+static int alloc_dispatch_logs(void)
+{
+	int cpu, ret;
+	struct paca_struct *pp;
+	struct dtl_entry *dtl;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return 0;
+
+	if (!dtl_cache)
+		return 0;
+
+	for_each_possible_cpu(cpu) {
+		pp = &paca[cpu];
+		dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
+		if (!dtl) {
+			pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
+				cpu);
+			pr_warn("Stolen time statistics will be unreliable\n");
+			break;
+		}
+
+		pp->dtl_ridx = 0;
+		pp->dispatch_log = dtl;
+		pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
+		pp->dtl_curr = dtl;
+	}
+
+	/* Register the DTL for the current (boot) cpu */
+	dtl = get_paca()->dispatch_log;
+	get_paca()->dtl_ridx = 0;
+	get_paca()->dtl_curr = dtl;
+	get_paca()->lppaca_ptr->dtl_idx = 0;
+
+	/* hypervisor reads buffer length from this field */
+	dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
+	ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
+	if (ret)
+		pr_err("WARNING: DTL registration of cpu %d (hw %d) failed "
+		       "with %d\n", smp_processor_id(),
+		       hard_smp_processor_id(), ret);
+	get_paca()->lppaca_ptr->dtl_enable_mask = 2;
+
+	return 0;
+}
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+static inline int alloc_dispatch_logs(void)
+{
+	return 0;
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+static int alloc_dispatch_log_kmem_cache(void)
+{
+	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+					DISPATCH_LOG_BYTES, 0, NULL);
+	if (!dtl_cache) {
+		pr_warn("Failed to create dispatch trace log buffer cache\n");
+		pr_warn("Stolen time statistics will be unreliable\n");
+		return 0;
+	}
+
+	return alloc_dispatch_logs();
+}
+machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
+
+static void pseries_lpar_idle(void)
+{
+	/*
+	 * Default handler to go into low thread priority and possibly
+	 * low power mode by ceding the processor to the hypervisor
+	 */
+
+	/* Indicate to hypervisor that we are idle. */
+	get_lppaca()->idle = 1;
+
+	/*
+	 * Yield the processor to the hypervisor. We return if
+	 * an external interrupt occurs (which are driven prior
+	 * to returning here) or if a prod occurs from another
+	 * processor. When returning here, external interrupts
+	 * are enabled.
+	 */
+	cede_processor();
+
+	get_lppaca()->idle = 0;
+}
+
+/*
+ * Enable relocation on during exceptions. 
This has partition wide scope and + * may take a while to complete, if it takes longer than one second we will + * just give up rather than wasting any more time on this - if that turns out + * to ever be a problem in practice we can move this into a kernel thread to + * finish off the process later in boot. + */ +long pSeries_enable_reloc_on_exc(void) +{ + long rc; + unsigned int delay, total_delay = 0; + + while (1) { + rc = enable_reloc_on_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + + delay = get_longbusy_msecs(rc); + total_delay += delay; + if (total_delay > 1000) { + pr_warn("Warning: Giving up waiting to enable " + "relocation on exceptions (%u msec)!\n", + total_delay); + return rc; + } + + mdelay(delay); + } +} +EXPORT_SYMBOL(pSeries_enable_reloc_on_exc); + +long pSeries_disable_reloc_on_exc(void) +{ + long rc; + + while (1) { + rc = disable_reloc_on_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + mdelay(get_longbusy_msecs(rc)); + } +} +EXPORT_SYMBOL(pSeries_disable_reloc_on_exc); + +#ifdef CONFIG_KEXEC +static void pSeries_machine_kexec(struct kimage *image) +{ + long rc; + + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + rc = pSeries_disable_reloc_on_exc(); + if (rc != H_SUCCESS) + pr_warning("Warning: Failed to disable relocation on " + "exceptions: %ld\n", rc); + } + + default_machine_kexec(image); +} +#endif + +#ifdef __LITTLE_ENDIAN__ +long pseries_big_endian_exceptions(void) +{ + long rc; + + while (1) { + rc = enable_big_endian_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + mdelay(get_longbusy_msecs(rc)); + } +} + +static long pseries_little_endian_exceptions(void) +{ + long rc; + + while (1) { + rc = enable_little_endian_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + mdelay(get_longbusy_msecs(rc)); + } +} +#endif + +static void __init find_and_init_phbs(void) +{ + struct device_node *node; + struct pci_controller *phb; + struct device_node *root = of_find_node_by_path("/"); + + for_each_child_of_node(root, node) { + if (node->type == NULL || (strcmp(node->type, "pci") != 0 && + strcmp(node->type, "pciex") != 0)) + continue; + + phb = pcibios_alloc_controller(node); + if (!phb) + continue; + rtas_setup_phb(phb); + pci_process_bridge_OF_ranges(phb, node, 0); + isa_bridge_find_early(phb); + phb->controller_ops = pseries_pci_controller_ops; + } + + of_node_put(root); + pci_devs_phb_init(); + + /* + * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties + * in chosen. + */ + if (of_chosen) { + const int *prop; + + prop = of_get_property(of_chosen, + "linux,pci-probe-only", NULL); + if (prop) { + if (*prop) + pci_add_flags(PCI_PROBE_ONLY); + else + pci_clear_flags(PCI_PROBE_ONLY); + } + } +} + +static void __init pSeries_setup_arch(void) +{ + set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + + /* Discover PIC type and setup ppc_md accordingly */ + pseries_discover_pic(); + + /* openpic global configuration register (64-bit format). */ + /* openpic Interrupt Source Unit pointer (64-bit format). */ + /* python0 facility area (mmio) (64-bit format) REAL address. 
*/
+
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+	fwnmi_init();
+
+	/* By default, only probe PCI (can be overridden by rtas_pci) */
+	pci_add_flags(PCI_PROBE_ONLY);
+
+	/* Find and initialize PCI host bridges */
+	init_pci_config_tokens();
+	find_and_init_phbs();
+	of_reconfig_notifier_register(&pci_dn_reconfig_nb);
+
+	pSeries_nvram_init();
+
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+		vpa_init(boot_cpuid);
+		ppc_md.power_save = pseries_lpar_idle;
+		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
+	} else {
+		/* No special idle routine */
+		ppc_md.enable_pmcs = power4_enable_pmcs;
+	}
+
+	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
+
+	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+		long rc;
+
+		rc = pSeries_enable_reloc_on_exc();
+		if (rc == H_P2) {
+			pr_info("Relocation on exceptions not supported\n");
+		} else if (rc != H_SUCCESS) {
+			pr_warn("Unable to enable relocation on exceptions: "
+				"%ld\n", rc);
+		}
+	}
+}
+
+static int __init pSeries_init_panel(void)
+{
+	/* Manually leave the kernel version on the panel. */
+#ifdef __BIG_ENDIAN__
+	ppc_md.progress("Linux ppc64\n", 0);
+#else
+	ppc_md.progress("Linux ppc64le\n", 0);
+#endif
+	ppc_md.progress(init_utsname()->version, 0);
+
+	return 0;
+}
+machine_arch_initcall(pseries, pSeries_init_panel);
+
+static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
+{
+	return plpar_hcall_norets(H_SET_DABR, dabr);
+}
+
+static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
+{
+	/* Have to set at least one bit in the DABRX according to PAPR */
+	if (dabrx == 0 && dabr == 0)
+		dabrx = DABRX_USER;
+	/* PAPR says we can only set kernel and user bits */
+	dabrx &= DABRX_KERNEL | DABRX_USER;
+
+	return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
+}
+
+static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
+{
+	/* PAPR says we can't set HYP */
+	dawrx &= ~DAWRX_HYP;
+
+	return plapr_set_watchpoint0(dawr, dawrx);
+}
+
+#define CMO_CHARACTERISTICS_TOKEN 44
+#define CMO_MAXLENGTH 1026
+
+void pSeries_coalesce_init(void)
+{
+	struct hvcall_mpp_x_data mpp_x_data;
+
+	if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
+		powerpc_firmware_features |= FW_FEATURE_XCMO;
+	else
+		powerpc_firmware_features &= ~FW_FEATURE_XCMO;
+}
+
+/**
+ * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
+ * handle that here. 
(Stolen from parse_system_parameter_string) + */ +static void pSeries_cmo_feature_init(void) +{ + char *ptr, *key, *value, *end; + int call_status; + int page_order = IOMMU_PAGE_SHIFT_4K; + + pr_debug(" -> fw_cmo_feature_init()\n"); + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE); + call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + CMO_CHARACTERISTICS_TOKEN, + __pa(rtas_data_buf), + RTAS_DATA_BUF_SIZE); + + if (call_status != 0) { + spin_unlock(&rtas_data_buf_lock); + pr_debug("CMO not available\n"); + pr_debug(" <- fw_cmo_feature_init()\n"); + return; + } + + end = rtas_data_buf + CMO_MAXLENGTH - 2; + ptr = rtas_data_buf + 2; /* step over strlen value */ + key = value = ptr; + + while (*ptr && (ptr <= end)) { + /* Separate the key and value by replacing '=' with '\0' and + * point the value at the string after the '=' + */ + if (ptr[0] == '=') { + ptr[0] = '\0'; + value = ptr + 1; + } else if (ptr[0] == '\0' || ptr[0] == ',') { + /* Terminate the string containing the key/value pair */ + ptr[0] = '\0'; + + if (key == value) { + pr_debug("Malformed key/value pair\n"); + /* Never found a '=', end processing */ + break; + } + + if (0 == strcmp(key, "CMOPageSize")) + page_order = simple_strtol(value, NULL, 10); + else if (0 == strcmp(key, "PrPSP")) + CMO_PrPSP = simple_strtol(value, NULL, 10); + else if (0 == strcmp(key, "SecPSP")) + CMO_SecPSP = simple_strtol(value, NULL, 10); + value = key = ptr + 1; + } + ptr++; + } + + /* Page size is returned as the power of 2 of the page size, + * convert to the page size in bytes before returning + */ + CMO_PageSize = 1 << page_order; + pr_debug("CMO_PageSize = %lu\n", CMO_PageSize); + + if (CMO_PrPSP != -1 || CMO_SecPSP != -1) { + pr_info("CMO enabled\n"); + pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, + CMO_SecPSP); + powerpc_firmware_features |= FW_FEATURE_CMO; + pSeries_coalesce_init(); + } else + pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, + CMO_SecPSP); + spin_unlock(&rtas_data_buf_lock); + pr_debug(" <- fw_cmo_feature_init()\n"); +} + +/* + * Early initialization. Relocation is on but do not reference unbolted pages + */ +static void __init pSeries_init_early(void) +{ + pr_debug(" -> pSeries_init_early()\n"); + +#ifdef CONFIG_HVC_CONSOLE + if (firmware_has_feature(FW_FEATURE_LPAR)) + hvc_vio_init_early(); +#endif + if (firmware_has_feature(FW_FEATURE_XDABR)) + ppc_md.set_dabr = pseries_set_xdabr; + else if (firmware_has_feature(FW_FEATURE_DABR)) + ppc_md.set_dabr = pseries_set_dabr; + + if (firmware_has_feature(FW_FEATURE_SET_MODE)) + ppc_md.set_dawr = pseries_set_dawr; + + pSeries_cmo_feature_init(); + iommu_init_early_pSeries(); + + pr_debug(" <- pSeries_init_early()\n"); +} + +/** + * pseries_power_off - tell firmware about how to power off the system. + * + * This function calls either the power-off rtas token in normal cases + * or the ibm,power-off-ups token (if present & requested) in case of + * a power failure. If power-off token is used, power on will only be + * possible with power button press. If ibm,power-off-ups token is used + * it will allow auto poweron after power is restored. 
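+ *
+ * rtas_poweron_auto is the flag exposed as /sys/power/auto_poweron by
+ * power.c in this directory; e.g. "echo 1 > /sys/power/auto_poweron"
+ * selects the ibm,power-off-ups path when that token is available.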
+ */ +static void pseries_power_off(void) +{ + int rc; + int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups"); + + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_POWER_OFF); + + if (rtas_poweron_auto == 0 || + rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) { + rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1); + printk(KERN_INFO "RTAS power-off returned %d\n", rc); + } else { + rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL); + printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc); + } + for (;;); +} + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ + +static int __init pseries_probe_fw_features(unsigned long node, + const char *uname, int depth, + void *data) +{ + const char *prop; + int len; + static int hypertas_found; + static int vec5_found; + + if (depth != 1) + return 0; + + if (!strcmp(uname, "rtas") || !strcmp(uname, "rtas@0")) { + prop = of_get_flat_dt_prop(node, "ibm,hypertas-functions", + &len); + if (prop) { + powerpc_firmware_features |= FW_FEATURE_LPAR; + fw_hypertas_feature_init(prop, len); + } + + hypertas_found = 1; + } + + if (!strcmp(uname, "chosen")) { + prop = of_get_flat_dt_prop(node, "ibm,architecture-vec-5", + &len); + if (prop) + fw_vec5_feature_init(prop, len); + + vec5_found = 1; + } + + return hypertas_found && vec5_found; +} + +static int __init pSeries_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + const char *dtype = of_get_flat_dt_prop(root, "device_type", NULL); + + if (dtype == NULL) + return 0; + if (strcmp(dtype, "chrp")) + return 0; + + /* Cell blades firmware claims to be chrp while it's not. Until this + * is fixed, we need to avoid those here. + */ + if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0") || + of_flat_dt_is_compatible(root, "IBM,CBEA")) + return 0; + + pr_debug("pSeries detected, looking for LPAR capability...\n"); + + /* Now try to figure out if we are running on LPAR */ + of_scan_flat_dt(pseries_probe_fw_features, NULL); + +#ifdef __LITTLE_ENDIAN__ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + long rc; + /* + * Tell the hypervisor that we want our exceptions to + * be taken in little endian mode. If this fails we don't + * want to use BUG() because it will trigger an exception. + */ + rc = pseries_little_endian_exceptions(); + if (rc) { + ppc_md.progress("H_SET_MODE LE exception fail", 0); + panic("Could not enable little endian exceptions"); + } + } +#endif + + if (firmware_has_feature(FW_FEATURE_LPAR)) + hpte_init_lpar(); + else + hpte_init_native(); + + pm_power_off = pseries_power_off; + + pr_debug("Machine is%s LPAR !\n", + (powerpc_firmware_features & FW_FEATURE_LPAR) ? 
"" : " not"); + + return 1; +} + +static int pSeries_pci_probe_mode(struct pci_bus *bus) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + return PCI_PROBE_DEVTREE; + return PCI_PROBE_NORMAL; +} + +#ifndef CONFIG_PCI +void pSeries_final_fixup(void) { } +#endif + +struct pci_controller_ops pseries_pci_controller_ops = { + .probe_mode = pSeries_pci_probe_mode, +}; + +define_machine(pseries) { + .name = "pSeries", + .probe = pSeries_probe, + .setup_arch = pSeries_setup_arch, + .init_early = pSeries_init_early, + .show_cpuinfo = pSeries_show_cpuinfo, + .log_error = pSeries_log_error, + .pcibios_fixup = pSeries_final_fixup, + .restart = rtas_restart, + .halt = rtas_halt, + .panic = rtas_os_term, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = rtas_progress, + .system_reset_exception = pSeries_system_reset_exception, + .machine_check_exception = pSeries_machine_check_exception, +#ifdef CONFIG_KEXEC + .machine_kexec = pSeries_machine_kexec, +#endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE + .memory_block_size = pseries_memory_block_size, +#endif +}; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c new file mode 100644 index 000000000..6932ea803 --- /dev/null +++ b/arch/powerpc/platforms/pseries/smp.c @@ -0,0 +1,274 @@ +/* + * SMP support for pSeries machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/cache.h> +#include <linux/err.h> +#include <linux/device.h> +#include <linux/cpu.h> + +#include <asm/ptrace.h> +#include <linux/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/paca.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/firmware.h> +#include <asm/rtas.h> +#include <asm/mpic.h> +#include <asm/vdso_datapage.h> +#include <asm/cputhreads.h> +#include <asm/xics.h> +#include <asm/dbell.h> +#include <asm/plpar_wrappers.h> +#include <asm/code-patching.h> + +#include "pseries.h" +#include "offline_states.h" + + +/* + * The Primary thread of each non-boot processor was started from the OF client + * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop. + */ +static cpumask_var_t of_spin_mask; + +/* + * If we multiplex IPI mechanisms, store the appropriate XICS IPI mechanism here + */ +static void (*xics_cause_ipi)(int cpu, unsigned long data); + +/* Query where a cpu is now. 
Return codes #defined in plpar_wrappers.h */
+int smp_query_cpu_stopped(unsigned int pcpu)
+{
+ int cpu_status, status;
+ int qcss_tok = rtas_token("query-cpu-stopped-state");
+
+ if (qcss_tok == RTAS_UNKNOWN_SERVICE) {
+ printk_once(KERN_INFO
+ "Firmware doesn't support query-cpu-stopped-state\n");
+ return QCSS_HARDWARE_ERROR;
+ }
+
+ status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
+ if (status != 0) {
+ printk(KERN_ERR
+ "RTAS query-cpu-stopped-state failed: %i\n", status);
+ return status;
+ }
+
+ return cpu_status;
+}
+
+/**
+ * smp_startup_cpu() - start the given cpu
+ *
+ * At boot time, there is nothing to do for primary threads which were
+ * started from Open Firmware. For anything else, call RTAS with the
+ * appropriate start location.
+ *
+ * Returns:
+ *	0	- failure
+ *	1	- success
+ */
+static inline int smp_startup_cpu(unsigned int lcpu)
+{
+ int status;
+ unsigned long start_here =
+ __pa(ppc_function_entry(generic_secondary_smp_init));
+ unsigned int pcpu;
+ int start_cpu;
+
+ if (cpumask_test_cpu(lcpu, of_spin_mask))
+ /* Already started by OF and sitting in spin loop */
+ return 1;
+
+ pcpu = get_hard_smp_processor_id(lcpu);
+
+ /* Check to see if the CPU is already out of FW, e.g. after a kexec */
+ if (smp_query_cpu_stopped(pcpu) == QCSS_NOT_STOPPED) {
+ cpumask_set_cpu(lcpu, of_spin_mask);
+ return 1;
+ }
+
+ /* Fixup atomic count: it exited inside an IRQ handler. */
+ task_thread_info(paca[lcpu].__current)->preempt_count = 0;
+#ifdef CONFIG_HOTPLUG_CPU
+ if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
+ goto out;
+#endif
+ /*
+ * If the RTAS start-cpu token does not exist then presume the
+ * cpu is already spinning.
+ */
+ start_cpu = rtas_token("start-cpu");
+ if (start_cpu == RTAS_UNKNOWN_SERVICE)
+ return 1;
+
+ status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, pcpu);
+ if (status != 0) {
+ printk(KERN_ERR "start-cpu failed: %i\n", status);
+ return 0;
+ }
+
+#ifdef CONFIG_HOTPLUG_CPU
+out:
+#endif
+ return 1;
+}
+
+static void smp_xics_setup_cpu(int cpu)
+{
+ if (cpu != boot_cpuid)
+ xics_setup_cpu();
+ if (cpu_has_feature(CPU_FTR_DBELL))
+ doorbell_setup_this_cpu();
+
+ if (firmware_has_feature(FW_FEATURE_SPLPAR))
+ vpa_init(cpu);
+
+ cpumask_clear_cpu(cpu, of_spin_mask);
+#ifdef CONFIG_HOTPLUG_CPU
+ set_cpu_current_state(cpu, CPU_STATE_ONLINE);
+ set_default_offline_state(cpu);
+#endif
+}
+
+static int smp_pSeries_kick_cpu(int nr)
+{
+ BUG_ON(nr < 0 || nr >= NR_CPUS);
+
+ if (!smp_startup_cpu(nr))
+ return -ENOENT;
+
+ /*
+ * The processor is currently spinning, waiting for the
+ * cpu_start field to become non-zero. After we set cpu_start,
+ * the processor will continue on to secondary_start.
+ */
+ paca[nr].cpu_start = 1;
+#ifdef CONFIG_HOTPLUG_CPU
+ set_preferred_offline_state(nr, CPU_STATE_ONLINE);
+
+ if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
+ long rc;
+ unsigned long hcpuid;
+
+ hcpuid = get_hard_smp_processor_id(nr);
+ rc = plpar_hcall_norets(H_PROD, hcpuid);
+ if (rc != H_SUCCESS)
+ printk(KERN_ERR "Error: Prod to wake up processor %d, "
+ "Ret= %ld\n", nr, rc);
+ }
+#endif
+
+ return 0;
+}
+
+/* Only used on systems that support multiple IPI mechanisms */
+static void pSeries_cause_ipi_mux(int cpu, unsigned long data)
+{
+ if (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id())))
+ doorbell_cause_ipi(cpu, data);
+ else
+ xics_cause_ipi(cpu, data);
+}
+
+static __init void pSeries_smp_probe(void)
+{
+ xics_smp_probe();
+
+ if (cpu_has_feature(CPU_FTR_DBELL)) {
+ xics_cause_ipi = smp_ops->cause_ipi;
+ smp_ops->cause_ipi =
pSeries_cause_ipi_mux;
+ }
+}
+
+static struct smp_ops_t pSeries_mpic_smp_ops = {
+ .message_pass = smp_mpic_message_pass,
+ .probe = smp_mpic_probe,
+ .kick_cpu = smp_pSeries_kick_cpu,
+ .setup_cpu = smp_mpic_setup_cpu,
+};
+
+static struct smp_ops_t pSeries_xics_smp_ops = {
+ .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */
+ .cause_ipi = NULL, /* Filled at runtime by pSeries_smp_probe() */
+ .probe = pSeries_smp_probe,
+ .kick_cpu = smp_pSeries_kick_cpu,
+ .setup_cpu = smp_xics_setup_cpu,
+ .cpu_bootable = smp_generic_cpu_bootable,
+};
+
+/* This is called very early */
+static void __init smp_init_pseries(void)
+{
+ int i;
+
+ pr_debug(" -> smp_init_pseries()\n");
+
+ alloc_bootmem_cpumask_var(&of_spin_mask);
+
+ /*
+ * Mark threads which are still spinning in hold loops
+ *
+ * We know prom_init will not have started them if RTAS supports
+ * query-cpu-stopped-state.
+ */
+ if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) {
+ if (cpu_has_feature(CPU_FTR_SMT)) {
+ for_each_present_cpu(i) {
+ if (cpu_thread_in_core(i) == 0)
+ cpumask_set_cpu(i, of_spin_mask);
+ }
+ } else
+ cpumask_copy(of_spin_mask, cpu_present_mask);
+
+ cpumask_clear_cpu(boot_cpuid, of_spin_mask);
+ }
+
+ /* Non-LPAR has additional take/give timebase */
+ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
+ smp_ops->give_timebase = rtas_give_timebase;
+ smp_ops->take_timebase = rtas_take_timebase;
+ }
+
+ pr_debug(" <- smp_init_pseries()\n");
+}
+
+void __init smp_init_pseries_mpic(void)
+{
+ smp_ops = &pSeries_mpic_smp_ops;
+
+ smp_init_pseries();
+}
+
+void __init smp_init_pseries_xics(void)
+{
+ smp_ops = &pSeries_xics_smp_ops;
+
+ smp_init_pseries();
+}
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
new file mode 100644
index 000000000..e76aefae2
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 2010 Brian King IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/suspend.h>
+#include <linux/stat.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+#include <asm/rtas.h>
+#include <asm/topology.h>
+#include "../../kernel/cacheinfo.h"
+
+static u64 stream_id;
+static struct device suspend_dev;
+static DECLARE_COMPLETION(suspend_work);
+static struct rtas_suspend_me_data suspend_data;
+static atomic_t suspending;
+
+/**
+ * pseries_suspend_begin - First phase of hibernation
+ *
+ * Check to ensure we are in a valid state to hibernate
+ *
+ * Return value:
+ *	0 on success / other on failure
+ **/
+static int pseries_suspend_begin(suspend_state_t state)
+{
+ long vasi_state, rc;
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+ /* Make sure the state is valid */
+ rc = plpar_hcall(H_VASI_STATE, retbuf, stream_id);
+
+ vasi_state = retbuf[0];
+
+ if (rc) {
+ pr_err("pseries_suspend_begin: H_VASI_STATE returned %ld\n", rc);
+ return rc;
+ } else if (vasi_state == H_VASI_ENABLED) {
+ return -EAGAIN;
+ } else if (vasi_state != H_VASI_SUSPENDING) {
+ pr_err("pseries_suspend_begin: vasi_state returned state %ld\n",
+ vasi_state);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * pseries_suspend_cpu - Suspend a single CPU
+ *
+ * Makes the H_JOIN call to suspend the CPU
+ *
+ **/
+static int pseries_suspend_cpu(void)
+{
+ if (atomic_read(&suspending))
+ return rtas_suspend_cpu(&suspend_data);
+ return 0;
+}
+
+/**
+ * pseries_suspend_enable_irqs
+ *
+ * Post suspend configuration updates
+ *
+ **/
+static void pseries_suspend_enable_irqs(void)
+{
+ /*
+ * Update configuration which can be modified based on device tree
+ * changes during resume.
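+ * The cache hierarchy reported by the device tree, for example, may
+ * differ on the target system, which is why cacheinfo for this CPU is
+ * rebuilt around the post_mobility_fixup() call below.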
+ */
+ cacheinfo_cpu_offline(smp_processor_id());
+ post_mobility_fixup();
+ cacheinfo_cpu_online(smp_processor_id());
+}
+
+/**
+ * pseries_suspend_enter - Final phase of hibernation
+ *
+ * Return value:
+ *	0 on success / other on failure
+ **/
+static int pseries_suspend_enter(suspend_state_t state)
+{
+ int rc = rtas_suspend_last_cpu(&suspend_data);
+
+ atomic_set(&suspending, 0);
+ atomic_set(&suspend_data.done, 1);
+ return rc;
+}
+
+/**
+ * pseries_prepare_late - Prepare to suspend all other CPUs
+ *
+ * Return value:
+ *	0 on success / other on failure
+ **/
+static int pseries_prepare_late(void)
+{
+ atomic_set(&suspending, 1);
+ atomic_set(&suspend_data.working, 0);
+ atomic_set(&suspend_data.done, 0);
+ atomic_set(&suspend_data.error, 0);
+ suspend_data.complete = &suspend_work;
+ reinit_completion(&suspend_work);
+ return 0;
+}
+
+/**
+ * store_hibernate - Initiate partition hibernation
+ * @dev: subsys root device
+ * @attr: device attribute struct
+ * @buf: buffer
+ * @count: buffer size
+ *
+ * Write the stream ID received from the HMC to this file
+ * to trigger hibernating the partition
+ *
+ * Return value:
+ *	number of bytes written on success / other on failure
+ **/
+static ssize_t store_hibernate(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ cpumask_var_t offline_mask;
+ int rc;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY))
+ return -ENOMEM;
+
+ stream_id = simple_strtoul(buf, NULL, 16);
+
+ do {
+ rc = pseries_suspend_begin(PM_SUSPEND_MEM);
+ if (rc == -EAGAIN)
+ ssleep(1);
+ } while (rc == -EAGAIN);
+
+ if (!rc) {
+ /* All present CPUs must be online */
+ cpumask_andnot(offline_mask, cpu_present_mask,
+ cpu_online_mask);
+ rc = rtas_online_cpus_mask(offline_mask);
+ if (rc) {
+ pr_err("%s: Could not bring present CPUs online.\n",
+ __func__);
+ goto out;
+ }
+
+ stop_topology_update();
+ rc = pm_suspend(PM_SUSPEND_MEM);
+ start_topology_update();
+
+ /* Take down CPUs not online prior to suspend */
+ if (rtas_offline_cpus_mask(offline_mask))
+ pr_warn("%s: Could not restore CPUs to offline "
+ "state.\n", __func__);
+ }
+
+ stream_id = 0;
+
+ if (!rc)
+ rc = count;
+out:
+ free_cpumask_var(offline_mask);
+ return rc;
+}
+
+#define USER_DT_UPDATE 0
+#define KERN_DT_UPDATE 1
+
+/**
+ * show_hibernate - Report device tree update responsibility
+ * @dev: subsys root device
+ * @attr: device attribute struct
+ * @buf: buffer
+ *
+ * Report whether a device tree update is performed by the kernel after a
+ * resume, or if drmgr must coordinate the update from user space.
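+ * This kernel performs the update itself, so show_hibernate() below
+ * always reports KERN_DT_UPDATE.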
+ *
+ * Return value:
+ *	0 if drmgr is to initiate update, and 1 otherwise
+ **/
+static ssize_t show_hibernate(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", KERN_DT_UPDATE);
+}
+
+static DEVICE_ATTR(hibernate, S_IWUSR | S_IRUGO,
+ show_hibernate, store_hibernate);
+
+static struct bus_type suspend_subsys = {
+ .name = "power",
+ .dev_name = "power",
+};
+
+static const struct platform_suspend_ops pseries_suspend_ops = {
+ .valid = suspend_valid_only_mem,
+ .begin = pseries_suspend_begin,
+ .prepare_late = pseries_prepare_late,
+ .enter = pseries_suspend_enter,
+};
+
+/**
+ * pseries_suspend_sysfs_register - Register with sysfs
+ *
+ * Return value:
+ *	0 on success / other on failure
+ **/
+static int pseries_suspend_sysfs_register(struct device *dev)
+{
+ int rc;
+
+ if ((rc = subsys_system_register(&suspend_subsys, NULL)))
+ return rc;
+
+ dev->id = 0;
+ dev->bus = &suspend_subsys;
+
+ if ((rc = device_create_file(suspend_subsys.dev_root, &dev_attr_hibernate)))
+ goto subsys_unregister;
+
+ return 0;
+
+subsys_unregister:
+ bus_unregister(&suspend_subsys);
+ return rc;
+}
+
+/**
+ * pseries_suspend_init - initcall for pSeries suspend
+ *
+ * Return value:
+ *	0 on success / other on failure
+ **/
+static int __init pseries_suspend_init(void)
+{
+ int rc;
+
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ return 0;
+
+ suspend_data.token = rtas_token("ibm,suspend-me");
+ if (suspend_data.token == RTAS_UNKNOWN_SERVICE)
+ return 0;
+
+ if ((rc = pseries_suspend_sysfs_register(&suspend_dev)))
+ return rc;
+
+ ppc_md.suspend_disable_cpu = pseries_suspend_cpu;
+ ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs;
+ suspend_set_ops(&pseries_suspend_ops);
+ return 0;
+}
+machine_device_initcall(pseries, pseries_suspend_init);
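For reference, the hibernate attribute registered above is meant to be driven from
user space (normally by drmgr). The sketch below shows the minimal interaction. It
assumes the attribute lands at /sys/devices/system/power/hibernate, a path derived
from the "power" subsystem name registered in pseries_suspend_sysfs_register(), and
it uses a placeholder stream ID in place of one obtained from the HMC:

/*
 * Minimal user-space sketch of triggering partition hibernation through
 * the sysfs attribute created by pseries_suspend_sysfs_register().
 * The path and the stream ID below are illustrative assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        /* Assumed path; derived from the "power" subsystem root device */
        const char *path = "/sys/devices/system/power/hibernate";
        /* Placeholder: a real stream ID comes from the HMC; store_hibernate()
         * parses this with simple_strtoul(buf, NULL, 16), i.e. as hex. */
        const char *stream_id = "deadbeef\n";
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror("open hibernate attribute");
                return 1;
        }
        /* The -EAGAIN retry loop is handled inside the kernel's
         * store_hibernate(), so a single write suffices here. */
        if (write(fd, stream_id, strlen(stream_id)) < 0)
                perror("write stream id");
        close(fd);
        return 0;
}

Running this requires CAP_SYS_ADMIN, matching the capable() check in
store_hibernate(), and a successful write should not return until the
partition has resumed on the target system.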