From 94cabd003e989556d8bf84027d96284dc2d99c76 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Sat, 12 Nov 2005 18:53:48 +0000
Subject: [ARM] 3149/1: SharpSL: Add Akita (SL-C1000) machine support

Patch from Richard Purdie

Add the core machine support for the Sharp SL-C1000 (Akita)
and enable the Kconfig selection for it.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/asm-arm/arch-pxa/akita.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-arm/arch-pxa/akita.h b/include/asm-arm/arch-pxa/akita.h
index 4a1fbcfccc39..5d8cc1d9cb10 100644
--- a/include/asm-arm/arch-pxa/akita.h
+++ b/include/asm-arm/arch-pxa/akita.h
@@ -25,6 +25,8 @@
 /* Default Values */
 #define AKITA_IOEXP_IO_OUT	(AKITA_IOEXP_IR_ON | AKITA_IOEXP_AKIN_PULLUP)
 
+extern struct platform_device akitaioexp_device;
+
 void akita_set_ioexp(struct device *dev, unsigned char bitmask);
 void akita_reset_ioexp(struct device *dev, unsigned char bitmask);
 
-- 
cgit v1.2.3-71-gd317


From bd5d080ab99642e3245ef7cfa54490384c01d878 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Sun, 13 Nov 2005 10:07:48 +0000
Subject: [ARM] 3160/1: SharpSL: Add driver for Akita specific GPIOs

Patch from Richard Purdie

Add a driver for the extra GPIOs found on the Sharp SL-C1000 (Akita).
These GPIOs are found on a Maxim MAX7310 I2C i/o expander chip. A
generic GPIO driver for the MAX7310 was attempted but this mini
driver is a much simpler and much more effective solution avoiding
several issues and complexity the generic driver had (as discussed
on LKML).

The platform device is required so the device parent can be set
correctly which ensures the device is one of the last to suspend
and first to resume. Whilst the i2c suspend/resume calls can be
influenced, nothing guarantees this is easlier/later than the
subsystems the gpios are used on which are all independent of i2c
(sound, irda, video/backlight etc.).

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-pxa/Makefile      |   1 +
 arch/arm/mach-pxa/akita-ioexp.c | 223 ++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c-id.h          |   1 +
 3 files changed, 225 insertions(+)
 create mode 100644 arch/arm/mach-pxa/akita-ioexp.c

(limited to 'include')

diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index c83092e298a7..32526a0a6f86 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_MACH_MAINSTONE) += mainstone.o
 obj-$(CONFIG_ARCH_PXA_IDP) += idp.o
 obj-$(CONFIG_PXA_SHARP_C7xx)	+= corgi.o corgi_ssp.o corgi_lcd.o sharpsl_pm.o corgi_pm.o
 obj-$(CONFIG_PXA_SHARP_Cxx00)	+= spitz.o corgi_ssp.o corgi_lcd.o sharpsl_pm.o spitz_pm.o
+obj-$(CONFIG_MACH_AKITA)	+= akita-ioexp.o
 obj-$(CONFIG_MACH_POODLE)	+= poodle.o
 obj-$(CONFIG_MACH_TOSA)         += tosa.o
 
diff --git a/arch/arm/mach-pxa/akita-ioexp.c b/arch/arm/mach-pxa/akita-ioexp.c
new file mode 100644
index 000000000000..f6d73cc01f78
--- /dev/null
+++ b/arch/arm/mach-pxa/akita-ioexp.c
@@ -0,0 +1,223 @@
+/*
+ * Support for the Extra GPIOs on the Sharp SL-C1000 (Akita)
+ * (uses a Maxim MAX7310 8 Port IO Expander)
+ *
+ * Copyright 2005 Openedhand Ltd.
+ *
+ * Author: Richard Purdie <richard@openedhand.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <asm/arch/akita.h>
+
+/* MAX7310 Regiser Map */
+#define MAX7310_INPUT    0x00
+#define MAX7310_OUTPUT   0x01
+#define MAX7310_POLINV   0x02
+#define MAX7310_IODIR    0x03 /* 1 = Input, 0 = Output */
+#define MAX7310_TIMEOUT  0x04
+
+/* Addresses to scan */
+static unsigned short normal_i2c[] = { 0x18, I2C_CLIENT_END };
+
+/* I2C Magic */
+I2C_CLIENT_INSMOD;
+
+static int max7310_write(struct i2c_client *client, int address, int data);
+static struct i2c_client max7310_template;
+static void akita_ioexp_work(void *private_);
+
+static struct device *akita_ioexp_device;
+static unsigned char ioexp_output_value = AKITA_IOEXP_IO_OUT;
+DECLARE_WORK(akita_ioexp, akita_ioexp_work, NULL);
+
+
+/*
+ * MAX7310 Access
+ */
+static int max7310_config(struct device *dev, int iomode, int polarity)
+{
+	int ret;
+	struct i2c_client *client = to_i2c_client(dev);
+
+	ret = max7310_write(client, MAX7310_POLINV, polarity);
+	if (ret < 0)
+		return ret;
+	ret = max7310_write(client, MAX7310_IODIR, iomode);
+	return ret;
+}
+
+static int max7310_set_ouputs(struct device *dev, int outputs)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return max7310_write(client, MAX7310_OUTPUT, outputs);
+}
+
+/*
+ * I2C Functions
+ */
+static int max7310_write(struct i2c_client *client, int address, int value)
+{
+	u8 data[2];
+
+	data[0] = address & 0xff;
+	data[1] = value & 0xff;
+
+	if (i2c_master_send(client, data, 2) == 2)
+		return 0;
+	return -1;
+}
+
+static int max7310_detect(struct i2c_adapter *adapter, int address, int kind)
+{
+	struct i2c_client *new_client;
+	int err;
+
+	if (!(new_client = kmalloc(sizeof(struct i2c_client), GFP_KERNEL)))
+		return -ENOMEM;
+
+	max7310_template.adapter = adapter;
+	max7310_template.addr = address;
+
+	memcpy(new_client, &max7310_template, sizeof(struct i2c_client));
+
+	if ((err = i2c_attach_client(new_client))) {
+		kfree(new_client);
+		return err;
+	}
+
+	max7310_config(&new_client->dev, AKITA_IOEXP_IO_DIR, 0);
+	akita_ioexp_device = &new_client->dev;
+	schedule_work(&akita_ioexp);
+
+	return 0;
+}
+
+static int max7310_attach_adapter(struct i2c_adapter *adapter)
+{
+	return i2c_probe(adapter, &addr_data, max7310_detect);
+}
+
+static int max7310_detach_client(struct i2c_client *client)
+{
+	int err;
+
+	akita_ioexp_device = NULL;
+
+	if ((err = i2c_detach_client(client)))
+		return err;
+
+	kfree(client);
+	return 0;
+}
+
+static struct i2c_driver max7310_i2c_driver = {
+	.owner		= THIS_MODULE,
+	.name		= "akita-max7310",
+	.id		= I2C_DRIVERID_AKITAIOEXP,
+	.flags		= I2C_DF_NOTIFY,
+	.attach_adapter	= max7310_attach_adapter,
+	.detach_client	= max7310_detach_client,
+};
+
+static struct i2c_client max7310_template = {
+	name:   "akita-max7310",
+	flags:  I2C_CLIENT_ALLOW_USE,
+	driver: &max7310_i2c_driver,
+};
+
+void akita_set_ioexp(struct device *dev, unsigned char bit)
+{
+	ioexp_output_value |= bit;
+
+	if (akita_ioexp_device)
+		schedule_work(&akita_ioexp);
+	return;
+}
+
+void akita_reset_ioexp(struct device *dev, unsigned char bit)
+{
+	ioexp_output_value &= ~bit;
+
+	if (akita_ioexp_device)
+		schedule_work(&akita_ioexp);
+	return;
+}
+
+EXPORT_SYMBOL(akita_set_ioexp);
+EXPORT_SYMBOL(akita_reset_ioexp);
+
+static void akita_ioexp_work(void *private_)
+{
+	if (akita_ioexp_device)
+		max7310_set_ouputs(akita_ioexp_device, ioexp_output_value);
+}
+
+
+#ifdef CONFIG_PM
+static int akita_ioexp_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	flush_scheduled_work();
+	return 0;
+}
+
+static int akita_ioexp_resume(struct platform_device *pdev)
+{
+	schedule_work(&akita_ioexp);
+	return 0;
+}
+#else
+#define akita_ioexp_suspend NULL
+#define akita_ioexp_resume NULL
+#endif
+
+static int __init akita_ioexp_probe(struct platform_device *pdev)
+{
+	return i2c_add_driver(&max7310_i2c_driver);
+}
+
+static int akita_ioexp_remove(struct platform_device *pdev)
+{
+	i2c_del_driver(&max7310_i2c_driver);
+	return 0;
+}
+
+static struct platform_driver akita_ioexp_driver = {
+	.probe		= akita_ioexp_probe,
+	.remove		= akita_ioexp_remove,
+	.suspend	= akita_ioexp_suspend,
+	.resume		= akita_ioexp_resume,
+	.driver		= {
+		.name	= "akita-ioexp",
+	},
+};
+
+static int __init akita_ioexp_init(void)
+{
+	return platform_driver_register(&akita_ioexp_driver);
+}
+
+static void __exit akita_ioexp_exit(void)
+{
+	platform_driver_unregister(&akita_ioexp_driver);
+}
+
+MODULE_AUTHOR("Richard Purdie <rpurdie@openedhand.com>");
+MODULE_DESCRIPTION("Akita IO-Expander driver");
+MODULE_LICENSE("GPL");
+
+fs_initcall(akita_ioexp_init);
+module_exit(akita_ioexp_exit);
+
diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index 74abaecdb572..1543daaa9c5e 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -107,6 +107,7 @@
 #define I2C_DRIVERID_CX25840	71	/* cx2584x video encoder	*/
 #define I2C_DRIVERID_SAA7127	72	/* saa7124 video encoder	*/
 #define I2C_DRIVERID_SAA711X	73	/* saa711x video encoders	*/
+#define I2C_DRIVERID_AKITAIOEXP	74	/* IO Expander on Sharp SL-C1000 */
 
 #define I2C_DRIVERID_EXP0	0xF0	/* experimental use id's	*/
 #define I2C_DRIVERID_EXP1	0xF1
-- 
cgit v1.2.3-71-gd317


From 47936357c0d14809c3c9547e532511f6625654b2 Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Sun, 13 Nov 2005 16:06:21 -0800
Subject: [PATCH] x86_64: fix tss limit

Fix the x86_64 TSS limit in TSS descriptor.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/desc.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h
index 68ac3c62fe3d..b837820c9073 100644
--- a/include/asm-x86_64/desc.h
+++ b/include/asm-x86_64/desc.h
@@ -129,9 +129,16 @@ static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned
 
 static inline void set_tss_desc(unsigned cpu, void *addr)
 { 
-	set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (unsigned long)addr, 
-			      DESC_TSS,
-			      sizeof(struct tss_struct) - 1);
+	/*
+	 * sizeof(unsigned long) coming from an extra "long" at the end
+	 * of the iobitmap. See tss_struct definition in processor.h
+	 *
+	 * -1? seg base+limit should be pointing to the address of the
+	 * last valid byte
+	 */
+	set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS],
+		(unsigned long)addr, DESC_TSS,
+		IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
 } 
 
 static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
-- 
cgit v1.2.3-71-gd317


From 95e861db3eaba7bc99f8605db70103ec3d078203 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 13 Nov 2005 16:06:24 -0800
Subject: [PATCH] reorder struct files_struct

The file_lock spinlock sits close to mostly read fields of 'struct
files_struct'

In SMP (and NUMA) environments, each time a thread wants to open or close
a file, it has to acquire the spinlock, thus invalidating the cache line
containing this spinlock on other CPUS.  So other threads doing
read()/write()/...  calls that use RCU to access the file table are going
to ask further memory (possibly NUMA) transactions to read again this
memory line.

Move the spinlock to another cache line, so that concurrent threads can
share the cache line containing 'count' and 'fdt' fields.

It's worth up to 9% on a microbenchmark using a 4-thread 2-package x86
machine.  See
http://marc.theaimsgroup.com/?l=linux-kernel&m=112680448713342&w=2

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/file.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/file.h b/include/linux/file.h
index d3b1a15d5f21..418b6101b59a 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -33,13 +33,13 @@ struct fdtable {
  * Open file table structure
  */
 struct files_struct {
-        atomic_t count;
-        spinlock_t file_lock;     /* Protects all the below members.  Nests inside tsk->alloc_lock */
+	atomic_t count;
 	struct fdtable *fdt;
 	struct fdtable fdtab;
-        fd_set close_on_exec_init;
-        fd_set open_fds_init;
-        struct file * fd_array[NR_OPEN_DEFAULT];
+	fd_set close_on_exec_init;
+	fd_set open_fds_init;
+	struct file * fd_array[NR_OPEN_DEFAULT];
+	spinlock_t file_lock;     /* Protects concurrent writers.  Nests inside tsk->alloc_lock */
 };
 
 #define files_fdtable(files) (rcu_dereference((files)->fdt))
-- 
cgit v1.2.3-71-gd317


From bca73e4bf8563d83f7856164caa44d5f42e44cca Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@pobox.com>
Date: Sun, 13 Nov 2005 16:06:25 -0800
Subject: [PATCH] move pm_register/etc. to CONFIG_PM_LEGACY, pm_legacy.h

Since few people need the support anymore, this moves the legacy
pm_xxx functions to CONFIG_PM_LEGACY, and include/linux/pm_legacy.h.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/arm/kernel/apm.c           |  1 +
 arch/frv/kernel/pm.c            |  1 +
 arch/i386/Kconfig               |  2 +-
 arch/i386/kernel/apm.c          |  1 +
 arch/mips/au1000/common/power.c |  1 +
 drivers/acpi/bus.c              |  3 ++-
 drivers/net/3c509.c             | 13 +++++-----
 drivers/net/irda/ali-ircc.c     |  1 +
 drivers/net/irda/nsc-ircc.c     |  1 +
 drivers/serial/68328serial.c    |  7 +++---
 include/linux/pm.h              | 49 ------------------------------------
 include/linux/pm_legacy.h       | 56 +++++++++++++++++++++++++++++++++++++++++
 kernel/power/Kconfig            |  9 +++++++
 kernel/power/Makefile           |  3 ++-
 kernel/power/pm.c               |  1 +
 sound/oss/ad1848.c              |  1 +
 sound/oss/cs4281/cs4281m.c      |  1 +
 sound/oss/maestro.c             |  1 +
 sound/oss/nm256_audio.c         |  1 +
 sound/oss/opl3sa2.c             | 18 +++++++------
 20 files changed, 102 insertions(+), 69 deletions(-)
 create mode 100644 include/linux/pm_legacy.h

(limited to 'include')

diff --git a/arch/arm/kernel/apm.c b/arch/arm/kernel/apm.c
index b0bbd1e62ebb..a2843be05557 100644
--- a/arch/arm/kernel/apm.c
+++ b/arch/arm/kernel/apm.c
@@ -20,6 +20,7 @@
 #include <linux/apm_bios.h>
 #include <linux/sched.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c
index 1a1e8a119c3d..712c3c24c954 100644
--- a/arch/frv/kernel/pm.c
+++ b/arch/frv/kernel/pm.c
@@ -14,6 +14,7 @@
 #include <linux/config.h>
 #include <linux/init.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/sysctl.h>
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index dbf90ad6eac3..6004bb0795e0 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -699,7 +699,7 @@ depends on PM && !X86_VISWS
 
 config APM
 	tristate "APM (Advanced Power Management) BIOS support"
-	depends on PM
+	depends on PM && PM_LEGACY
 	---help---
 	  APM is a BIOS specification for saving power using several different
 	  techniques. This is mostly useful for battery powered laptops with
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 003548b8735f..1e60acbed3c1 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -218,6 +218,7 @@
 #include <linux/time.h>
 #include <linux/sched.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/smp.h>
diff --git a/arch/mips/au1000/common/power.c b/arch/mips/au1000/common/power.c
index f85093b8d54d..f4926315fb68 100644
--- a/arch/mips/au1000/common/power.c
+++ b/arch/mips/au1000/common/power.c
@@ -32,6 +32,7 @@
 #include <linux/config.h>
 #include <linux/init.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/jiffies.h>
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 6a4da417c16b..606f8733a776 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -28,6 +28,7 @@
 #include <linux/list.h>
 #include <linux/sched.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/device.h>
 #include <linux/proc_fs.h>
 #ifdef CONFIG_X86
@@ -754,7 +755,7 @@ static int __init acpi_init(void)
 	result = acpi_bus_init();
 
 	if (!result) {
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 		if (!PM_IS_ACTIVE())
 			pm_active = 1;
 		else {
diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c
index 977935a3d898..824e430486c2 100644
--- a/drivers/net/3c509.c
+++ b/drivers/net/3c509.c
@@ -84,6 +84,7 @@ static int max_interrupt_work = 10;
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>	/* for udelay() */
 #include <linux/spinlock.h>
@@ -173,7 +174,7 @@ struct el3_private {
 	/* skb send-queue */
 	int head, size;
 	struct sk_buff *queue[SKB_QUEUE_SIZE];
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 	struct pm_dev *pmdev;
 #endif
 	enum {
@@ -200,7 +201,7 @@ static void el3_tx_timeout (struct net_device *dev);
 static void el3_down(struct net_device *dev);
 static void el3_up(struct net_device *dev);
 static struct ethtool_ops ethtool_ops;
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 static int el3_suspend(struct pm_dev *pdev);
 static int el3_resume(struct pm_dev *pdev);
 static int el3_pm_callback(struct pm_dev *pdev, pm_request_t rqst, void *data);
@@ -361,7 +362,7 @@ static void el3_common_remove (struct net_device *dev)
 	struct el3_private *lp = netdev_priv(dev);
 
 	(void) lp;				/* Keep gcc quiet... */
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 	if (lp->pmdev)
 		pm_unregister(lp->pmdev);
 #endif
@@ -571,7 +572,7 @@ no_pnp:
 	if (err)
 		goto out1;
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 	/* register power management */
 	lp->pmdev = pm_register(PM_ISA_DEV, card_idx, el3_pm_callback);
 	if (lp->pmdev) {
@@ -1479,7 +1480,7 @@ el3_up(struct net_device *dev)
 }
 
 /* Power Management support functions */
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 
 static int
 el3_suspend(struct pm_dev *pdev)
@@ -1548,7 +1549,7 @@ el3_pm_callback(struct pm_dev *pdev, pm_request_t rqst, void *data)
 	return 0;
 }
 
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_LEGACY */
 
 /* Parameters that may be passed into the module. */
 static int debug = -1;
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index 9bf34681d3df..2e7882eb7d6f 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -40,6 +40,7 @@
 #include <asm/byteorder.h>
 
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 
 #include <net/irda/wrapper.h>
 #include <net/irda/irda.h>
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 805714ec9a8a..ee717d0e939e 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -59,6 +59,7 @@
 #include <asm/byteorder.h>
 
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 
 #include <net/irda/wrapper.h>
 #include <net/irda/irda.h>
diff --git a/drivers/serial/68328serial.c b/drivers/serial/68328serial.c
index 2efb317153ce..67e9afa000c1 100644
--- a/drivers/serial/68328serial.c
+++ b/drivers/serial/68328serial.c
@@ -34,6 +34,7 @@
 #include <linux/keyboard.h>
 #include <linux/init.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
 
@@ -1343,7 +1344,7 @@ static void show_serial_version(void)
 	printk("MC68328 serial driver version 1.00\n");
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 /* Serial Power management
  *  The console (currently fixed at line 0) is a special case for power
  *  management because the kernel is so chatty. The console will be 
@@ -1393,7 +1394,7 @@ void startup_console(void)
 	struct m68k_serial *info = &m68k_soft[0];
 	startup(info);
 }
-#endif
+#endif /* CONFIG_PM_LEGACY */
 
 
 static struct tty_operations rs_ops = {
@@ -1486,7 +1487,7 @@ rs68328_init(void)
 			    IRQ_FLG_STD,
 			    "M68328_UART", NULL))
                 panic("Unable to attach 68328 serial interrupt\n");
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 	    serial_pm[i] = pm_register(PM_SYS_DEV, PM_SYS_COM, serial_pm_callback);
 	    if (serial_pm[i])
 		    serial_pm[i]->data = info;
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 1514098d156d..5be87ba3b7ac 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -94,55 +94,6 @@ struct pm_dev
 	struct list_head entry;
 };
 
-#ifdef CONFIG_PM
-
-extern int pm_active;
-
-#define PM_IS_ACTIVE() (pm_active != 0)
-
-/*
- * Register a device with power management
- */
-struct pm_dev __deprecated *
-pm_register(pm_dev_t type, unsigned long id, pm_callback callback);
-
-/*
- * Unregister a device with power management
- */
-void __deprecated pm_unregister(struct pm_dev *dev);
-
-/*
- * Unregister all devices with matching callback
- */
-void __deprecated pm_unregister_all(pm_callback callback);
-
-/*
- * Send a request to all devices
- */
-int __deprecated pm_send_all(pm_request_t rqst, void *data);
-
-#else /* CONFIG_PM */
-
-#define PM_IS_ACTIVE() 0
-
-static inline struct pm_dev *pm_register(pm_dev_t type,
-					 unsigned long id,
-					 pm_callback callback)
-{
-	return NULL;
-}
-
-static inline void pm_unregister(struct pm_dev *dev) {}
-
-static inline void pm_unregister_all(pm_callback callback) {}
-
-static inline int pm_send_all(pm_request_t rqst, void *data)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PM */
-
 /* Functions above this comment are list-based old-style power
  * managment. Please avoid using them.  */
 
diff --git a/include/linux/pm_legacy.h b/include/linux/pm_legacy.h
new file mode 100644
index 000000000000..1252b45face1
--- /dev/null
+++ b/include/linux/pm_legacy.h
@@ -0,0 +1,56 @@
+#ifndef __LINUX_PM_LEGACY_H__
+#define __LINUX_PM_LEGACY_H__
+
+#include <linux/config.h>
+
+#ifdef CONFIG_PM_LEGACY
+
+extern int pm_active;
+
+#define PM_IS_ACTIVE() (pm_active != 0)
+
+/*
+ * Register a device with power management
+ */
+struct pm_dev __deprecated *
+pm_register(pm_dev_t type, unsigned long id, pm_callback callback);
+
+/*
+ * Unregister a device with power management
+ */
+void __deprecated pm_unregister(struct pm_dev *dev);
+
+/*
+ * Unregister all devices with matching callback
+ */
+void __deprecated pm_unregister_all(pm_callback callback);
+
+/*
+ * Send a request to all devices
+ */
+int __deprecated pm_send_all(pm_request_t rqst, void *data);
+
+#else /* CONFIG_PM_LEGACY */
+
+#define PM_IS_ACTIVE() 0
+
+static inline struct pm_dev *pm_register(pm_dev_t type,
+					 unsigned long id,
+					 pm_callback callback)
+{
+	return NULL;
+}
+
+static inline void pm_unregister(struct pm_dev *dev) {}
+
+static inline void pm_unregister_all(pm_callback callback) {}
+
+static inline int pm_send_all(pm_request_t rqst, void *data)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PM_LEGACY */
+
+#endif /* __LINUX_PM_LEGACY_H__ */
+
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 46a5e5acff97..5ec248cb7f4a 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -19,6 +19,15 @@ config PM
 	  will issue the hlt instruction if nothing is to be done, thereby
 	  sending the processor to sleep and saving power.
 
+config PM_LEGACY
+	bool "Legacy Power Management API"
+	depends on PM
+	default y
+	---help---
+	   Support for pm_register() and friends.
+
+	   If unsure, say Y.
+
 config PM_DEBUG
 	bool "Power Management Debug Support"
 	depends on PM
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index c71eb4579c07..04be7d0d96a7 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -3,7 +3,8 @@ ifeq ($(CONFIG_PM_DEBUG),y)
 EXTRA_CFLAGS	+=	-DDEBUG
 endif
 
-obj-y				:= main.o process.o console.o pm.o
+obj-y				:= main.o process.o console.o
+obj-$(CONFIG_PM_LEGACY)		+= pm.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o disk.o snapshot.o
 
 obj-$(CONFIG_SUSPEND_SMP)	+= smp.o
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index 159149321b3c..33c508e857dd 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/interrupt.h>
 
 int pm_active;
diff --git a/sound/oss/ad1848.c b/sound/oss/ad1848.c
index 7c835abd99bc..3f30c57676c1 100644
--- a/sound/oss/ad1848.c
+++ b/sound/oss/ad1848.c
@@ -47,6 +47,7 @@
 #include <linux/module.h>
 #include <linux/stddef.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/isapnp.h>
 #include <linux/pnp.h>
 #include <linux/spinlock.h>
diff --git a/sound/oss/cs4281/cs4281m.c b/sound/oss/cs4281/cs4281m.c
index d0d3963e1b83..adc689649fe1 100644
--- a/sound/oss/cs4281/cs4281m.c
+++ b/sound/oss/cs4281/cs4281m.c
@@ -298,6 +298,7 @@ struct cs4281_state {
 	struct cs4281_pipeline pl[CS4281_NUMBER_OF_PIPELINES];
 };
 
+#include <linux/pm_legacy.h>
 #include "cs4281pm-24.c"
 
 #if CSDEBUG
diff --git a/sound/oss/maestro.c b/sound/oss/maestro.c
index 3dce504e6d6d..3abd3541cbc7 100644
--- a/sound/oss/maestro.c
+++ b/sound/oss/maestro.c
@@ -231,6 +231,7 @@
 #include <asm/uaccess.h>
 
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 static int maestro_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *d);
 
 #include "maestro.h"
diff --git a/sound/oss/nm256_audio.c b/sound/oss/nm256_audio.c
index 66970062eb36..0ce2c404a730 100644
--- a/sound/oss/nm256_audio.c
+++ b/sound/oss/nm256_audio.c
@@ -25,6 +25,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/delay.h>
 #include <linux/spinlock.h>
 #include "sound_config.h"
diff --git a/sound/oss/opl3sa2.c b/sound/oss/opl3sa2.c
index 2efbd865109b..cd41d0e4706a 100644
--- a/sound/oss/opl3sa2.c
+++ b/sound/oss/opl3sa2.c
@@ -70,6 +70,7 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include "sound_config.h"
 
 #include "ad1848.h"
@@ -138,7 +139,7 @@ typedef struct {
 	struct pnp_dev* pdev;
 	int activated;			/* Whether said devices have been activated */
 #endif
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 	unsigned int	in_suspend;
 	struct pm_dev	*pmdev;
 #endif
@@ -341,7 +342,7 @@ static void opl3sa2_mixer_reset(opl3sa2_state_t* devc)
 }
 
 /* Currently only used for power management */
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 static void opl3sa2_mixer_restore(opl3sa2_state_t* devc)
 {
 	if (devc) {
@@ -354,7 +355,7 @@ static void opl3sa2_mixer_restore(opl3sa2_state_t* devc)
 		}
 	}
 }
-#endif
+#endif /* CONFIG_PM_LEGACY */
 
 static inline void arg_to_vol_mono(unsigned int vol, int* value)
 {
@@ -831,7 +832,8 @@ static struct pnp_driver opl3sa2_driver = {
 
 /* End of component functions */
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
+
 static DEFINE_SPINLOCK(opl3sa2_lock);
 
 /* Power Management support functions */
@@ -906,7 +908,7 @@ static int opl3sa2_pm_callback(struct pm_dev *pdev, pm_request_t rqst, void *dat
 	}
 	return 0;
 }
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_LEGACY */
 
 /*
  * Install OPL3-SA2 based card(s).
@@ -1019,12 +1021,12 @@ static int __init init_opl3sa2(void)
 
 		/* ewww =) */
 		opl3sa2_state[card].card = card;
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 		/* register our power management capabilities */
 		opl3sa2_state[card].pmdev = pm_register(PM_ISA_DEV, card, opl3sa2_pm_callback);
 		if (opl3sa2_state[card].pmdev)
 			opl3sa2_state[card].pmdev->data = &opl3sa2_state[card];
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_LEGACY */
 
 		/*
 		 * Set the Yamaha 3D enhancement mode (aka Ymersion) if asked to and
@@ -1081,7 +1083,7 @@ static void __exit cleanup_opl3sa2(void)
 	int card;
 
 	for(card = 0; card < opl3sa2_cards_num; card++) {
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_LEGACY
 		if (opl3sa2_state[card].pmdev)
 			pm_unregister(opl3sa2_state[card].pmdev);
 #endif
-- 
cgit v1.2.3-71-gd317


From c1986ee9bea3d880bcf0d3f1a31e055778f306c7 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@gnumonks.org>
Date: Sun, 13 Nov 2005 16:06:29 -0800
Subject: [PATCH] New Omnikey Cardman 4000 driver

Add new Omnikey Cardman 4000 smartcard reader driver

Signed-off-by: Harald Welte <laforge@gnumonks.org>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 MAINTAINERS                     |    5 +
 drivers/char/pcmcia/Kconfig     |   11 +
 drivers/char/pcmcia/Makefile    |    1 +
 drivers/char/pcmcia/cm4000_cs.c | 2078 +++++++++++++++++++++++++++++++++++++++
 include/linux/cm4000_cs.h       |   66 ++
 5 files changed, 2161 insertions(+)
 create mode 100644 drivers/char/pcmcia/cm4000_cs.c
 create mode 100644 include/linux/cm4000_cs.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index c1350d7e6789..cc924073f599 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1873,6 +1873,11 @@ L:	linux-tr@linuxtr.net
 W:	http://www.linuxtr.net
 S:	Maintained
 
+OMNIKEY CARDMAN 4000 DRIVER
+P:	Harald Welte
+M:	laforge@gnumonks.org
+S:	Maintained
+
 OMNIKEY CARDMAN 4040 DRIVER
 P:	Harald Welte
 M:	laforge@gnumonks.org
diff --git a/drivers/char/pcmcia/Kconfig b/drivers/char/pcmcia/Kconfig
index e8d41f36635d..27c1179ee527 100644
--- a/drivers/char/pcmcia/Kconfig
+++ b/drivers/char/pcmcia/Kconfig
@@ -18,6 +18,17 @@ config SYNCLINK_CS
 	  The module will be called synclinkmp.  If you want to do that, say M
 	  here.
 
+config CARDMAN_4000
+	tristate "Omnikey Cardman 4000 support"
+	depends on PCMCIA
+	help
+	  Enable support for the Omnikey Cardman 4000 PCMCIA Smartcard
+	  reader.
+
+	  This kernel driver requires additional userspace support, either
+	  by the vendor-provided PC/SC ifd_handler (http://www.omnikey.com/),
+	  or via the cm4000 backend of OpenCT (http://www.opensc.com/).
+
 config CARDMAN_4040
 	tristate "Omnikey CardMan 4040 support"
 	depends on PCMCIA
diff --git a/drivers/char/pcmcia/Makefile b/drivers/char/pcmcia/Makefile
index 35cd766a0a17..0aae20985d57 100644
--- a/drivers/char/pcmcia/Makefile
+++ b/drivers/char/pcmcia/Makefile
@@ -5,4 +5,5 @@
 #
 
 obj-$(CONFIG_SYNCLINK_CS) += synclink_cs.o
+obj-$(CONFIG_CARDMAN_4000) += cm4000_cs.o
 obj-$(CONFIG_CARDMAN_4040) += cm4040_cs.o
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
new file mode 100644
index 000000000000..ef011ef5dc46
--- /dev/null
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -0,0 +1,2078 @@
+ /*
+  * A driver for the PCMCIA Smartcard Reader "Omnikey CardMan Mobile 4000"
+  *
+  * cm4000_cs.c support.linux@omnikey.com
+  *
+  * Tue Oct 23 11:32:43 GMT 2001 herp - cleaned up header files
+  * Sun Jan 20 10:11:15 MET 2002 herp - added modversion header files
+  * Thu Nov 14 16:34:11 GMT 2002 mh   - added PPS functionality
+  * Tue Nov 19 16:36:27 GMT 2002 mh   - added SUSPEND/RESUME functionailty
+  * Wed Jul 28 12:55:01 CEST 2004 mh  - kernel 2.6 adjustments
+  *
+  * current version: 2.4.0gm4
+  *
+  * (C) 2000,2001,2002,2003,2004 Omnikey AG
+  *
+  * (C) 2005 Harald Welte <laforge@gnumonks.org>
+  * 	- Adhere to Kernel CodingStyle
+  * 	- Port to 2.6.13 "new" style PCMCIA
+  * 	- Check for copy_{from,to}_user return values
+  * 	- Use nonseekable_open()
+  *
+  * All rights reserved. Licensed under dual BSD/GPL license.
+  */
+
+/* #define PCMCIA_DEBUG 6 */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include <pcmcia/cs_types.h>
+#include <pcmcia/cs.h>
+#include <pcmcia/cistpl.h>
+#include <pcmcia/cisreg.h>
+#include <pcmcia/ciscode.h>
+#include <pcmcia/ds.h>
+
+#include <linux/cm4000_cs.h>
+
+/* #define ATR_CSUM */
+
+#ifdef PCMCIA_DEBUG
+#define reader_to_dev(x)	(&handle_to_dev(x->link.handle))
+static int pc_debug = PCMCIA_DEBUG;
+module_param(pc_debug, int, 0600);
+#define DEBUGP(n, rdr, x, args...) do { 				\
+	if (pc_debug >= (n))						\
+		dev_printk(KERN_DEBUG, reader_to_dev(rdr), "%s:" x, 	\
+			   __FUNCTION__ , ## args);			\
+	} while (0)
+#else
+#define DEBUGP(n, rdr, x, args...)
+#endif
+static char *version = "cm4000_cs.c v2.4.0gm5 - All bugs added by Harald Welte";
+
+#define	T_1SEC		(HZ)
+#define	T_10MSEC	msecs_to_jiffies(10)
+#define	T_20MSEC	msecs_to_jiffies(20)
+#define	T_40MSEC	msecs_to_jiffies(40)
+#define	T_50MSEC	msecs_to_jiffies(50)
+#define	T_100MSEC	msecs_to_jiffies(100)
+#define	T_500MSEC	msecs_to_jiffies(500)
+
+static void cm4000_detach(dev_link_t *link);
+static void cm4000_release(dev_link_t *link);
+
+static int major;		/* major number we get from the kernel */
+
+/* note: the first state has to have number 0 always */
+
+#define	M_FETCH_ATR	0
+#define	M_TIMEOUT_WAIT	1
+#define	M_READ_ATR_LEN	2
+#define	M_READ_ATR	3
+#define	M_ATR_PRESENT	4
+#define	M_BAD_CARD	5
+#define M_CARDOFF	6
+
+#define	LOCK_IO			0
+#define	LOCK_MONITOR		1
+
+#define IS_AUTOPPS_ACT		 6
+#define	IS_PROCBYTE_PRESENT	 7
+#define	IS_INVREV		 8
+#define IS_ANY_T0		 9
+#define	IS_ANY_T1		10
+#define	IS_ATR_PRESENT		11
+#define	IS_ATR_VALID		12
+#define	IS_CMM_ABSENT		13
+#define	IS_BAD_LENGTH		14
+#define	IS_BAD_CSUM		15
+#define	IS_BAD_CARD		16
+
+#define REG_FLAGS0(x)		(x + 0)
+#define REG_FLAGS1(x)		(x + 1)
+#define REG_NUM_BYTES(x)	(x + 2)
+#define REG_BUF_ADDR(x)		(x + 3)
+#define REG_BUF_DATA(x)		(x + 4)
+#define REG_NUM_SEND(x)		(x + 5)
+#define REG_BAUDRATE(x)		(x + 6)
+#define REG_STOPBITS(x)		(x + 7)
+
+struct cm4000_dev {
+	dev_link_t link;		/* pcmcia link */
+	dev_node_t node;		/* OS node (major,minor) */
+
+	unsigned char atr[MAX_ATR];
+	unsigned char rbuf[512];
+	unsigned char sbuf[512];
+
+	wait_queue_head_t devq;		/* when removing cardman must not be
+					   zeroed! */
+
+	wait_queue_head_t ioq;		/* if IO is locked, wait on this Q */
+	wait_queue_head_t atrq;		/* wait for ATR valid */
+	wait_queue_head_t readq;	/* used by write to wake blk.read */
+
+	/* warning: do not move this fields.
+	 * initialising to zero depends on it - see ZERO_DEV below.  */
+	unsigned char atr_csum;
+	unsigned char atr_len_retry;
+	unsigned short atr_len;
+	unsigned short rlen;	/* bytes avail. after write */
+	unsigned short rpos;	/* latest read pos. write zeroes */
+	unsigned char procbyte;	/* T=0 procedure byte */
+	unsigned char mstate;	/* state of card monitor */
+	unsigned char cwarn;	/* slow down warning */
+	unsigned char flags0;	/* cardman IO-flags 0 */
+	unsigned char flags1;	/* cardman IO-flags 1 */
+	unsigned int mdelay;	/* variable monitor speeds, in jiffies */
+
+	unsigned int baudv;	/* baud value for speed */
+	unsigned char ta1;
+	unsigned char proto;	/* T=0, T=1, ... */
+	unsigned long flags;	/* lock+flags (MONITOR,IO,ATR) * for concurrent
+				   access */
+
+	unsigned char pts[4];
+
+	struct timer_list timer;	/* used to keep monitor running */
+	int monitor_running;
+};
+
+#define	ZERO_DEV(dev)  						\
+	memset(&dev->atr_csum,0,				\
+		sizeof(struct cm4000_dev) - 			\
+		/*link*/ sizeof(dev_link_t) - 			\
+		/*node*/ sizeof(dev_node_t) - 			\
+		/*atr*/ MAX_ATR*sizeof(char) - 			\
+		/*rbuf*/ 512*sizeof(char) - 			\
+		/*sbuf*/ 512*sizeof(char) - 			\
+		/*queue*/ 4*sizeof(wait_queue_head_t))
+
+static dev_info_t dev_info = MODULE_NAME;
+static dev_link_t *dev_table[CM4000_MAX_DEV];
+
+/* This table doesn't use spaces after the comma between fields and thus
+ * violates CodingStyle.  However, I don't really think wrapping it around will
+ * make it any clearer to read -HW */
+static unsigned char fi_di_table[10][14] = {
+/*FI     00   01   02   03   04   05   06   07   08   09   10   11   12   13 */
+/*DI */
+/* 0 */ {0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11},
+/* 1 */ {0x01,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x91,0x11,0x11,0x11,0x11},
+/* 2 */ {0x02,0x12,0x22,0x32,0x11,0x11,0x11,0x11,0x11,0x92,0xA2,0xB2,0x11,0x11},
+/* 3 */ {0x03,0x13,0x23,0x33,0x43,0x53,0x63,0x11,0x11,0x93,0xA3,0xB3,0xC3,0xD3},
+/* 4 */ {0x04,0x14,0x24,0x34,0x44,0x54,0x64,0x11,0x11,0x94,0xA4,0xB4,0xC4,0xD4},
+/* 5 */ {0x00,0x15,0x25,0x35,0x45,0x55,0x65,0x11,0x11,0x95,0xA5,0xB5,0xC5,0xD5},
+/* 6 */ {0x06,0x16,0x26,0x36,0x46,0x56,0x66,0x11,0x11,0x96,0xA6,0xB6,0xC6,0xD6},
+/* 7 */ {0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11},
+/* 8 */ {0x08,0x11,0x28,0x38,0x48,0x58,0x68,0x11,0x11,0x98,0xA8,0xB8,0xC8,0xD8},
+/* 9 */ {0x09,0x19,0x29,0x39,0x49,0x59,0x69,0x11,0x11,0x99,0xA9,0xB9,0xC9,0xD9}
+};
+
+#ifndef PCMCIA_DEBUG
+#define	xoutb	outb
+#define	xinb	inb
+#else
+static inline void xoutb(unsigned char val, unsigned short port)
+{
+	if (pc_debug >= 7)
+		printk(KERN_DEBUG "outb(val=%.2x,port=%.4x)\n", val, port);
+	outb(val, port);
+}
+static inline unsigned char xinb(unsigned short port)
+{
+	unsigned char val;
+
+	val = inb(port);
+	if (pc_debug >= 7)
+		printk(KERN_DEBUG "%.2x=inb(%.4x)\n", val, port);
+
+	return val;
+}
+#endif
+
+#define	b_0000	15
+#define	b_0001	14
+#define	b_0010	13
+#define	b_0011	12
+#define	b_0100	11
+#define	b_0101	10
+#define	b_0110	9
+#define	b_0111	8
+#define	b_1000	7
+#define	b_1001	6
+#define	b_1010	5
+#define	b_1011	4
+#define	b_1100	3
+#define	b_1101	2
+#define	b_1110	1
+#define	b_1111	0
+
+static unsigned char irtab[16] = {
+	b_0000, b_1000, b_0100, b_1100,
+	b_0010, b_1010, b_0110, b_1110,
+	b_0001, b_1001, b_0101, b_1101,
+	b_0011, b_1011, b_0111, b_1111
+};
+
+static void str_invert_revert(unsigned char *b, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		b[i] = (irtab[b[i] & 0x0f] << 4) | irtab[b[i] >> 4];
+}
+
+static unsigned char invert_revert(unsigned char ch)
+{
+	return (irtab[ch & 0x0f] << 4) | irtab[ch >> 4];
+}
+
+#define	ATRLENCK(dev,pos) \
+	if (pos>=dev->atr_len || pos>=MAX_ATR) \
+		goto return_0;
+
+static unsigned int calc_baudv(unsigned char fidi)
+{
+	unsigned int wcrcf, wbrcf, fi_rfu, di_rfu;
+
+	fi_rfu = 372;
+	di_rfu = 1;
+
+	/* FI */
+	switch ((fidi >> 4) & 0x0F) {
+	case 0x00:
+		wcrcf = 372;
+		break;
+	case 0x01:
+		wcrcf = 372;
+		break;
+	case 0x02:
+		wcrcf = 558;
+		break;
+	case 0x03:
+		wcrcf = 744;
+		break;
+	case 0x04:
+		wcrcf = 1116;
+		break;
+	case 0x05:
+		wcrcf = 1488;
+		break;
+	case 0x06:
+		wcrcf = 1860;
+		break;
+	case 0x07:
+		wcrcf = fi_rfu;
+		break;
+	case 0x08:
+		wcrcf = fi_rfu;
+		break;
+	case 0x09:
+		wcrcf = 512;
+		break;
+	case 0x0A:
+		wcrcf = 768;
+		break;
+	case 0x0B:
+		wcrcf = 1024;
+		break;
+	case 0x0C:
+		wcrcf = 1536;
+		break;
+	case 0x0D:
+		wcrcf = 2048;
+		break;
+	default:
+		wcrcf = fi_rfu;
+		break;
+	}
+
+	/* DI */
+	switch (fidi & 0x0F) {
+	case 0x00:
+		wbrcf = di_rfu;
+		break;
+	case 0x01:
+		wbrcf = 1;
+		break;
+	case 0x02:
+		wbrcf = 2;
+		break;
+	case 0x03:
+		wbrcf = 4;
+		break;
+	case 0x04:
+		wbrcf = 8;
+		break;
+	case 0x05:
+		wbrcf = 16;
+		break;
+	case 0x06:
+		wbrcf = 32;
+		break;
+	case 0x07:
+		wbrcf = di_rfu;
+		break;
+	case 0x08:
+		wbrcf = 12;
+		break;
+	case 0x09:
+		wbrcf = 20;
+		break;
+	default:
+		wbrcf = di_rfu;
+		break;
+	}
+
+	return (wcrcf / wbrcf);
+}
+
+static unsigned short io_read_num_rec_bytes(ioaddr_t iobase, unsigned short *s)
+{
+	unsigned short tmp;
+
+	tmp = *s = 0;
+	do {
+		*s = tmp;
+		tmp = inb(REG_NUM_BYTES(iobase)) |
+				(inb(REG_FLAGS0(iobase)) & 4 ? 0x100 : 0);
+	} while (tmp != *s);
+
+	return *s;
+}
+
+static int parse_atr(struct cm4000_dev *dev)
+{
+	unsigned char any_t1, any_t0;
+	unsigned char ch, ifno;
+	int ix, done;
+
+	DEBUGP(3, dev, "-> parse_atr: dev->atr_len = %i\n", dev->atr_len);
+
+	if (dev->atr_len < 3) {
+		DEBUGP(5, dev, "parse_atr: atr_len < 3\n");
+		return 0;
+	}
+
+	if (dev->atr[0] == 0x3f)
+		set_bit(IS_INVREV, &dev->flags);
+	else
+		clear_bit(IS_INVREV, &dev->flags);
+	ix = 1;
+	ifno = 1;
+	ch = dev->atr[1];
+	dev->proto = 0;		/* XXX PROTO */
+	any_t1 = any_t0 = done = 0;
+	dev->ta1 = 0x11;	/* defaults to 9600 baud */
+	do {
+		if (ifno == 1 && (ch & 0x10)) {
+			/* read first interface byte and TA1 is present */
+			dev->ta1 = dev->atr[2];
+			DEBUGP(5, dev, "Card says FiDi is 0x%.2x\n", dev->ta1);
+			ifno++;
+		} else if ((ifno == 2) && (ch & 0x10)) { /* TA(2) */
+			dev->ta1 = 0x11;
+			ifno++;
+		}
+
+		DEBUGP(5, dev, "Yi=%.2x\n", ch & 0xf0);
+		ix += ((ch & 0x10) >> 4)	/* no of int.face chars */
+		    +((ch & 0x20) >> 5)
+		    + ((ch & 0x40) >> 6)
+		    + ((ch & 0x80) >> 7);
+		/* ATRLENCK(dev,ix); */
+		if (ch & 0x80) {	/* TDi */
+			ch = dev->atr[ix];
+			if ((ch & 0x0f)) {
+				any_t1 = 1;
+				DEBUGP(5, dev, "card is capable of T=1\n");
+			} else {
+				any_t0 = 1;
+				DEBUGP(5, dev, "card is capable of T=0\n");
+			}
+		} else
+			done = 1;
+	} while (!done);
+
+	DEBUGP(5, dev, "ix=%d noHist=%d any_t1=%d\n",
+	      ix, dev->atr[1] & 15, any_t1);
+	if (ix + 1 + (dev->atr[1] & 0x0f) + any_t1 != dev->atr_len) {
+		DEBUGP(5, dev, "length error\n");
+		return 0;
+	}
+	if (any_t0)
+		set_bit(IS_ANY_T0, &dev->flags);
+
+	if (any_t1) {		/* compute csum */
+		dev->atr_csum = 0;
+#ifdef ATR_CSUM
+		for (i = 1; i < dev->atr_len; i++)
+			dev->atr_csum ^= dev->atr[i];
+		if (dev->atr_csum) {
+			set_bit(IS_BAD_CSUM, &dev->flags);
+			DEBUGP(5, dev, "bad checksum\n");
+			goto return_0;
+		}
+#endif
+		if (any_t0 == 0)
+			dev->proto = 1;	/* XXX PROTO */
+		set_bit(IS_ANY_T1, &dev->flags);
+	}
+
+	return 1;
+}
+
+struct card_fixup {
+	char atr[12];
+	u_int8_t atr_len;
+	u_int8_t stopbits;
+};
+
+static struct card_fixup card_fixups[] = {
+	{	/* ACOS */
+		.atr = { 0x3b, 0xb3, 0x11, 0x00, 0x00, 0x41, 0x01 },
+		.atr_len = 7,
+		.stopbits = 0x03,
+	},
+	{	/* Motorola */
+		.atr = {0x3b, 0x76, 0x13, 0x00, 0x00, 0x80, 0x62, 0x07,
+			0x41, 0x81, 0x81 },
+		.atr_len = 11,
+		.stopbits = 0x04,
+	},
+};
+
+static void set_cardparameter(struct cm4000_dev *dev)
+{
+	int i;
+	ioaddr_t iobase = dev->link.io.BasePort1;
+	u_int8_t stopbits = 0x02; /* ISO default */
+
+	DEBUGP(3, dev, "-> set_cardparameter\n");
+
+	dev->flags1 = dev->flags1 | (((dev->baudv - 1) & 0x0100) >> 8);
+	xoutb(dev->flags1, REG_FLAGS1(iobase));
+	DEBUGP(5, dev, "flags1 = 0x%02x\n", dev->flags1);
+
+	/* set baudrate */
+	xoutb((unsigned char)((dev->baudv - 1) & 0xFF), REG_BAUDRATE(iobase));
+
+	DEBUGP(5, dev, "baudv = %i -> write 0x%02x\n", dev->baudv,
+	      ((dev->baudv - 1) & 0xFF));
+
+	/* set stopbits */
+	for (i = 0; i < ARRAY_SIZE(card_fixups); i++) {
+		if (!memcmp(dev->atr, card_fixups[i].atr,
+			    card_fixups[i].atr_len))
+			stopbits = card_fixups[i].stopbits;
+	}
+	xoutb(stopbits, REG_STOPBITS(iobase));
+
+	DEBUGP(3, dev, "<- set_cardparameter\n");
+}
+
+static int set_protocol(struct cm4000_dev *dev, struct ptsreq *ptsreq)
+{
+
+	unsigned long tmp, i;
+	unsigned short num_bytes_read;
+	unsigned char pts_reply[4];
+	ssize_t rc;
+	ioaddr_t iobase = dev->link.io.BasePort1;
+
+	rc = 0;
+
+	DEBUGP(3, dev, "-> set_protocol\n");
+	DEBUGP(5, dev, "ptsreq->Protocol = 0x%.8x, ptsreq->Flags=0x%.8x, "
+		 "ptsreq->pts1=0x%.2x, ptsreq->pts2=0x%.2x, "
+		 "ptsreq->pts3=0x%.2x\n", (unsigned int)ptsreq->protocol,
+		 (unsigned int)ptsreq->flags, ptsreq->pts1, ptsreq->pts2,
+		 ptsreq->pts3);
+
+	/* Fill PTS structure */
+	dev->pts[0] = 0xff;
+	dev->pts[1] = 0x00;
+	tmp = ptsreq->protocol;
+	while ((tmp = (tmp >> 1)) > 0)
+		dev->pts[1]++;
+	dev->proto = dev->pts[1];	/* Set new protocol */
+	dev->pts[1] = (0x01 << 4) | (dev->pts[1]);
+
+	/* Correct Fi/Di according to CM4000 Fi/Di table */
+	DEBUGP(5, dev, "Ta(1) from ATR is 0x%.2x\n", dev->ta1);
+	/* set Fi/Di according to ATR TA(1) */
+	dev->pts[2] = fi_di_table[dev->ta1 & 0x0F][(dev->ta1 >> 4) & 0x0F];
+
+	/* Calculate PCK character */
+	dev->pts[3] = dev->pts[0] ^ dev->pts[1] ^ dev->pts[2];
+
+	DEBUGP(5, dev, "pts0=%.2x, pts1=%.2x, pts2=%.2x, pts3=%.2x\n",
+	       dev->pts[0], dev->pts[1], dev->pts[2], dev->pts[3]);
+
+	/* check card convention */
+	if (test_bit(IS_INVREV, &dev->flags))
+		str_invert_revert(dev->pts, 4);
+
+	/* reset SM */
+	xoutb(0x80, REG_FLAGS0(iobase));
+
+	/* Enable access to the message buffer */
+	DEBUGP(5, dev, "Enable access to the messages buffer\n");
+	dev->flags1 = 0x20	/* T_Active */
+	    | (test_bit(IS_INVREV, &dev->flags) ? 0x02 : 0x00) /* inv parity */
+	    | ((dev->baudv >> 8) & 0x01);	/* MSB-baud */
+	xoutb(dev->flags1, REG_FLAGS1(iobase));
+
+	DEBUGP(5, dev, "Enable message buffer -> flags1 = 0x%.2x\n",
+	       dev->flags1);
+
+	/* write challenge to the buffer */
+	DEBUGP(5, dev, "Write challenge to buffer: ");
+	for (i = 0; i < 4; i++) {
+		xoutb(i, REG_BUF_ADDR(iobase));
+		xoutb(dev->pts[i], REG_BUF_DATA(iobase));	/* buf data */
+#ifdef PCMCIA_DEBUG
+		if (pc_debug >= 5)
+			printk("0x%.2x ", dev->pts[i]);
+	}
+	if (pc_debug >= 5)
+		printk("\n");
+#else
+	}
+#endif
+
+	/* set number of bytes to write */
+	DEBUGP(5, dev, "Set number of bytes to write\n");
+	xoutb(0x04, REG_NUM_SEND(iobase));
+
+	/* Trigger CARDMAN CONTROLLER */
+	xoutb(0x50, REG_FLAGS0(iobase));
+
+	/* Monitor progress */
+	/* wait for xmit done */
+	DEBUGP(5, dev, "Waiting for NumRecBytes getting valid\n");
+
+	for (i = 0; i < 100; i++) {
+		if (inb(REG_FLAGS0(iobase)) & 0x08) {
+			DEBUGP(5, dev, "NumRecBytes is valid\n");
+			break;
+		}
+		mdelay(10);
+	}
+	if (i == 100) {
+		DEBUGP(5, dev, "Timeout waiting for NumRecBytes getting "
+		       "valid\n");
+		rc = -EIO;
+		goto exit_setprotocol;
+	}
+
+	DEBUGP(5, dev, "Reading NumRecBytes\n");
+	for (i = 0; i < 100; i++) {
+		io_read_num_rec_bytes(iobase, &num_bytes_read);
+		if (num_bytes_read >= 4) {
+			DEBUGP(2, dev, "NumRecBytes = %i\n", num_bytes_read);
+			break;
+		}
+		mdelay(10);
+	}
+
+	/* check whether it is a short PTS reply? */
+	if (num_bytes_read == 3)
+		i = 0;
+
+	if (i == 100) {
+		DEBUGP(5, dev, "Timeout reading num_bytes_read\n");
+		rc = -EIO;
+		goto exit_setprotocol;
+	}
+
+	DEBUGP(5, dev, "Reset the CARDMAN CONTROLLER\n");
+	xoutb(0x80, REG_FLAGS0(iobase));
+
+	/* Read PPS reply */
+	DEBUGP(5, dev, "Read PPS reply\n");
+	for (i = 0; i < num_bytes_read; i++) {
+		xoutb(i, REG_BUF_ADDR(iobase));
+		pts_reply[i] = inb(REG_BUF_DATA(iobase));
+	}
+
+#ifdef PCMCIA_DEBUG
+	DEBUGP(2, dev, "PTSreply: ");
+	for (i = 0; i < num_bytes_read; i++) {
+		if (pc_debug >= 5)
+			printk("0x%.2x ", pts_reply[i]);
+	}
+	printk("\n");
+#endif	/* PCMCIA_DEBUG */
+
+	DEBUGP(5, dev, "Clear Tactive in Flags1\n");
+	xoutb(0x20, REG_FLAGS1(iobase));
+
+	/* Compare ptsreq and ptsreply */
+	if ((dev->pts[0] == pts_reply[0]) &&
+	    (dev->pts[1] == pts_reply[1]) &&
+	    (dev->pts[2] == pts_reply[2]) && (dev->pts[3] == pts_reply[3])) {
+		/* setcardparameter according to PPS */
+		dev->baudv = calc_baudv(dev->pts[2]);
+		set_cardparameter(dev);
+	} else if ((dev->pts[0] == pts_reply[0]) &&
+		   ((dev->pts[1] & 0xef) == pts_reply[1]) &&
+		   ((pts_reply[0] ^ pts_reply[1]) == pts_reply[2])) {
+		/* short PTS reply, set card parameter to default values */
+		dev->baudv = calc_baudv(0x11);
+		set_cardparameter(dev);
+	} else
+		rc = -EIO;
+
+exit_setprotocol:
+	DEBUGP(3, dev, "<- set_protocol\n");
+	return rc;
+}
+
+static int io_detect_cm4000(ioaddr_t iobase, struct cm4000_dev *dev)
+{
+
+	/* note: statemachine is assumed to be reset */
+	if (inb(REG_FLAGS0(iobase)) & 8) {
+		clear_bit(IS_ATR_VALID, &dev->flags);
+		set_bit(IS_CMM_ABSENT, &dev->flags);
+		return 0;	/* detect CMM = 1 -> failure */
+	}
+	/* xoutb(0x40, REG_FLAGS1(iobase)); detectCMM */
+	xoutb(dev->flags1 | 0x40, REG_FLAGS1(iobase));
+	if ((inb(REG_FLAGS0(iobase)) & 8) == 0) {
+		clear_bit(IS_ATR_VALID, &dev->flags);
+		set_bit(IS_CMM_ABSENT, &dev->flags);
+		return 0;	/* detect CMM=0 -> failure */
+	}
+	/* clear detectCMM again by restoring original flags1 */
+	xoutb(dev->flags1, REG_FLAGS1(iobase));
+	return 1;
+}
+
+static void terminate_monitor(struct cm4000_dev *dev)
+{
+
+	/* tell the monitor to stop and wait until
+	 * it terminates.
+	 */
+	DEBUGP(3, dev, "-> terminate_monitor\n");
+	wait_event_interruptible(dev->devq,
+				 test_and_set_bit(LOCK_MONITOR,
+						  (void *)&dev->flags));
+
+	/* now, LOCK_MONITOR has been set.
+	 * allow a last cycle in the monitor.
+	 * the monitor will indicate that it has
+	 * finished by clearing this bit.
+	 */
+	DEBUGP(5, dev, "Now allow last cycle of monitor!\n");
+	while (test_bit(LOCK_MONITOR, (void *)&dev->flags))
+		msleep(25);
+
+	DEBUGP(5, dev, "Delete timer\n");
+	del_timer_sync(&dev->timer);
+#ifdef PCMCIA_DEBUG
+	dev->monitor_running = 0;
+#endif
+
+	DEBUGP(3, dev, "<- terminate_monitor\n");
+}
+
+/*
+ * monitor the card every 50msec. as a side-effect, retrieve the
+ * atr once a card is inserted. another side-effect of retrieving the
+ * atr is that the card will be powered on, so there is no need to
+ * power on the card explictely from the application: the driver
+ * is already doing that for you.
+ */
+
+static void monitor_card(unsigned long p)
+{
+	struct cm4000_dev *dev = (struct cm4000_dev *) p;
+	ioaddr_t iobase = dev->link.io.BasePort1;
+	unsigned short s;
+	struct ptsreq ptsreq;
+	int i, atrc;
+
+	DEBUGP(7, dev, "->  monitor_card\n");
+
+	/* if someone has set the lock for us: we're done! */
+	if (test_and_set_bit(LOCK_MONITOR, &dev->flags)) {
+		DEBUGP(4, dev, "About to stop monitor\n");
+		/* no */
+		dev->rlen =
+		    dev->rpos =
+		    dev->atr_csum = dev->atr_len_retry = dev->cwarn = 0;
+		dev->mstate = M_FETCH_ATR;
+		clear_bit(LOCK_MONITOR, &dev->flags);
+		/* close et al. are sleeping on devq, so wake it */
+		wake_up_interruptible(&dev->devq);
+		DEBUGP(2, dev, "<- monitor_card (we are done now)\n");
+		return;
+	}
+
+	/* try to lock io: if it is already locked, just add another timer */
+	if (test_and_set_bit(LOCK_IO, (void *)&dev->flags)) {
+		DEBUGP(4, dev, "Couldn't get IO lock\n");
+		goto return_with_timer;
+	}
+
+	/* is a card/a reader inserted at all ? */
+	dev->flags0 = xinb(REG_FLAGS0(iobase));
+	DEBUGP(7, dev, "dev->flags0 = 0x%2x\n", dev->flags0);
+	DEBUGP(7, dev, "smartcard present: %s\n",
+	       dev->flags0 & 1 ? "yes" : "no");
+	DEBUGP(7, dev, "cardman present: %s\n",
+	       dev->flags0 == 0xff ? "no" : "yes");
+
+	if ((dev->flags0 & 1) == 0	/* no smartcard inserted */
+	    || dev->flags0 == 0xff) {	/* no cardman inserted */
+		/* no */
+		dev->rlen =
+		    dev->rpos =
+		    dev->atr_csum = dev->atr_len_retry = dev->cwarn = 0;
+		dev->mstate = M_FETCH_ATR;
+
+		dev->flags &= 0x000000ff; /* only keep IO and MONITOR locks */
+
+		if (dev->flags0 == 0xff) {
+			DEBUGP(4, dev, "set IS_CMM_ABSENT bit\n");
+			set_bit(IS_CMM_ABSENT, &dev->flags);
+		} else if (test_bit(IS_CMM_ABSENT, &dev->flags)) {
+			DEBUGP(4, dev, "clear IS_CMM_ABSENT bit "
+			       "(card is removed)\n");
+			clear_bit(IS_CMM_ABSENT, &dev->flags);
+		}
+
+		goto release_io;
+	} else if ((dev->flags0 & 1) && test_bit(IS_CMM_ABSENT, &dev->flags)) {
+		/* cardman and card present but cardman was absent before
+		 * (after suspend with inserted card) */
+		DEBUGP(4, dev, "clear IS_CMM_ABSENT bit (card is inserted)\n");
+		clear_bit(IS_CMM_ABSENT, &dev->flags);
+	}
+
+	if (test_bit(IS_ATR_VALID, &dev->flags) == 1) {
+		DEBUGP(7, dev, "believe ATR is already valid (do nothing)\n");
+		goto release_io;
+	}
+
+	switch (dev->mstate) {
+		unsigned char flags0;
+	case M_CARDOFF:
+		DEBUGP(4, dev, "M_CARDOFF\n");
+		flags0 = inb(REG_FLAGS0(iobase));
+		if (flags0 & 0x02) {
+			/* wait until Flags0 indicate power is off */
+			dev->mdelay = T_10MSEC;
+		} else {
+			/* Flags0 indicate power off and no card inserted now;
+			 * Reset CARDMAN CONTROLLER */
+			xoutb(0x80, REG_FLAGS0(iobase));
+
+			/* prepare for fetching ATR again: after card off ATR
+			 * is read again automatically */
+			dev->rlen =
+			    dev->rpos =
+			    dev->atr_csum =
+			    dev->atr_len_retry = dev->cwarn = 0;
+			dev->mstate = M_FETCH_ATR;
+
+			/* minimal gap between CARDOFF and read ATR is 50msec */
+			dev->mdelay = T_50MSEC;
+		}
+		break;
+	case M_FETCH_ATR:
+		DEBUGP(4, dev, "M_FETCH_ATR\n");
+		xoutb(0x80, REG_FLAGS0(iobase));
+		DEBUGP(4, dev, "Reset BAUDV to 9600\n");
+		dev->baudv = 0x173;	/* 9600 */
+		xoutb(0x02, REG_STOPBITS(iobase));	/* stopbits=2 */
+		xoutb(0x73, REG_BAUDRATE(iobase));	/* baud value */
+		xoutb(0x21, REG_FLAGS1(iobase));	/* T_Active=1, baud
+							   value */
+		/* warm start vs. power on: */
+		xoutb(dev->flags0 & 2 ? 0x46 : 0x44, REG_FLAGS0(iobase));
+		dev->mdelay = T_40MSEC;
+		dev->mstate = M_TIMEOUT_WAIT;
+		break;
+	case M_TIMEOUT_WAIT:
+		DEBUGP(4, dev, "M_TIMEOUT_WAIT\n");
+		/* numRecBytes */
+		io_read_num_rec_bytes(iobase, &dev->atr_len);
+		dev->mdelay = T_10MSEC;
+		dev->mstate = M_READ_ATR_LEN;
+		break;
+	case M_READ_ATR_LEN:
+		DEBUGP(4, dev, "M_READ_ATR_LEN\n");
+		/* infinite loop possible, since there is no timeout */
+
+#define	MAX_ATR_LEN_RETRY	100
+
+		if (dev->atr_len == io_read_num_rec_bytes(iobase, &s)) {
+			if (dev->atr_len_retry++ >= MAX_ATR_LEN_RETRY) {					/* + XX msec */
+				dev->mdelay = T_10MSEC;
+				dev->mstate = M_READ_ATR;
+			}
+		} else {
+			dev->atr_len = s;
+			dev->atr_len_retry = 0;	/* set new timeout */
+		}
+
+		DEBUGP(4, dev, "Current ATR_LEN = %i\n", dev->atr_len);
+		break;
+	case M_READ_ATR:
+		DEBUGP(4, dev, "M_READ_ATR\n");
+		xoutb(0x80, REG_FLAGS0(iobase));	/* reset SM */
+		for (i = 0; i < dev->atr_len; i++) {
+			xoutb(i, REG_BUF_ADDR(iobase));
+			dev->atr[i] = inb(REG_BUF_DATA(iobase));
+		}
+		/* Deactivate T_Active flags */
+		DEBUGP(4, dev, "Deactivate T_Active flags\n");
+		dev->flags1 = 0x01;
+		xoutb(dev->flags1, REG_FLAGS1(iobase));
+
+		/* atr is present (which doesnt mean it's valid) */
+		set_bit(IS_ATR_PRESENT, &dev->flags);
+		if (dev->atr[0] == 0x03)
+			str_invert_revert(dev->atr, dev->atr_len);
+		atrc = parse_atr(dev);
+		if (atrc == 0) {	/* atr invalid */
+			dev->mdelay = 0;
+			dev->mstate = M_BAD_CARD;
+		} else {
+			dev->mdelay = T_50MSEC;
+			dev->mstate = M_ATR_PRESENT;
+			set_bit(IS_ATR_VALID, &dev->flags);
+		}
+
+		if (test_bit(IS_ATR_VALID, &dev->flags) == 1) {
+			DEBUGP(4, dev, "monitor_card: ATR valid\n");
+ 			/* if ta1 == 0x11, no PPS necessary (default values) */
+			/* do not do PPS with multi protocol cards */
+			if ((test_bit(IS_AUTOPPS_ACT, &dev->flags) == 0) &&
+			    (dev->ta1 != 0x11) &&
+			    !(test_bit(IS_ANY_T0, &dev->flags) &&
+			    test_bit(IS_ANY_T1, &dev->flags))) {
+				DEBUGP(4, dev, "Perform AUTOPPS\n");
+				set_bit(IS_AUTOPPS_ACT, &dev->flags);
+				ptsreq.protocol = ptsreq.protocol =
+				    (0x01 << dev->proto);
+				ptsreq.flags = 0x01;
+				ptsreq.pts1 = 0x00;
+				ptsreq.pts2 = 0x00;
+				ptsreq.pts3 = 0x00;
+				if (set_protocol(dev, &ptsreq) == 0) {
+					DEBUGP(4, dev, "AUTOPPS ret SUCC\n");
+					clear_bit(IS_AUTOPPS_ACT, &dev->flags);
+					wake_up_interruptible(&dev->atrq);
+				} else {
+					DEBUGP(4, dev, "AUTOPPS failed: "
+					       "repower using defaults\n");
+					/* prepare for repowering  */
+					clear_bit(IS_ATR_PRESENT, &dev->flags);
+					clear_bit(IS_ATR_VALID, &dev->flags);
+					dev->rlen =
+					    dev->rpos =
+					    dev->atr_csum =
+					    dev->atr_len_retry = dev->cwarn = 0;
+					dev->mstate = M_FETCH_ATR;
+
+					dev->mdelay = T_50MSEC;
+				}
+			} else {
+				/* for cards which use slightly different
+				 * params (extra guard time) */
+				set_cardparameter(dev);
+				if (test_bit(IS_AUTOPPS_ACT, &dev->flags) == 1)
+					DEBUGP(4, dev, "AUTOPPS already active "
+					       "2nd try:use default values\n");
+				if (dev->ta1 == 0x11)
+					DEBUGP(4, dev, "No AUTOPPS necessary "
+					       "TA(1)==0x11\n");
+				if (test_bit(IS_ANY_T0, &dev->flags)
+				    && test_bit(IS_ANY_T1, &dev->flags))
+					DEBUGP(4, dev, "Do NOT perform AUTOPPS "
+					       "with multiprotocol cards\n");
+				clear_bit(IS_AUTOPPS_ACT, &dev->flags);
+				wake_up_interruptible(&dev->atrq);
+			}
+		} else {
+			DEBUGP(4, dev, "ATR invalid\n");
+			wake_up_interruptible(&dev->atrq);
+		}
+		break;
+	case M_BAD_CARD:
+		DEBUGP(4, dev, "M_BAD_CARD\n");
+		/* slow down warning, but prompt immediately after insertion */
+		if (dev->cwarn == 0 || dev->cwarn == 10) {
+			set_bit(IS_BAD_CARD, &dev->flags);
+			printk(KERN_WARNING MODULE_NAME ": device %s: ",
+			       dev->node.dev_name);
+			if (test_bit(IS_BAD_CSUM, &dev->flags)) {
+				DEBUGP(4, dev, "ATR checksum (0x%.2x, should "
+				       "be zero) failed\n", dev->atr_csum);
+			}
+#ifdef PCMCIA_DEBUG
+			else if (test_bit(IS_BAD_LENGTH, &dev->flags)) {
+				DEBUGP(4, dev, "ATR length error\n");
+			} else {
+				DEBUGP(4, dev, "card damaged or wrong way "
+					"inserted\n");
+			}
+#endif
+			dev->cwarn = 0;
+			wake_up_interruptible(&dev->atrq);	/* wake open */
+		}
+		dev->cwarn++;
+		dev->mdelay = T_100MSEC;
+		dev->mstate = M_FETCH_ATR;
+		break;
+	default:
+		DEBUGP(7, dev, "Unknown action\n");
+		break;		/* nothing */
+	}
+
+release_io:
+	DEBUGP(7, dev, "release_io\n");
+	clear_bit(LOCK_IO, &dev->flags);
+	wake_up_interruptible(&dev->ioq);	/* whoever needs IO */
+
+return_with_timer:
+	DEBUGP(7, dev, "<- monitor_card (returns with timer)\n");
+	dev->timer.expires = jiffies + dev->mdelay;
+	add_timer(&dev->timer);
+	clear_bit(LOCK_MONITOR, &dev->flags);
+}
+
+/* Interface to userland (file_operations) */
+
+static ssize_t cmm_read(struct file *filp, __user char *buf, size_t count,
+			loff_t *ppos)
+{
+	struct cm4000_dev *dev = filp->private_data;
+	ioaddr_t iobase = dev->link.io.BasePort1;
+	ssize_t rc;
+	int i, j, k;
+
+	DEBUGP(2, dev, "-> cmm_read(%s,%d)\n", current->comm, current->pid);
+
+	if (count == 0)		/* according to manpage */
+		return 0;
+
+	if ((dev->link.state & DEV_PRESENT) == 0 ||	/* socket removed */
+	    test_bit(IS_CMM_ABSENT, &dev->flags))
+		return -ENODEV;
+
+	if (test_bit(IS_BAD_CSUM, &dev->flags))
+		return -EIO;
+
+	/* also see the note about this in cmm_write */
+	if (wait_event_interruptible
+	    (dev->atrq,
+	     ((filp->f_flags & O_NONBLOCK)
+	      || (test_bit(IS_ATR_PRESENT, (void *)&dev->flags) != 0)))) {
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		return -ERESTARTSYS;
+	}
+
+	if (test_bit(IS_ATR_VALID, &dev->flags) == 0)
+		return -EIO;
+
+	/* this one implements blocking IO */
+	if (wait_event_interruptible
+	    (dev->readq,
+	     ((filp->f_flags & O_NONBLOCK) || (dev->rpos < dev->rlen)))) {
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		return -ERESTARTSYS;
+	}
+
+	/* lock io */
+	if (wait_event_interruptible
+	    (dev->ioq,
+	     ((filp->f_flags & O_NONBLOCK)
+	      || (test_and_set_bit(LOCK_IO, (void *)&dev->flags) == 0)))) {
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		return -ERESTARTSYS;
+	}
+
+	rc = 0;
+	dev->flags0 = inb(REG_FLAGS0(iobase));
+	if ((dev->flags0 & 1) == 0	/* no smartcard inserted */
+	    || dev->flags0 == 0xff) {	/* no cardman inserted */
+		clear_bit(IS_ATR_VALID, &dev->flags);
+		if (dev->flags0 & 1) {
+			set_bit(IS_CMM_ABSENT, &dev->flags);
+			rc = -ENODEV;
+		}
+		rc = -EIO;
+		goto release_io;
+	}
+
+	DEBUGP(4, dev, "begin read answer\n");
+	j = min(count, (size_t)(dev->rlen - dev->rpos));
+	k = dev->rpos;
+	if (k + j > 255)
+		j = 256 - k;
+	DEBUGP(4, dev, "read1 j=%d\n", j);
+	for (i = 0; i < j; i++) {
+		xoutb(k++, REG_BUF_ADDR(iobase));
+		dev->rbuf[i] = xinb(REG_BUF_DATA(iobase));
+	}
+	j = min(count, (size_t)(dev->rlen - dev->rpos));
+	if (k + j > 255) {
+		DEBUGP(4, dev, "read2 j=%d\n", j);
+		dev->flags1 |= 0x10;	/* MSB buf addr set */
+		xoutb(dev->flags1, REG_FLAGS1(iobase));
+		for (; i < j; i++) {
+			xoutb(k++, REG_BUF_ADDR(iobase));
+			dev->rbuf[i] = xinb(REG_BUF_DATA(iobase));
+		}
+	}
+
+	if (dev->proto == 0 && count > dev->rlen - dev->rpos) {
+		DEBUGP(4, dev, "T=0 and count > buffer\n");
+		dev->rbuf[i] = dev->rbuf[i - 1];
+		dev->rbuf[i - 1] = dev->procbyte;
+		j++;
+	}
+	count = j;
+
+	dev->rpos = dev->rlen + 1;
+
+	/* Clear T1Active */
+	DEBUGP(4, dev, "Clear T1Active\n");
+	dev->flags1 &= 0xdf;
+	xoutb(dev->flags1, REG_FLAGS1(iobase));
+
+	xoutb(0, REG_FLAGS1(iobase));	/* clear detectCMM */
+	/* last check before exit */
+	if (!io_detect_cm4000(iobase, dev))
+		count = -ENODEV;
+
+	if (test_bit(IS_INVREV, &dev->flags) && count > 0)
+		str_invert_revert(dev->rbuf, count);
+
+	if (copy_to_user(buf, dev->rbuf, count))
+		return -EFAULT;
+
+release_io:
+	clear_bit(LOCK_IO, &dev->flags);
+	wake_up_interruptible(&dev->ioq);
+
+	DEBUGP(2, dev, "<- cmm_read returns: rc = %Zi\n",
+	       (rc < 0 ? rc : count));
+	return rc < 0 ? rc : count;
+}
+
+static ssize_t cmm_write(struct file *filp, const char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	struct cm4000_dev *dev = (struct cm4000_dev *) filp->private_data;
+	ioaddr_t iobase = dev->link.io.BasePort1;
+	unsigned short s;
+	unsigned char tmp;
+	unsigned char infolen;
+	unsigned char sendT0;
+	unsigned short nsend;
+	unsigned short nr;
+	ssize_t rc;
+	int i;
+
+	DEBUGP(2, dev, "-> cmm_write(%s,%d)\n", current->comm, current->pid);
+
+	if (count == 0)		/* according to manpage */
+		return 0;
+
+	if (dev->proto == 0 && count < 4) {
+		/* T0 must have at least 4 bytes */
+		DEBUGP(4, dev, "T0 short write\n");
+		return -EIO;
+	}
+
+	nr = count & 0x1ff;	/* max bytes to write */
+
+	sendT0 = dev->proto ? 0 : nr > 5 ? 0x08 : 0;
+
+	if ((dev->link.state & DEV_PRESENT) == 0 ||	/* socket removed */
+	    test_bit(IS_CMM_ABSENT, &dev->flags))
+		return -ENODEV;
+
+	if (test_bit(IS_BAD_CSUM, &dev->flags)) {
+		DEBUGP(4, dev, "bad csum\n");
+		return -EIO;
+	}
+
+	/*
+	 * wait for atr to become valid.
+	 * note: it is important to lock this code. if we dont, the monitor
+	 * could be run between test_bit and the the call the sleep on the
+	 * atr-queue.  if *then* the monitor detects atr valid, it will wake up
+	 * any process on the atr-queue, *but* since we have been interrupted,
+	 * we do not yet sleep on this queue. this would result in a missed
+	 * wake_up and the calling process would sleep forever (until
+	 * interrupted).  also, do *not* restore_flags before sleep_on, because
+	 * this could result in the same situation!
+	 */
+	if (wait_event_interruptible
+	    (dev->atrq,
+	     ((filp->f_flags & O_NONBLOCK)
+	      || (test_bit(IS_ATR_PRESENT, (void *)&dev->flags) != 0)))) {
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		return -ERESTARTSYS;
+	}
+
+	if (test_bit(IS_ATR_VALID, &dev->flags) == 0) {	/* invalid atr */
+		DEBUGP(4, dev, "invalid ATR\n");
+		return -EIO;
+	}
+
+	/* lock io */
+	if (wait_event_interruptible
+	    (dev->ioq,
+	     ((filp->f_flags & O_NONBLOCK)
+	      || (test_and_set_bit(LOCK_IO, (void *)&dev->flags) == 0)))) {
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		return -ERESTARTSYS;
+	}
+
+	if (copy_from_user(dev->sbuf, buf, ((count > 512) ? 512 : count)))
+		return -EFAULT;
+
+	rc = 0;
+	dev->flags0 = inb(REG_FLAGS0(iobase));
+	if ((dev->flags0 & 1) == 0	/* no smartcard inserted */
+	    || dev->flags0 == 0xff) {	/* no cardman inserted */
+		clear_bit(IS_ATR_VALID, &dev->flags);
+		if (dev->flags0 & 1) {
+			set_bit(IS_CMM_ABSENT, &dev->flags);
+			rc = -ENODEV;
+		} else {
+			DEBUGP(4, dev, "IO error\n");
+			rc = -EIO;
+		}
+		goto release_io;
+	}
+
+	xoutb(0x80, REG_FLAGS0(iobase));	/* reset SM  */
+
+	if (!io_detect_cm4000(iobase, dev)) {
+		rc = -ENODEV;
+		goto release_io;
+	}
+
+	/* reflect T=0 send/read mode in flags1 */
+	dev->flags1 |= (sendT0);
+
+	set_cardparameter(dev);
+
+	/* dummy read, reset flag procedure received */
+	tmp = inb(REG_FLAGS1(iobase));
+
+	dev->flags1 = 0x20	/* T_Active */
+	    | (sendT0)
+	    | (test_bit(IS_INVREV, &dev->flags) ? 2 : 0)/* inverse parity  */
+	    | (((dev->baudv - 1) & 0x0100) >> 8);	/* MSB-Baud */
+	DEBUGP(1, dev, "set dev->flags1 = 0x%.2x\n", dev->flags1);
+	xoutb(dev->flags1, REG_FLAGS1(iobase));
+
+	/* xmit data */
+	DEBUGP(4, dev, "Xmit data\n");
+	for (i = 0; i < nr; i++) {
+		if (i >= 256) {
+			dev->flags1 = 0x20	/* T_Active */
+			    | (sendT0)	/* SendT0 */
+				/* inverse parity: */
+			    | (test_bit(IS_INVREV, &dev->flags) ? 2 : 0)
+			    | (((dev->baudv - 1) & 0x0100) >> 8) /* MSB-Baud */
+			    | 0x10;	/* set address high */
+			DEBUGP(4, dev, "dev->flags = 0x%.2x - set address "
+			       "high\n", dev->flags1);
+			xoutb(dev->flags1, REG_FLAGS1(iobase));
+		}
+		if (test_bit(IS_INVREV, &dev->flags)) {
+			DEBUGP(4, dev, "Apply inverse convention for 0x%.2x "
+				"-> 0x%.2x\n", (unsigned char)dev->sbuf[i],
+			      invert_revert(dev->sbuf[i]));
+			xoutb(i, REG_BUF_ADDR(iobase));
+			xoutb(invert_revert(dev->sbuf[i]),
+			      REG_BUF_DATA(iobase));
+		} else {
+			xoutb(i, REG_BUF_ADDR(iobase));
+			xoutb(dev->sbuf[i], REG_BUF_DATA(iobase));
+		}
+	}
+	DEBUGP(4, dev, "Xmit done\n");
+
+	if (dev->proto == 0) {
+		/* T=0 proto: 0 byte reply  */
+		if (nr == 4) {
+			DEBUGP(4, dev, "T=0 assumes 0 byte reply\n");
+			xoutb(i, REG_BUF_ADDR(iobase));
+			if (test_bit(IS_INVREV, &dev->flags))
+				xoutb(0xff, REG_BUF_DATA(iobase));
+			else
+				xoutb(0x00, REG_BUF_DATA(iobase));
+		}
+
+		/* numSendBytes */
+		if (sendT0)
+			nsend = nr;
+		else {
+			if (nr == 4)
+				nsend = 5;
+			else {
+				nsend = 5 + (unsigned char)dev->sbuf[4];
+				if (dev->sbuf[4] == 0)
+					nsend += 0x100;
+			}
+		}
+	} else
+		nsend = nr;
+
+	/* T0: output procedure byte */
+	if (test_bit(IS_INVREV, &dev->flags)) {
+		DEBUGP(4, dev, "T=0 set Procedure byte (inverse-reverse) "
+		       "0x%.2x\n", invert_revert(dev->sbuf[1]));
+		xoutb(invert_revert(dev->sbuf[1]), REG_NUM_BYTES(iobase));
+	} else {
+		DEBUGP(4, dev, "T=0 set Procedure byte 0x%.2x\n", dev->sbuf[1]);
+		xoutb(dev->sbuf[1], REG_NUM_BYTES(iobase));
+	}
+
+	DEBUGP(1, dev, "set NumSendBytes = 0x%.2x\n",
+	       (unsigned char)(nsend & 0xff));
+	xoutb((unsigned char)(nsend & 0xff), REG_NUM_SEND(iobase));
+
+	DEBUGP(1, dev, "Trigger CARDMAN CONTROLLER (0x%.2x)\n",
+	       0x40	/* SM_Active */
+	      | (dev->flags0 & 2 ? 0 : 4)	/* power on if needed */
+	      |(dev->proto ? 0x10 : 0x08)	/* T=1/T=0 */
+	      |(nsend & 0x100) >> 8 /* MSB numSendBytes */ );
+	xoutb(0x40		/* SM_Active */
+	      | (dev->flags0 & 2 ? 0 : 4)	/* power on if needed */
+	      |(dev->proto ? 0x10 : 0x08)	/* T=1/T=0 */
+	      |(nsend & 0x100) >> 8,	/* MSB numSendBytes */
+	      REG_FLAGS0(iobase));
+
+	/* wait for xmit done */
+	if (dev->proto == 1) {
+		DEBUGP(4, dev, "Wait for xmit done\n");
+		for (i = 0; i < 1000; i++) {
+			if (inb(REG_FLAGS0(iobase)) & 0x08)
+				break;
+			msleep_interruptible(10);
+		}
+		if (i == 1000) {
+			DEBUGP(4, dev, "timeout waiting for xmit done\n");
+			rc = -EIO;
+			goto release_io;
+		}
+	}
+
+	/* T=1: wait for infoLen */
+
+	infolen = 0;
+	if (dev->proto) {
+		/* wait until infoLen is valid */
+		for (i = 0; i < 6000; i++) {	/* max waiting time of 1 min */
+			io_read_num_rec_bytes(iobase, &s);
+			if (s >= 3) {
+				infolen = inb(REG_FLAGS1(iobase));
+				DEBUGP(4, dev, "infolen=%d\n", infolen);
+				break;
+			}
+			msleep_interruptible(10);
+		}
+		if (i == 6000) {
+			DEBUGP(4, dev, "timeout waiting for infoLen\n");
+			rc = -EIO;
+			goto release_io;
+		}
+	} else
+		clear_bit(IS_PROCBYTE_PRESENT, &dev->flags);
+
+	/* numRecBytes | bit9 of numRecytes */
+	io_read_num_rec_bytes(iobase, &dev->rlen);
+	for (i = 0; i < 600; i++) {	/* max waiting time of 2 sec */
+		if (dev->proto) {
+			if (dev->rlen >= infolen + 4)
+				break;
+		}
+		msleep_interruptible(10);
+		/* numRecBytes | bit9 of numRecytes */
+		io_read_num_rec_bytes(iobase, &s);
+		if (s > dev->rlen) {
+			DEBUGP(1, dev, "NumRecBytes inc (reset timeout)\n");
+			i = 0;	/* reset timeout */
+			dev->rlen = s;
+		}
+		/* T=0: we are done when numRecBytes doesn't
+		 *      increment any more and NoProcedureByte
+		 *      is set and numRecBytes == bytes sent + 6
+		 *      (header bytes + data + 1 for sw2)
+		 *      except when the card replies an error
+		 *      which means, no data will be sent back.
+		 */
+		else if (dev->proto == 0) {
+			if ((inb(REG_BUF_ADDR(iobase)) & 0x80)) {
+				/* no procedure byte received since last read */
+				DEBUGP(1, dev, "NoProcedure byte set\n");
+				/* i=0; */
+			} else {
+				/* procedure byte received since last read */
+				DEBUGP(1, dev, "NoProcedure byte unset "
+					"(reset timeout)\n");
+				dev->procbyte = inb(REG_FLAGS1(iobase));
+				DEBUGP(1, dev, "Read procedure byte 0x%.2x\n",
+				      dev->procbyte);
+				i = 0;	/* resettimeout */
+			}
+			if (inb(REG_FLAGS0(iobase)) & 0x08) {
+				DEBUGP(1, dev, "T0Done flag (read reply)\n");
+				break;
+			}
+		}
+		if (dev->proto)
+			infolen = inb(REG_FLAGS1(iobase));
+	}
+	if (i == 600) {
+		DEBUGP(1, dev, "timeout waiting for numRecBytes\n");
+		rc = -EIO;
+		goto release_io;
+	} else {
+		if (dev->proto == 0) {
+			DEBUGP(1, dev, "Wait for T0Done bit to be  set\n");
+			for (i = 0; i < 1000; i++) {
+				if (inb(REG_FLAGS0(iobase)) & 0x08)
+					break;
+				msleep_interruptible(10);
+			}
+			if (i == 1000) {
+				DEBUGP(1, dev, "timeout waiting for T0Done\n");
+				rc = -EIO;
+				goto release_io;
+			}
+
+			dev->procbyte = inb(REG_FLAGS1(iobase));
+			DEBUGP(4, dev, "Read procedure byte 0x%.2x\n",
+			      dev->procbyte);
+
+			io_read_num_rec_bytes(iobase, &dev->rlen);
+			DEBUGP(4, dev, "Read NumRecBytes = %i\n", dev->rlen);
+
+		}
+	}
+	/* T=1: read offset=zero, T=0: read offset=after challenge */
+	dev->rpos = dev->proto ? 0 : nr == 4 ? 5 : nr > dev->rlen ? 5 : nr;
+	DEBUGP(4, dev, "dev->rlen = %i,  dev->rpos = %i, nr = %i\n",
+	      dev->rlen, dev->rpos, nr);
+
+release_io:
+	DEBUGP(4, dev, "Reset SM\n");
+	xoutb(0x80, REG_FLAGS0(iobase));	/* reset SM */
+
+	if (rc < 0) {
+		DEBUGP(4, dev, "Write failed but clear T_Active\n");
+		dev->flags1 &= 0xdf;
+		xoutb(dev->flags1, REG_FLAGS1(iobase));
+	}
+
+	clear_bit(LOCK_IO, &dev->flags);
+	wake_up_interruptible(&dev->ioq);
+	wake_up_interruptible(&dev->readq);	/* tell read we have data */
+
+	/* ITSEC E2: clear write buffer */
+	memset((char *)dev->sbuf, 0, 512);
+
+	/* return error or actually written bytes */
+	DEBUGP(2, dev, "<- cmm_write\n");
+	return rc < 0 ? rc : nr;
+}
+
+static void start_monitor(struct cm4000_dev *dev)
+{
+	DEBUGP(3, dev, "-> start_monitor\n");
+	if (!dev->monitor_running) {
+		DEBUGP(5, dev, "create, init and add timer\n");
+		init_timer(&dev->timer);
+		dev->monitor_running = 1;
+		dev->timer.expires = jiffies;
+		dev->timer.data = (unsigned long) dev;
+		dev->timer.function = monitor_card;
+		add_timer(&dev->timer);
+	} else
+		DEBUGP(5, dev, "monitor already running\n");
+	DEBUGP(3, dev, "<- start_monitor\n");
+}
+
+static void stop_monitor(struct cm4000_dev *dev)
+{
+	DEBUGP(3, dev, "-> stop_monitor\n");
+	if (dev->monitor_running) {
+		DEBUGP(5, dev, "stopping monitor\n");
+		terminate_monitor(dev);
+		/* reset monitor SM */
+		clear_bit(IS_ATR_VALID, &dev->flags);
+		clear_bit(IS_ATR_PRESENT, &dev->flags);
+	} else
+		DEBUGP(5, dev, "monitor already stopped\n");
+	DEBUGP(3, dev, "<- stop_monitor\n");
+}
+
+static int cmm_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
+		     unsigned long arg)
+{
+	struct cm4000_dev *dev = filp->private_data;
+	ioaddr_t iobase = dev->link.io.BasePort1;
+	dev_link_t *link;
+	int size;
+	int rc;
+#ifdef PCMCIA_DEBUG
+	char *ioctl_names[CM_IOC_MAXNR + 1] = {
+		[_IOC_NR(CM_IOCGSTATUS)] "CM_IOCGSTATUS",
+		[_IOC_NR(CM_IOCGATR)] "CM_IOCGATR",
+		[_IOC_NR(CM_IOCARDOFF)] "CM_IOCARDOFF",
+		[_IOC_NR(CM_IOCSPTS)] "CM_IOCSPTS",
+		[_IOC_NR(CM_IOSDBGLVL)] "CM4000_DBGLVL",
+	};
+#endif
+	DEBUGP(3, dev, "cmm_ioctl(device=%d.%d) %s\n", imajor(inode),
+	       iminor(inode), ioctl_names[_IOC_NR(cmd)]);
+
+	link = dev_table[iminor(inode)];
+	if (!(DEV_OK(link))) {
+		DEBUGP(4, dev, "DEV_OK false\n");
+		return -ENODEV;
+	}
+
+	if (test_bit(IS_CMM_ABSENT, &dev->flags)) {
+		DEBUGP(4, dev, "CMM_ABSENT flag set\n");
+		return -ENODEV;
+	}
+
+	if (_IOC_TYPE(cmd) != CM_IOC_MAGIC) {
+		DEBUGP(4, dev, "ioctype mismatch\n");
+		return -EINVAL;
+	}
+	if (_IOC_NR(cmd) > CM_IOC_MAXNR) {
+		DEBUGP(4, dev, "iocnr mismatch\n");
+		return -EINVAL;
+	}
+	size = _IOC_SIZE(cmd);
+	rc = 0;
+	DEBUGP(4, dev, "iocdir=%.4x iocr=%.4x iocw=%.4x iocsize=%d cmd=%.4x\n",
+	      _IOC_DIR(cmd), _IOC_READ, _IOC_WRITE, size, cmd);
+
+	if (_IOC_DIR(cmd) & _IOC_READ) {
+		if (!access_ok(VERIFY_WRITE, (void *)arg, size))
+			return -EFAULT;
+	}
+	if (_IOC_DIR(cmd) & _IOC_WRITE) {
+		if (!access_ok(VERIFY_READ, (void *)arg, size))
+			return -EFAULT;
+	}
+
+	switch (cmd) {
+	case CM_IOCGSTATUS:
+		DEBUGP(4, dev, " ... in CM_IOCGSTATUS\n");
+		{
+			int status;
+
+			/* clear other bits, but leave inserted & powered as
+			 * they are */
+			status = dev->flags0 & 3;
+			if (test_bit(IS_ATR_PRESENT, &dev->flags))
+				status |= CM_ATR_PRESENT;
+			if (test_bit(IS_ATR_VALID, &dev->flags))
+				status |= CM_ATR_VALID;
+			if (test_bit(IS_CMM_ABSENT, &dev->flags))
+				status |= CM_NO_READER;
+			if (test_bit(IS_BAD_CARD, &dev->flags))
+				status |= CM_BAD_CARD;
+			if (copy_to_user((int *)arg, &status, sizeof(int)))
+				return -EFAULT;
+		}
+		return 0;
+	case CM_IOCGATR:
+		DEBUGP(4, dev, "... in CM_IOCGATR\n");
+		{
+			struct atreq *atreq = (struct atreq *) arg;
+			int tmp;
+			/* allow nonblocking io and being interrupted */
+			if (wait_event_interruptible
+			    (dev->atrq,
+			     ((filp->f_flags & O_NONBLOCK)
+			      || (test_bit(IS_ATR_PRESENT, (void *)&dev->flags)
+				  != 0)))) {
+				if (filp->f_flags & O_NONBLOCK)
+					return -EAGAIN;
+				return -ERESTARTSYS;
+			}
+
+			if (test_bit(IS_ATR_VALID, &dev->flags) == 0) {
+				tmp = -1;
+				if (copy_to_user(&(atreq->atr_len), &tmp,
+						 sizeof(int)))
+					return -EFAULT;
+			} else {
+				if (copy_to_user(atreq->atr, dev->atr,
+						 dev->atr_len))
+					return -EFAULT;
+
+				tmp = dev->atr_len;
+				if (copy_to_user(&(atreq->atr_len), &tmp, sizeof(int)))
+					return -EFAULT;
+			}
+			return 0;
+		}
+	case CM_IOCARDOFF:
+
+#ifdef PCMCIA_DEBUG
+		DEBUGP(4, dev, "... in CM_IOCARDOFF\n");
+		if (dev->flags0 & 0x01) {
+			DEBUGP(4, dev, "    Card inserted\n");
+		} else {
+			DEBUGP(2, dev, "    No card inserted\n");
+		}
+		if (dev->flags0 & 0x02) {
+			DEBUGP(4, dev, "    Card powered\n");
+		} else {
+			DEBUGP(2, dev, "    Card not powered\n");
+		}
+#endif
+
+		/* is a card inserted and powered? */
+		if ((dev->flags0 & 0x01) && (dev->flags0 & 0x02)) {
+
+			/* get IO lock */
+			if (wait_event_interruptible
+			    (dev->ioq,
+			     ((filp->f_flags & O_NONBLOCK)
+			      || (test_and_set_bit(LOCK_IO, (void *)&dev->flags)
+				  == 0)))) {
+				if (filp->f_flags & O_NONBLOCK)
+					return -EAGAIN;
+				return -ERESTARTSYS;
+			}
+			/* Set Flags0 = 0x42 */
+			DEBUGP(4, dev, "Set Flags0=0x42 \n");
+			xoutb(0x42, REG_FLAGS0(iobase));
+			clear_bit(IS_ATR_PRESENT, &dev->flags);
+			clear_bit(IS_ATR_VALID, &dev->flags);
+			dev->mstate = M_CARDOFF;
+			clear_bit(LOCK_IO, &dev->flags);
+			if (wait_event_interruptible
+			    (dev->atrq,
+			     ((filp->f_flags & O_NONBLOCK)
+			      || (test_bit(IS_ATR_VALID, (void *)&dev->flags) !=
+				  0)))) {
+				if (filp->f_flags & O_NONBLOCK)
+					return -EAGAIN;
+				return -ERESTARTSYS;
+			}
+		}
+		/* release lock */
+		clear_bit(LOCK_IO, &dev->flags);
+		wake_up_interruptible(&dev->ioq);
+
+		return 0;
+	case CM_IOCSPTS:
+		{
+			struct ptsreq krnptsreq;
+
+			if (copy_from_user(&krnptsreq, (struct ptsreq *) arg,
+					   sizeof(struct ptsreq)))
+				return -EFAULT;
+
+			rc = 0;
+			DEBUGP(4, dev, "... in CM_IOCSPTS\n");
+			/* wait for ATR to get valid */
+			if (wait_event_interruptible
+			    (dev->atrq,
+			     ((filp->f_flags & O_NONBLOCK)
+			      || (test_bit(IS_ATR_PRESENT, (void *)&dev->flags)
+				  != 0)))) {
+				if (filp->f_flags & O_NONBLOCK)
+					return -EAGAIN;
+				return -ERESTARTSYS;
+			}
+			/* get IO lock */
+			if (wait_event_interruptible
+			    (dev->ioq,
+			     ((filp->f_flags & O_NONBLOCK)
+			      || (test_and_set_bit(LOCK_IO, (void *)&dev->flags)
+				  == 0)))) {
+				if (filp->f_flags & O_NONBLOCK)
+					return -EAGAIN;
+				return -ERESTARTSYS;
+			}
+
+			if ((rc = set_protocol(dev, &krnptsreq)) != 0) {
+				/* auto power_on again */
+				dev->mstate = M_FETCH_ATR;
+				clear_bit(IS_ATR_VALID, &dev->flags);
+			}
+			/* release lock */
+			clear_bit(LOCK_IO, &dev->flags);
+			wake_up_interruptible(&dev->ioq);
+
+		}
+		return rc;
+#ifdef PCMCIA_DEBUG
+	case CM_IOSDBGLVL:	/* set debug log level */
+		{
+			int old_pc_debug = 0;
+
+			old_pc_debug = pc_debug;
+			if (copy_from_user(&pc_debug, (int *)arg, sizeof(int)))
+				return -EFAULT;
+
+			if (old_pc_debug != pc_debug)
+				DEBUGP(0, dev, "Changed debug log level "
+				       "to %i\n", pc_debug);
+		}
+		return rc;
+#endif
+	default:
+		DEBUGP(4, dev, "... in default (unknown IOCTL code)\n");
+		return -EINVAL;
+	}
+}
+
+static int cmm_open(struct inode *inode, struct file *filp)
+{
+	struct cm4000_dev *dev;
+	dev_link_t *link;
+	int rc, minor = iminor(inode);
+
+	if (minor >= CM4000_MAX_DEV)
+		return -ENODEV;
+
+	link = dev_table[minor];
+	if (link == NULL || !(DEV_OK(link)))
+		return -ENODEV;
+
+	if (link->open)
+		return -EBUSY;
+
+	dev = link->priv;
+	filp->private_data = dev;
+
+	DEBUGP(2, dev, "-> cmm_open(device=%d.%d process=%s,%d)\n",
+	      imajor(inode), minor, current->comm, current->pid);
+
+	/* init device variables, they may be "polluted" after close
+	 * or, the device may never have been closed (i.e. open failed)
+	 */
+
+	ZERO_DEV(dev);
+
+	/* opening will always block since the
+	 * monitor will be started by open, which
+	 * means we have to wait for ATR becoming
+	 * vaild = block until valid (or card
+	 * inserted)
+	 */
+	if (filp->f_flags & O_NONBLOCK)
+		return -EAGAIN;
+
+	dev->mdelay = T_50MSEC;
+
+	/* start monitoring the cardstatus */
+	start_monitor(dev);
+
+	link->open = 1;		/* only one open per device */
+	rc = 0;
+
+	DEBUGP(2, dev, "<- cmm_open\n");
+	return nonseekable_open(inode, filp);
+}
+
+static int cmm_close(struct inode *inode, struct file *filp)
+{
+	struct cm4000_dev *dev;
+	dev_link_t *link;
+	int minor = iminor(inode);
+
+	if (minor >= CM4000_MAX_DEV)
+		return -ENODEV;
+
+	link = dev_table[minor];
+	if (link == NULL)
+		return -ENODEV;
+
+	dev = link->priv;
+
+	DEBUGP(2, dev, "-> cmm_close(maj/min=%d.%d)\n",
+	       imajor(inode), minor);
+
+	stop_monitor(dev);
+
+	ZERO_DEV(dev);
+
+	link->open = 0;		/* only one open per device */
+	wake_up(&dev->devq);	/* socket removed? */
+
+	DEBUGP(2, dev, "cmm_close\n");
+	return 0;
+}
+
+static void cmm_cm4000_release(dev_link_t * link)
+{
+	struct cm4000_dev *dev = link->priv;
+
+	/* dont terminate the monitor, rather rely on
+	 * close doing that for us.
+	 */
+	DEBUGP(3, dev, "-> cmm_cm4000_release\n");
+	while (link->open) {
+		printk(KERN_INFO MODULE_NAME ": delaying release until "
+		       "process has terminated\n");
+		/* note: don't interrupt us:
+		 * close the applications which own
+		 * the devices _first_ !
+		 */
+		wait_event(dev->devq, (link->open == 0));
+	}
+	/* dev->devq=NULL;	this cannot be zeroed earlier */
+	DEBUGP(3, dev, "<- cmm_cm4000_release\n");
+	return;
+}
+
+/*==== Interface to PCMCIA Layer =======================================*/
+
+static void cm4000_config(dev_link_t * link, int devno)
+{
+	client_handle_t handle = link->handle;
+	struct cm4000_dev *dev;
+	tuple_t tuple;
+	cisparse_t parse;
+	config_info_t conf;
+	u_char buf[64];
+	int fail_fn, fail_rc;
+	int rc;
+
+	/* read the config-tuples */
+	tuple.DesiredTuple = CISTPL_CONFIG;
+	tuple.Attributes = 0;
+	tuple.TupleData = buf;
+	tuple.TupleDataMax = sizeof(buf);
+	tuple.TupleOffset = 0;
+
+	if ((fail_rc = pcmcia_get_first_tuple(handle, &tuple)) != CS_SUCCESS) {
+		fail_fn = GetFirstTuple;
+		goto cs_failed;
+	}
+	if ((fail_rc = pcmcia_get_tuple_data(handle, &tuple)) != CS_SUCCESS) {
+		fail_fn = GetTupleData;
+		goto cs_failed;
+	}
+	if ((fail_rc =
+	     pcmcia_parse_tuple(handle, &tuple, &parse)) != CS_SUCCESS) {
+		fail_fn = ParseTuple;
+		goto cs_failed;
+	}
+	if ((fail_rc =
+	     pcmcia_get_configuration_info(handle, &conf)) != CS_SUCCESS) {
+		fail_fn = GetConfigurationInfo;
+		goto cs_failed;
+	}
+
+	link->state |= DEV_CONFIG;
+	link->conf.ConfigBase = parse.config.base;
+	link->conf.Present = parse.config.rmask[0];
+	link->conf.Vcc = conf.Vcc;
+
+	link->io.BasePort2 = 0;
+	link->io.NumPorts2 = 0;
+	link->io.Attributes2 = 0;
+	tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY;
+	for (rc = pcmcia_get_first_tuple(handle, &tuple);
+	     rc == CS_SUCCESS; rc = pcmcia_get_next_tuple(handle, &tuple)) {
+
+		rc = pcmcia_get_tuple_data(handle, &tuple);
+		if (rc != CS_SUCCESS)
+			continue;
+		rc = pcmcia_parse_tuple(handle, &tuple, &parse);
+		if (rc != CS_SUCCESS)
+			continue;
+
+		link->conf.ConfigIndex = parse.cftable_entry.index;
+
+		if (!parse.cftable_entry.io.nwin)
+			continue;
+
+		/* Get the IOaddr */
+		link->io.BasePort1 = parse.cftable_entry.io.win[0].base;
+		link->io.NumPorts1 = parse.cftable_entry.io.win[0].len;
+		link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO;
+		if (!(parse.cftable_entry.io.flags & CISTPL_IO_8BIT))
+			link->io.Attributes1 = IO_DATA_PATH_WIDTH_16;
+		if (!(parse.cftable_entry.io.flags & CISTPL_IO_16BIT))
+			link->io.Attributes1 = IO_DATA_PATH_WIDTH_8;
+		link->io.IOAddrLines = parse.cftable_entry.io.flags
+		    & CISTPL_IO_LINES_MASK;
+
+		rc = pcmcia_request_io(handle, &link->io);
+		if (rc == CS_SUCCESS)
+			break;	/* we are done */
+	}
+	if (rc != CS_SUCCESS)
+		goto cs_release;
+
+	link->conf.IntType = 00000002;
+
+	if ((fail_rc =
+	     pcmcia_request_configuration(handle, &link->conf)) != CS_SUCCESS) {
+		fail_fn = RequestConfiguration;
+		goto cs_release;
+	}
+
+	dev = link->priv;
+	sprintf(dev->node.dev_name, DEVICE_NAME "%d", devno);
+	dev->node.major = major;
+	dev->node.minor = devno;
+	dev->node.next = NULL;
+	link->dev = &dev->node;
+	link->state &= ~DEV_CONFIG_PENDING;
+
+	return;
+
+cs_failed:
+	cs_error(handle, fail_fn, fail_rc);
+cs_release:
+	cm4000_release(link);
+
+	link->state &= ~DEV_CONFIG_PENDING;
+}
+
+static int cm4000_event(event_t event, int priority,
+			event_callback_args_t *args)
+{
+	dev_link_t *link;
+	struct cm4000_dev *dev;
+	int devno;
+
+	link = args->client_data;
+	dev = link->priv;
+
+	DEBUGP(3, dev, "-> cm4000_event\n");
+	for (devno = 0; devno < CM4000_MAX_DEV; devno++)
+		if (dev_table[devno] == link)
+			break;
+
+	if (devno == CM4000_MAX_DEV)
+		return CS_BAD_ADAPTER;
+
+	switch (event) {
+	case CS_EVENT_CARD_INSERTION:
+		DEBUGP(5, dev, "CS_EVENT_CARD_INSERTION\n");
+		link->state |= DEV_PRESENT | DEV_CONFIG_PENDING;
+		cm4000_config(link, devno);
+		break;
+	case CS_EVENT_CARD_REMOVAL:
+		DEBUGP(5, dev, "CS_EVENT_CARD_REMOVAL\n");
+		link->state &= ~DEV_PRESENT;
+		stop_monitor(dev);
+		break;
+	case CS_EVENT_PM_SUSPEND:
+		DEBUGP(5, dev, "CS_EVENT_PM_SUSPEND "
+		      "(fall-through to CS_EVENT_RESET_PHYSICAL)\n");
+		link->state |= DEV_SUSPEND;
+		/* fall-through */
+	case CS_EVENT_RESET_PHYSICAL:
+		DEBUGP(5, dev, "CS_EVENT_RESET_PHYSICAL\n");
+		if (link->state & DEV_CONFIG) {
+			DEBUGP(5, dev, "ReleaseConfiguration\n");
+			pcmcia_release_configuration(link->handle);
+		}
+		stop_monitor(dev);
+		break;
+	case CS_EVENT_PM_RESUME:
+		DEBUGP(5, dev, "CS_EVENT_PM_RESUME "
+		      "(fall-through to CS_EVENT_CARD_RESET)\n");
+		link->state &= ~DEV_SUSPEND;
+		/* fall-through */
+	case CS_EVENT_CARD_RESET:
+		DEBUGP(5, dev, "CS_EVENT_CARD_RESET\n");
+		if ((link->state & DEV_CONFIG)) {
+			DEBUGP(5, dev, "RequestConfiguration\n");
+			pcmcia_request_configuration(link->handle, &link->conf);
+		}
+		if (link->open)
+			start_monitor(dev);
+		break;
+	default:
+		DEBUGP(5, dev, "unknown event %.2x\n", event);
+		break;
+	}
+	DEBUGP(3, dev, "<- cm4000_event\n");
+	return CS_SUCCESS;
+}
+
+static void cm4000_release(dev_link_t *link)
+{
+	cmm_cm4000_release(link->priv);	/* delay release until device closed */
+	pcmcia_release_configuration(link->handle);
+	pcmcia_release_io(link->handle, &link->io);
+}
+
+static dev_link_t *cm4000_attach(void)
+{
+	struct cm4000_dev *dev;
+	dev_link_t *link;
+	client_reg_t client_reg;
+	int i;
+
+	for (i = 0; i < CM4000_MAX_DEV; i++)
+		if (dev_table[i] == NULL)
+			break;
+
+	if (i == CM4000_MAX_DEV) {
+		printk(KERN_NOTICE MODULE_NAME ": all devices in use\n");
+		return NULL;
+	}
+
+	/* create a new cm4000_cs device */
+	dev = kzalloc(sizeof(struct cm4000_dev), GFP_KERNEL);
+	if (dev == NULL)
+		return NULL;
+
+	link = &dev->link;
+	link->priv = dev;
+	link->conf.IntType = INT_MEMORY_AND_IO;
+	dev_table[i] = link;
+
+	/* register with card services */
+	client_reg.dev_info = &dev_info;
+	client_reg.EventMask =
+	    CS_EVENT_CARD_INSERTION | CS_EVENT_CARD_REMOVAL |
+	    CS_EVENT_RESET_PHYSICAL | CS_EVENT_CARD_RESET |
+	    CS_EVENT_PM_SUSPEND | CS_EVENT_PM_RESUME;
+	client_reg.Version = 0x0210;
+	client_reg.event_callback_args.client_data = link;
+
+	i = pcmcia_register_client(&link->handle, &client_reg);
+	if (i) {
+		cs_error(link->handle, RegisterClient, i);
+		cm4000_detach(link);
+		return NULL;
+	}
+
+	init_waitqueue_head(&dev->devq);
+	init_waitqueue_head(&dev->ioq);
+	init_waitqueue_head(&dev->atrq);
+	init_waitqueue_head(&dev->readq);
+
+	return link;
+}
+
+static void cm4000_detach_by_devno(int devno, dev_link_t * link)
+{
+	struct cm4000_dev *dev = link->priv;
+
+	DEBUGP(3, dev, "-> detach_by_devno(devno=%d)\n", devno);
+
+	if (link->state & DEV_CONFIG) {
+		DEBUGP(5, dev, "device still configured (try to release it)\n");
+		cm4000_release(link);
+	}
+
+	if (link->handle) {
+		pcmcia_deregister_client(link->handle);
+	}
+
+	dev_table[devno] = NULL;
+	kfree(dev);
+	return;
+}
+
+static void cm4000_detach(dev_link_t * link)
+{
+	int i;
+
+	/* find device */
+	for (i = 0; i < CM4000_MAX_DEV; i++)
+		if (dev_table[i] == link)
+			break;
+
+	if (i == CM4000_MAX_DEV)
+		return;
+
+	cm4000_detach_by_devno(i, link);
+	return;
+}
+
+static struct file_operations cm4000_fops = {
+	.owner	= THIS_MODULE,
+	.read	= cmm_read,
+	.write	= cmm_write,
+	.ioctl	= cmm_ioctl,
+	.open	= cmm_open,
+	.release= cmm_close,
+};
+
+static struct pcmcia_device_id cm4000_ids[] = {
+	PCMCIA_DEVICE_MANF_CARD(0x0223, 0x0002),
+	PCMCIA_DEVICE_PROD_ID12("CardMan", "4000", 0x2FB368CA, 0xA2BD8C39),
+	PCMCIA_DEVICE_NULL,
+};
+MODULE_DEVICE_TABLE(pcmcia, cm4000_ids);
+
+static struct pcmcia_driver cm4000_driver = {
+	.owner	  = THIS_MODULE,
+	.drv	  = {
+		.name = "cm4000_cs",
+		},
+	.attach   = cm4000_attach,
+	.detach   = cm4000_detach,
+	.event	  = cm4000_event,
+	.id_table = cm4000_ids,
+};
+
+static int __init cmm_init(void)
+{
+	printk(KERN_INFO "%s\n", version);
+	pcmcia_register_driver(&cm4000_driver);
+	major = register_chrdev(0, DEVICE_NAME, &cm4000_fops);
+	if (major < 0) {
+		printk(KERN_WARNING MODULE_NAME
+			": could not get major number\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void __exit cmm_exit(void)
+{
+	int i;
+
+	printk(KERN_INFO MODULE_NAME ": unloading\n");
+	pcmcia_unregister_driver(&cm4000_driver);
+	for (i = 0; i < CM4000_MAX_DEV; i++)
+		if (dev_table[i])
+			cm4000_detach_by_devno(i, dev_table[i]);
+	unregister_chrdev(major, DEVICE_NAME);
+};
+
+module_init(cmm_init);
+module_exit(cmm_exit);
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/include/linux/cm4000_cs.h b/include/linux/cm4000_cs.h
new file mode 100644
index 000000000000..605ebe24bb2e
--- /dev/null
+++ b/include/linux/cm4000_cs.h
@@ -0,0 +1,66 @@
+#ifndef	_CM4000_H_
+#define	_CM4000_H_
+
+#define	MAX_ATR			33
+
+#define	CM4000_MAX_DEV		4
+
+/* those two structures are passed via ioctl() from/to userspace.  They are
+ * used by existing userspace programs, so I kepth the awkward "bIFSD" naming
+ * not to break compilation of userspace apps. -HW */
+
+typedef struct atreq {
+	int32_t atr_len;
+	unsigned char atr[64];
+	int32_t power_act;
+	unsigned char bIFSD;
+	unsigned char bIFSC;
+} atreq_t;
+
+
+/* what is particularly stupid in the original driver is the arch-dependant
+ * member sizes. This leads to CONFIG_COMPAT breakage, since 32bit userspace
+ * will lay out the structure members differently than the 64bit kernel.
+ *
+ * I've changed "ptsreq.protocol" from "unsigned long" to "u_int32_t".
+ * On 32bit this will make no difference.  With 64bit kernels, it will make
+ * 32bit apps work, too.
+ */
+
+typedef struct ptsreq {
+	u_int32_t protocol; /*T=0: 2^0, T=1:  2^1*/
+ 	unsigned char flags;
+ 	unsigned char pts1;
+ 	unsigned char pts2;
+	unsigned char pts3;
+} ptsreq_t;
+
+#define	CM_IOC_MAGIC		'c'
+#define	CM_IOC_MAXNR	        255
+
+#define	CM_IOCGSTATUS		_IOR (CM_IOC_MAGIC, 0, unsigned char *)
+#define	CM_IOCGATR		_IOWR(CM_IOC_MAGIC, 1, atreq_t *)
+#define	CM_IOCSPTS		_IOW (CM_IOC_MAGIC, 2, ptsreq_t *)
+#define	CM_IOCSRDR		_IO  (CM_IOC_MAGIC, 3)
+#define CM_IOCARDOFF            _IO  (CM_IOC_MAGIC, 4)
+
+#define CM_IOSDBGLVL            _IOW(CM_IOC_MAGIC, 250, int*)
+
+/* card and device states */
+#define	CM_CARD_INSERTED		0x01
+#define	CM_CARD_POWERED			0x02
+#define	CM_ATR_PRESENT			0x04
+#define	CM_ATR_VALID	 		0x08
+#define	CM_STATE_VALID			0x0f
+/* extra info only from CM4000 */
+#define	CM_NO_READER			0x10
+#define	CM_BAD_CARD			0x20
+
+
+#ifdef __KERNEL__
+
+#define	DEVICE_NAME		"cmm"
+#define	MODULE_NAME		"cm4000_cs"
+
+#endif	/* __KERNEL__ */
+#endif	/* _CM4000_H_ */
-- 
cgit v1.2.3-71-gd317


From 4c8d3d997ef3c0594350fba716529905b314287e Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@gate.crashing.org>
Date: Sun, 13 Nov 2005 16:06:30 -0800
Subject: [PATCH] Update email address for Kumar

Changed jobs and the Freescale address is no longer valid.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 CREDITS                                      | 2 +-
 MAINTAINERS                                  | 2 +-
 arch/powerpc/kernel/head_fsl_booke.S         | 2 +-
 arch/powerpc/mm/fsl_booke_mmu.c              | 2 +-
 arch/powerpc/oprofile/op_model_fsl_booke.c   | 2 +-
 arch/ppc/kernel/head_fsl_booke.S             | 2 +-
 arch/ppc/mm/fsl_booke_mmu.c                  | 2 +-
 arch/ppc/platforms/83xx/mpc834x_sys.c        | 2 +-
 arch/ppc/platforms/83xx/mpc834x_sys.h        | 2 +-
 arch/ppc/platforms/85xx/mpc8540_ads.c        | 2 +-
 arch/ppc/platforms/85xx/mpc8540_ads.h        | 2 +-
 arch/ppc/platforms/85xx/mpc8555_cds.h        | 2 +-
 arch/ppc/platforms/85xx/mpc8560_ads.c        | 2 +-
 arch/ppc/platforms/85xx/mpc8560_ads.h        | 2 +-
 arch/ppc/platforms/85xx/mpc85xx_ads_common.c | 2 +-
 arch/ppc/platforms/85xx/mpc85xx_ads_common.h | 2 +-
 arch/ppc/platforms/85xx/mpc85xx_cds_common.c | 2 +-
 arch/ppc/platforms/85xx/mpc85xx_cds_common.h | 2 +-
 arch/ppc/platforms/85xx/sbc8560.c            | 2 +-
 arch/ppc/platforms/pq2ads.c                  | 2 +-
 arch/ppc/syslib/ipic.h                       | 2 +-
 arch/ppc/syslib/mpc83xx_devices.c            | 2 +-
 arch/ppc/syslib/mpc83xx_sys.c                | 2 +-
 arch/ppc/syslib/mpc85xx_devices.c            | 2 +-
 arch/ppc/syslib/mpc85xx_sys.c                | 2 +-
 arch/ppc/syslib/mpc8xx_devices.c             | 2 +-
 arch/ppc/syslib/mpc8xx_sys.c                 | 2 +-
 arch/ppc/syslib/ppc83xx_setup.c              | 2 +-
 arch/ppc/syslib/ppc83xx_setup.h              | 2 +-
 arch/ppc/syslib/ppc85xx_common.c             | 2 +-
 arch/ppc/syslib/ppc85xx_common.h             | 2 +-
 arch/ppc/syslib/ppc85xx_setup.c              | 2 +-
 arch/ppc/syslib/ppc85xx_setup.h              | 2 +-
 arch/ppc/syslib/ppc_sys.c                    | 2 +-
 arch/ppc/syslib/pq2_devices.c                | 2 +-
 arch/ppc/syslib/pq2_sys.c                    | 2 +-
 drivers/char/watchdog/booke_wdt.c            | 2 +-
 drivers/net/gianfar.c                        | 2 +-
 drivers/net/gianfar.h                        | 2 +-
 drivers/net/gianfar_ethtool.c                | 2 +-
 drivers/net/gianfar_mii.c                    | 2 +-
 drivers/net/gianfar_mii.h                    | 2 +-
 drivers/serial/cpm_uart/cpm_uart_core.c      | 2 +-
 drivers/serial/cpm_uart/cpm_uart_cpm1.c      | 2 +-
 drivers/serial/cpm_uart/cpm_uart_cpm2.c      | 2 +-
 include/asm-ppc/immap_85xx.h                 | 2 +-
 include/asm-ppc/ipic.h                       | 2 +-
 include/asm-ppc/mpc83xx.h                    | 2 +-
 include/asm-ppc/mpc85xx.h                    | 2 +-
 include/asm-ppc/ppc_sys.h                    | 2 +-
 include/linux/fsl_devices.h                  | 2 +-
 51 files changed, 51 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/CREDITS b/CREDITS
index 7fb4c73e0228..192f749eba25 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1097,7 +1097,7 @@ S: 80050-430 - Curitiba - Paran
 S: Brazil
 
 N: Kumar Gala
-E: kumar.gala@freescale.com
+E: galak@kernel.crashing.org
 D: Embedded PowerPC 6xx/7xx/74xx/82xx/83xx/85xx support
 S: Austin, Texas 78729
 S: USA
diff --git a/MAINTAINERS b/MAINTAINERS
index cc924073f599..509927e40bbb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1565,7 +1565,7 @@ S:	Maintained
 
 LINUX FOR POWERPC EMBEDDED PPC83XX AND PPC85XX
 P:     Kumar Gala
-M:     kumar.gala@freescale.com
+M:     galak@kernel.crashing.org
 W:     http://www.penguinppc.org/
 L:     linuxppc-embedded@ozlabs.org
 S:     Maintained
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 5063c603fad4..8d60fa99fc4b 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -24,7 +24,7 @@
  *    Copyright 2002-2004 MontaVista Software, Inc.
  *      PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
  *    Copyright 2004 Freescale Semiconductor, Inc
- *      PowerPC e500 modifications, Kumar Gala <kumar.gala@freescale.com>
+ *      PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index af9ca0eb6d55..5d581bb3aa12 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -1,5 +1,5 @@
 /*
- * Modifications by Kumar Gala (kumar.gala@freescale.com) to support
+ * Modifications by Kumar Gala (galak@kernel.crashing.org) to support
  * E500 Book E processors.
  *
  * Copyright 2004 Freescale Semiconductor, Inc
diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c
index 86124a94c9af..26539cda6023 100644
--- a/arch/powerpc/oprofile/op_model_fsl_booke.c
+++ b/arch/powerpc/oprofile/op_model_fsl_booke.c
@@ -7,7 +7,7 @@
  * Copyright (c) 2004 Freescale Semiconductor, Inc
  *
  * Author: Andy Fleming
- * Maintainer: Kumar Gala <Kumar.Gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/arch/ppc/kernel/head_fsl_booke.S b/arch/ppc/kernel/head_fsl_booke.S
index 5063c603fad4..8d60fa99fc4b 100644
--- a/arch/ppc/kernel/head_fsl_booke.S
+++ b/arch/ppc/kernel/head_fsl_booke.S
@@ -24,7 +24,7 @@
  *    Copyright 2002-2004 MontaVista Software, Inc.
  *      PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
  *    Copyright 2004 Freescale Semiconductor, Inc
- *      PowerPC e500 modifications, Kumar Gala <kumar.gala@freescale.com>
+ *      PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
diff --git a/arch/ppc/mm/fsl_booke_mmu.c b/arch/ppc/mm/fsl_booke_mmu.c
index af9ca0eb6d55..5d581bb3aa12 100644
--- a/arch/ppc/mm/fsl_booke_mmu.c
+++ b/arch/ppc/mm/fsl_booke_mmu.c
@@ -1,5 +1,5 @@
 /*
- * Modifications by Kumar Gala (kumar.gala@freescale.com) to support
+ * Modifications by Kumar Gala (galak@kernel.crashing.org) to support
  * E500 Book E processors.
  *
  * Copyright 2004 Freescale Semiconductor, Inc
diff --git a/arch/ppc/platforms/83xx/mpc834x_sys.c b/arch/ppc/platforms/83xx/mpc834x_sys.c
index 98edc75f4105..84efc0ced880 100644
--- a/arch/ppc/platforms/83xx/mpc834x_sys.c
+++ b/arch/ppc/platforms/83xx/mpc834x_sys.c
@@ -3,7 +3,7 @@
  *
  * MPC834x SYS board specific routines
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/platforms/83xx/mpc834x_sys.h b/arch/ppc/platforms/83xx/mpc834x_sys.h
index 58e44c042535..2e514d316fb8 100644
--- a/arch/ppc/platforms/83xx/mpc834x_sys.h
+++ b/arch/ppc/platforms/83xx/mpc834x_sys.h
@@ -3,7 +3,7 @@
  *
  * MPC834X SYS common board definitions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor, Inc.
  *
diff --git a/arch/ppc/platforms/85xx/mpc8540_ads.c b/arch/ppc/platforms/85xx/mpc8540_ads.c
index 7e952c1228cb..c5cde97c6ef0 100644
--- a/arch/ppc/platforms/85xx/mpc8540_ads.c
+++ b/arch/ppc/platforms/85xx/mpc8540_ads.c
@@ -3,7 +3,7 @@
  *
  * MPC8540ADS board specific routines
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/platforms/85xx/mpc8540_ads.h b/arch/ppc/platforms/85xx/mpc8540_ads.h
index 3d05d7c4a938..e48ca3a97397 100644
--- a/arch/ppc/platforms/85xx/mpc8540_ads.h
+++ b/arch/ppc/platforms/85xx/mpc8540_ads.h
@@ -3,7 +3,7 @@
  *
  * MPC8540ADS board definitions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/platforms/85xx/mpc8555_cds.h b/arch/ppc/platforms/85xx/mpc8555_cds.h
index e0e75568bc57..1a8e6c67355d 100644
--- a/arch/ppc/platforms/85xx/mpc8555_cds.h
+++ b/arch/ppc/platforms/85xx/mpc8555_cds.h
@@ -3,7 +3,7 @@
  *
  * MPC8555CDS board definitions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/platforms/85xx/mpc8560_ads.c b/arch/ppc/platforms/85xx/mpc8560_ads.c
index 208433f1e93a..8e39a5517092 100644
--- a/arch/ppc/platforms/85xx/mpc8560_ads.c
+++ b/arch/ppc/platforms/85xx/mpc8560_ads.c
@@ -3,7 +3,7 @@
  *
  * MPC8560ADS board specific routines
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/platforms/85xx/mpc8560_ads.h b/arch/ppc/platforms/85xx/mpc8560_ads.h
index 7df885d73e9d..143ae7eefa7c 100644
--- a/arch/ppc/platforms/85xx/mpc8560_ads.h
+++ b/arch/ppc/platforms/85xx/mpc8560_ads.h
@@ -3,7 +3,7 @@
  *
  * MPC8540ADS board definitions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/platforms/85xx/mpc85xx_ads_common.c b/arch/ppc/platforms/85xx/mpc85xx_ads_common.c
index 16ad092d8a06..17ce48fe3503 100644
--- a/arch/ppc/platforms/85xx/mpc85xx_ads_common.c
+++ b/arch/ppc/platforms/85xx/mpc85xx_ads_common.c
@@ -3,7 +3,7 @@
  *
  * MPC85xx ADS board common routines
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/platforms/85xx/mpc85xx_ads_common.h b/arch/ppc/platforms/85xx/mpc85xx_ads_common.h
index 84acf6e8d45e..7b26bcc5d10d 100644
--- a/arch/ppc/platforms/85xx/mpc85xx_ads_common.h
+++ b/arch/ppc/platforms/85xx/mpc85xx_ads_common.h
@@ -3,7 +3,7 @@
  *
  * MPC85XX ADS common board definitions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/platforms/85xx/mpc85xx_cds_common.c b/arch/ppc/platforms/85xx/mpc85xx_cds_common.c
index a21156967a5e..d8991b88dc9c 100644
--- a/arch/ppc/platforms/85xx/mpc85xx_cds_common.c
+++ b/arch/ppc/platforms/85xx/mpc85xx_cds_common.c
@@ -3,7 +3,7 @@
  *
  * MPC85xx CDS board specific routines
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor, Inc
  *
diff --git a/arch/ppc/platforms/85xx/mpc85xx_cds_common.h b/arch/ppc/platforms/85xx/mpc85xx_cds_common.h
index 12b292c6ae32..5b588cfd0e41 100644
--- a/arch/ppc/platforms/85xx/mpc85xx_cds_common.h
+++ b/arch/ppc/platforms/85xx/mpc85xx_cds_common.h
@@ -3,7 +3,7 @@
  *
  * MPC85xx CDS board definitions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor, Inc
  *
diff --git a/arch/ppc/platforms/85xx/sbc8560.c b/arch/ppc/platforms/85xx/sbc8560.c
index b4ee1707a836..45a5b81b4ed1 100644
--- a/arch/ppc/platforms/85xx/sbc8560.c
+++ b/arch/ppc/platforms/85xx/sbc8560.c
@@ -3,7 +3,7 @@
  * 
  * Wind River SBC8560 board specific routines
  * 
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  * 
diff --git a/arch/ppc/platforms/pq2ads.c b/arch/ppc/platforms/pq2ads.c
index 6a1475c1e128..71c9fca1fe9b 100644
--- a/arch/ppc/platforms/pq2ads.c
+++ b/arch/ppc/platforms/pq2ads.c
@@ -3,7 +3,7 @@
  *
  * PQ2ADS platform support
  *
- * Author: Kumar Gala <kumar.gala@freescale.com>
+ * Author: Kumar Gala <galak@kernel.crashing.org>
  * Derived from: est8260_setup.c by Allen Curtis
  *
  * Copyright 2004 Freescale Semiconductor, Inc.
diff --git a/arch/ppc/syslib/ipic.h b/arch/ppc/syslib/ipic.h
index 2b56a4fcf373..a7ce7da8785c 100644
--- a/arch/ppc/syslib/ipic.h
+++ b/arch/ppc/syslib/ipic.h
@@ -3,7 +3,7 @@
  *
  * IPIC private definitions and structure.
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor, Inc
  *
diff --git a/arch/ppc/syslib/mpc83xx_devices.c b/arch/ppc/syslib/mpc83xx_devices.c
index f43fbf9a9389..847df4409982 100644
--- a/arch/ppc/syslib/mpc83xx_devices.c
+++ b/arch/ppc/syslib/mpc83xx_devices.c
@@ -3,7 +3,7 @@
  *
  * MPC83xx Device descriptions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/syslib/mpc83xx_sys.c b/arch/ppc/syslib/mpc83xx_sys.c
index da743446789b..a1523989aff4 100644
--- a/arch/ppc/syslib/mpc83xx_sys.c
+++ b/arch/ppc/syslib/mpc83xx_sys.c
@@ -3,7 +3,7 @@
  *
  * MPC83xx System descriptions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/syslib/mpc85xx_devices.c b/arch/ppc/syslib/mpc85xx_devices.c
index 2ede677a0a53..69949d255658 100644
--- a/arch/ppc/syslib/mpc85xx_devices.c
+++ b/arch/ppc/syslib/mpc85xx_devices.c
@@ -3,7 +3,7 @@
  *
  * MPC85xx Device descriptions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/syslib/mpc85xx_sys.c b/arch/ppc/syslib/mpc85xx_sys.c
index cb68d8c58348..397cfbcce5ea 100644
--- a/arch/ppc/syslib/mpc85xx_sys.c
+++ b/arch/ppc/syslib/mpc85xx_sys.c
@@ -3,7 +3,7 @@
  *
  * MPC85xx System descriptions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/syslib/mpc8xx_devices.c b/arch/ppc/syslib/mpc8xx_devices.c
index 2b5f0e701687..92dc98b36bde 100644
--- a/arch/ppc/syslib/mpc8xx_devices.c
+++ b/arch/ppc/syslib/mpc8xx_devices.c
@@ -3,7 +3,7 @@
  *
  * MPC8xx Device descriptions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 MontaVista Software, Inc. by Vitaly Bordug<vbordug@ru.mvista.com>
  *
diff --git a/arch/ppc/syslib/mpc8xx_sys.c b/arch/ppc/syslib/mpc8xx_sys.c
index 3cc27d29e3af..d3c617521603 100644
--- a/arch/ppc/syslib/mpc8xx_sys.c
+++ b/arch/ppc/syslib/mpc8xx_sys.c
@@ -3,7 +3,7 @@
  *
  * MPC8xx System descriptions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 MontaVista Software, Inc. by Vitaly Bordug <vbordug@ru.mvista.com>
  *
diff --git a/arch/ppc/syslib/ppc83xx_setup.c b/arch/ppc/syslib/ppc83xx_setup.c
index 4da168a6ad03..1b5fe9e398d4 100644
--- a/arch/ppc/syslib/ppc83xx_setup.c
+++ b/arch/ppc/syslib/ppc83xx_setup.c
@@ -3,7 +3,7 @@
  *
  * MPC83XX common board code
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/syslib/ppc83xx_setup.h b/arch/ppc/syslib/ppc83xx_setup.h
index c766c1a5f786..a122a7322e5e 100644
--- a/arch/ppc/syslib/ppc83xx_setup.h
+++ b/arch/ppc/syslib/ppc83xx_setup.h
@@ -3,7 +3,7 @@
  *
  * MPC83XX common board definitions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/syslib/ppc85xx_common.c b/arch/ppc/syslib/ppc85xx_common.c
index da841dacdc13..19ad537225e4 100644
--- a/arch/ppc/syslib/ppc85xx_common.c
+++ b/arch/ppc/syslib/ppc85xx_common.c
@@ -3,7 +3,7 @@
  *
  * MPC85xx support routines
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/syslib/ppc85xx_common.h b/arch/ppc/syslib/ppc85xx_common.h
index 2c8f304441bf..94edf32151dd 100644
--- a/arch/ppc/syslib/ppc85xx_common.h
+++ b/arch/ppc/syslib/ppc85xx_common.h
@@ -3,7 +3,7 @@
  *
  * MPC85xx support routines
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/syslib/ppc85xx_setup.c b/arch/ppc/syslib/ppc85xx_setup.c
index de2f90576577..1a47ff4b831d 100644
--- a/arch/ppc/syslib/ppc85xx_setup.c
+++ b/arch/ppc/syslib/ppc85xx_setup.c
@@ -3,7 +3,7 @@
  *
  * MPC85XX common board code
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/syslib/ppc85xx_setup.h b/arch/ppc/syslib/ppc85xx_setup.h
index 6e6cfe162faf..e340b0545fb5 100644
--- a/arch/ppc/syslib/ppc85xx_setup.h
+++ b/arch/ppc/syslib/ppc85xx_setup.h
@@ -3,7 +3,7 @@
  *
  * MPC85XX common board definitions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor Inc.
  *
diff --git a/arch/ppc/syslib/ppc_sys.c b/arch/ppc/syslib/ppc_sys.c
index 603f01190816..c0b93c4191ee 100644
--- a/arch/ppc/syslib/ppc_sys.c
+++ b/arch/ppc/syslib/ppc_sys.c
@@ -3,7 +3,7 @@
  *
  * PPC System library functions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor Inc.
  * Copyright 2005 MontaVista, Inc. by Vitaly Bordug <vbordug@ru.mvista.com>
diff --git a/arch/ppc/syslib/pq2_devices.c b/arch/ppc/syslib/pq2_devices.c
index e960fe935325..6ff3aab82fc3 100644
--- a/arch/ppc/syslib/pq2_devices.c
+++ b/arch/ppc/syslib/pq2_devices.c
@@ -3,7 +3,7 @@
  *
  * PQ2 Device descriptions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * This file is licensed under the terms of the GNU General Public License
  * version 2. This program is licensed "as is" without any warranty of any
diff --git a/arch/ppc/syslib/pq2_sys.c b/arch/ppc/syslib/pq2_sys.c
index 7b6c9ebdb9e3..36d6e2179940 100644
--- a/arch/ppc/syslib/pq2_sys.c
+++ b/arch/ppc/syslib/pq2_sys.c
@@ -3,7 +3,7 @@
  *
  * PQ2 System descriptions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * This file is licensed under the terms of the GNU General Public License
  * version 2. This program is licensed "as is" without any warranty of any
diff --git a/drivers/char/watchdog/booke_wdt.c b/drivers/char/watchdog/booke_wdt.c
index abc30cca6645..65830ec71042 100644
--- a/drivers/char/watchdog/booke_wdt.c
+++ b/drivers/char/watchdog/booke_wdt.c
@@ -4,7 +4,7 @@
  * Watchdog timer for PowerPC Book-E systems
  *
  * Author: Matthew McClintock
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor Inc.
  *
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index e3a329539f1c..0f030b73cbb3 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -6,7 +6,7 @@
  * Based on 8260_io/fcc_enet.c
  *
  * Author: Andy Fleming
- * Maintainer: Kumar Gala (kumar.gala@freescale.com)
+ * Maintainer: Kumar Gala
  *
  * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
  *
diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
index 220084e53341..5065ba82cb76 100644
--- a/drivers/net/gianfar.h
+++ b/drivers/net/gianfar.h
@@ -6,7 +6,7 @@
  * Based on 8260_io/fcc_enet.c
  *
  * Author: Andy Fleming
- * Maintainer: Kumar Gala (kumar.gala@freescale.com)
+ * Maintainer: Kumar Gala
  *
  * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
  *
diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c
index 5a2d810ce575..cfa3cd7c91a0 100644
--- a/drivers/net/gianfar_ethtool.c
+++ b/drivers/net/gianfar_ethtool.c
@@ -6,7 +6,7 @@
  *  Based on e1000 ethtool support
  *
  *  Author: Andy Fleming
- *  Maintainer: Kumar Gala (kumar.gala@freescale.com)
+ *  Maintainer: Kumar Gala
  *
  *  Copyright (c) 2003,2004 Freescale Semiconductor, Inc.
  *
diff --git a/drivers/net/gianfar_mii.c b/drivers/net/gianfar_mii.c
index 9544279e8bcd..04a462c2a5b7 100644
--- a/drivers/net/gianfar_mii.c
+++ b/drivers/net/gianfar_mii.c
@@ -5,7 +5,7 @@
  * Provides Bus interface for MIIM regs
  *
  * Author: Andy Fleming
- * Maintainer: Kumar Gala (kumar.gala@freescale.com)
+ * Maintainer: Kumar Gala
  *
  * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
  *
diff --git a/drivers/net/gianfar_mii.h b/drivers/net/gianfar_mii.h
index 56e5665d5c9b..e85eb216fb5b 100644
--- a/drivers/net/gianfar_mii.h
+++ b/drivers/net/gianfar_mii.h
@@ -5,7 +5,7 @@
  * Driver for the MDIO bus controller in the Gianfar register space
  *
  * Author: Andy Fleming
- * Maintainer: Kumar Gala (kumar.gala@freescale.com)
+ * Maintainer: Kumar Gala
  *
  * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
  *
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index 25825f2aba22..987d22b53c22 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -7,7 +7,7 @@
  *  Based on ppc8xx.c by Thomas Gleixner
  *  Based on drivers/serial/amba.c by Russell King
  *
- *  Maintainer: Kumar Gala (kumar.gala@freescale.com) (CPM2)
+ *  Maintainer: Kumar Gala (galak@kernel.crashing.org) (CPM2)
  *              Pantelis Antoniou (panto@intracom.gr) (CPM1)
  *
  *  Copyright (C) 2004 Freescale Semiconductor, Inc.
diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm1.c b/drivers/serial/cpm_uart/cpm_uart_cpm1.c
index 4b0786e7eb7f..d789ee55cbb7 100644
--- a/drivers/serial/cpm_uart/cpm_uart_cpm1.c
+++ b/drivers/serial/cpm_uart/cpm_uart_cpm1.c
@@ -3,7 +3,7 @@
  *
  *  Driver for CPM (SCC/SMC) serial ports; CPM1 definitions
  *
- *  Maintainer: Kumar Gala (kumar.gala@freescale.com) (CPM2)
+ *  Maintainer: Kumar Gala (galak@kernel.crashing.org) (CPM2)
  *              Pantelis Antoniou (panto@intracom.gr) (CPM1)
  *
  *  Copyright (C) 2004 Freescale Semiconductor, Inc.
diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm2.c b/drivers/serial/cpm_uart/cpm_uart_cpm2.c
index 15ad58d94889..fd9e53ed3feb 100644
--- a/drivers/serial/cpm_uart/cpm_uart_cpm2.c
+++ b/drivers/serial/cpm_uart/cpm_uart_cpm2.c
@@ -3,7 +3,7 @@
  *
  *  Driver for CPM (SCC/SMC) serial ports; CPM2 definitions
  *
- *  Maintainer: Kumar Gala (kumar.gala@freescale.com) (CPM2)
+ *  Maintainer: Kumar Gala (galak@kernel.crashing.org) (CPM2)
  *              Pantelis Antoniou (panto@intracom.gr) (CPM1)
  * 
  *  Copyright (C) 2004 Freescale Semiconductor, Inc.
diff --git a/include/asm-ppc/immap_85xx.h b/include/asm-ppc/immap_85xx.h
index 50fb5e47094a..9383d0c13ff8 100644
--- a/include/asm-ppc/immap_85xx.h
+++ b/include/asm-ppc/immap_85xx.h
@@ -3,7 +3,7 @@
  *
  * MPC85xx Internal Memory Map
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor, Inc
  *
diff --git a/include/asm-ppc/ipic.h b/include/asm-ppc/ipic.h
index 9092b920997a..0fe396a2b666 100644
--- a/include/asm-ppc/ipic.h
+++ b/include/asm-ppc/ipic.h
@@ -3,7 +3,7 @@
  *
  * IPIC external definitions and structure.
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor, Inc
  *
diff --git a/include/asm-ppc/mpc83xx.h b/include/asm-ppc/mpc83xx.h
index ce212201db2a..7cdf60fa69b6 100644
--- a/include/asm-ppc/mpc83xx.h
+++ b/include/asm-ppc/mpc83xx.h
@@ -3,7 +3,7 @@
  *
  * MPC83xx definitions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor, Inc
  *
diff --git a/include/asm-ppc/mpc85xx.h b/include/asm-ppc/mpc85xx.h
index d98db980cd49..9d14baea3d71 100644
--- a/include/asm-ppc/mpc85xx.h
+++ b/include/asm-ppc/mpc85xx.h
@@ -3,7 +3,7 @@
  *
  * MPC85xx definitions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor, Inc
  *
diff --git a/include/asm-ppc/ppc_sys.h b/include/asm-ppc/ppc_sys.h
index bba5305c29ed..83d8c77c124d 100644
--- a/include/asm-ppc/ppc_sys.h
+++ b/include/asm-ppc/ppc_sys.h
@@ -3,7 +3,7 @@
  *
  * PPC system definitions and library functions
  *
- * Maintainer: Kumar Gala <kumar.gala@freescale.com>
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2005 Freescale Semiconductor, Inc
  *
diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 114d5d59f695..934aa9bda481 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -4,7 +4,7 @@
  * Definitions for any platform device related flags or structures for
  * Freescale processor devices
  *
- * Maintainer: Kumar Gala (kumar.gala@freescale.com)
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
  *
  * Copyright 2004 Freescale Semiconductor, Inc
  *
-- 
cgit v1.2.3-71-gd317


From 51c6f666fceb3184eeff045dad4432b602cd648e Mon Sep 17 00:00:00 2001
From: Robin Holt <holt@sgi.com>
Date: Sun, 13 Nov 2005 16:06:42 -0800
Subject: [PATCH] mm: ZAP_BLOCK causes redundant work

The address based work estimate for unmapping (for lockbreak) is and always
was horribly inefficient for sparse mappings.  The problem is most simply
explained with an example:

If we find a pgd is clear, we still have to call into unmap_page_range
PGDIR_SIZE / ZAP_BLOCK_SIZE times, each time checking the clear pgd, in
order to progress the working address to the next pgd.

The fundamental way to solve the problem is to keep track of the end
address we've processed and pass it back to the higher layers.

From: Nick Piggin <npiggin@suse.de>

  Modification to completely get away from address based work estimate
  and instead use an abstract count, with a very small cost for empty
  entries as opposed to present pages.

  On 2.6.14-git2, ppc64, and CONFIG_PREEMPT=y, mapping and unmapping 1TB
  of virtual address space takes 1.69s; with the following patch applied,
  this operation can be done 1000 times in less than 0.01s

From: Andrew Morton <akpm@osdl.org>

With CONFIG_HUTETLB_PAGE=n:

mm/memory.c: In function `unmap_vmas':
mm/memory.c:779: warning: division by zero

Due to

			zap_work -= (end - start) /
					(HPAGE_SIZE / PAGE_SIZE);

So make the dummy HPAGE_SIZE non-zero

Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/hugetlb.h |  4 +--
 mm/memory.c             | 89 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 57 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0cea162b08c0..1056717ee501 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -102,8 +102,8 @@ static inline unsigned long hugetlb_total_pages(void)
 #define hugetlb_fault(mm, vma, addr, write)	({ BUG(); 0; })
 
 #ifndef HPAGE_MASK
-#define HPAGE_MASK	0		/* Keep the compiler happy */
-#define HPAGE_SIZE	0
+#define HPAGE_MASK	PAGE_MASK		/* Keep the compiler happy */
+#define HPAGE_SIZE	PAGE_SIZE
 #endif
 
 #endif /* !CONFIG_HUGETLB_PAGE */
diff --git a/mm/memory.c b/mm/memory.c
index 0f60baf6f69b..2998cfc12f5b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -549,10 +549,10 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	return 0;
 }
 
-static void zap_pte_range(struct mmu_gather *tlb,
+static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+				long *zap_work, struct zap_details *details)
 {
 	struct mm_struct *mm = tlb->mm;
 	pte_t *pte;
@@ -563,10 +563,15 @@ static void zap_pte_range(struct mmu_gather *tlb,
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	do {
 		pte_t ptent = *pte;
-		if (pte_none(ptent))
+		if (pte_none(ptent)) {
+			(*zap_work)--;
 			continue;
+		}
 		if (pte_present(ptent)) {
 			struct page *page = NULL;
+
+			(*zap_work) -= PAGE_SIZE;
+
 			if (!(vma->vm_flags & VM_RESERVED)) {
 				unsigned long pfn = pte_pfn(ptent);
 				if (unlikely(!pfn_valid(pfn)))
@@ -624,16 +629,18 @@ static void zap_pte_range(struct mmu_gather *tlb,
 		if (!pte_file(ptent))
 			free_swap_and_cache(pte_to_swp_entry(ptent));
 		pte_clear_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
 	add_mm_rss(mm, file_rss, anon_rss);
 	pte_unmap_unlock(pte - 1, ptl);
+
+	return addr;
 }
 
-static inline void zap_pmd_range(struct mmu_gather *tlb,
+static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+				long *zap_work, struct zap_details *details)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -641,16 +648,21 @@ static inline void zap_pmd_range(struct mmu_gather *tlb,
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if (pmd_none_or_clear_bad(pmd))
+		if (pmd_none_or_clear_bad(pmd)) {
+			(*zap_work)--;
 			continue;
-		zap_pte_range(tlb, vma, pmd, addr, next, details);
-	} while (pmd++, addr = next, addr != end);
+		}
+		next = zap_pte_range(tlb, vma, pmd, addr, next,
+						zap_work, details);
+	} while (pmd++, addr = next, (addr != end && *zap_work > 0));
+
+	return addr;
 }
 
-static inline void zap_pud_range(struct mmu_gather *tlb,
+static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 				struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+				long *zap_work, struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -658,15 +670,21 @@ static inline void zap_pud_range(struct mmu_gather *tlb,
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud))
+		if (pud_none_or_clear_bad(pud)) {
+			(*zap_work)--;
 			continue;
-		zap_pmd_range(tlb, vma, pud, addr, next, details);
-	} while (pud++, addr = next, addr != end);
+		}
+		next = zap_pmd_range(tlb, vma, pud, addr, next,
+						zap_work, details);
+	} while (pud++, addr = next, (addr != end && *zap_work > 0));
+
+	return addr;
 }
 
-static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+static unsigned long unmap_page_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+				long *zap_work, struct zap_details *details)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -679,11 +697,16 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
+		if (pgd_none_or_clear_bad(pgd)) {
+			(*zap_work)--;
 			continue;
-		zap_pud_range(tlb, vma, pgd, addr, next, details);
-	} while (pgd++, addr = next, addr != end);
+		}
+		next = zap_pud_range(tlb, vma, pgd, addr, next,
+						zap_work, details);
+	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
 	tlb_end_vma(tlb, vma);
+
+	return addr;
 }
 
 #ifdef CONFIG_PREEMPT
@@ -724,7 +747,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
-	unsigned long zap_bytes = ZAP_BLOCK_SIZE;
+	long zap_work = ZAP_BLOCK_SIZE;
 	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
 	int tlb_start_valid = 0;
 	unsigned long start = start_addr;
@@ -745,27 +768,25 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			*nr_accounted += (end - start) >> PAGE_SHIFT;
 
 		while (start != end) {
-			unsigned long block;
-
 			if (!tlb_start_valid) {
 				tlb_start = start;
 				tlb_start_valid = 1;
 			}
 
-			if (is_vm_hugetlb_page(vma)) {
-				block = end - start;
+			if (unlikely(is_vm_hugetlb_page(vma))) {
 				unmap_hugepage_range(vma, start, end);
-			} else {
-				block = min(zap_bytes, end - start);
-				unmap_page_range(*tlbp, vma, start,
-						start + block, details);
+				zap_work -= (end - start) /
+						(HPAGE_SIZE / PAGE_SIZE);
+				start = end;
+			} else
+				start = unmap_page_range(*tlbp, vma,
+						start, end, &zap_work, details);
+
+			if (zap_work > 0) {
+				BUG_ON(start != end);
+				break;
 			}
 
-			start += block;
-			zap_bytes -= block;
-			if ((long)zap_bytes > 0)
-				continue;
-
 			tlb_finish_mmu(*tlbp, tlb_start, start);
 
 			if (need_resched() ||
@@ -779,7 +800,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 
 			*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
 			tlb_start_valid = 0;
-			zap_bytes = ZAP_BLOCK_SIZE;
+			zap_work = ZAP_BLOCK_SIZE;
 		}
 	}
 out:
-- 
cgit v1.2.3-71-gd317


From 7fb1d9fca5c6e3b06773b69165a73f3fb786b8ee Mon Sep 17 00:00:00 2001
From: Rohit Seth <rohit.seth@intel.com>
Date: Sun, 13 Nov 2005 16:06:43 -0800
Subject: [PATCH] mm: __alloc_pages cleanup

Clean up of __alloc_pages.

Restoration of previous behaviour, plus further cleanups by introducing an
'alloc_flags', removing the last of should_reclaim_zone.

Signed-off-by: Rohit Seth <rohit.seth@intel.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h |   2 +-
 mm/page_alloc.c        | 195 +++++++++++++++++++++----------------------------
 mm/vmscan.c            |   6 +-
 3 files changed, 89 insertions(+), 114 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f5fa3082fd6a..6cfb114a0c34 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -329,7 +329,7 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive,
 void build_all_zonelists(void);
 void wakeup_kswapd(struct zone *zone, int order);
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		int alloc_type, int can_try_harder, gfp_t gfp_high);
+		int classzone_idx, int alloc_flags);
 
 #ifdef CONFIG_HAVE_MEMORY_PRESENT
 void memory_present(int nid, unsigned long start, unsigned long end);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b37dc0f78d07..845b91749a42 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -732,9 +732,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
 		}
 		local_irq_restore(flags);
 		put_cpu();
-	}
-
-	if (page == NULL) {
+	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
 		spin_unlock_irqrestore(&zone->lock, flags);
@@ -754,20 +752,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
 	return page;
 }
 
+#define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
+#define ALLOC_HARDER		0x02 /* try to alloc harder */
+#define ALLOC_HIGH		0x04 /* __GFP_HIGH set */
+#define ALLOC_CPUSET		0x08 /* check for correct cpuset */
+
 /*
  * Return 1 if free pages are above 'mark'. This takes into account the order
  * of the allocation.
  */
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		      int classzone_idx, int can_try_harder, gfp_t gfp_high)
+		      int classzone_idx, int alloc_flags)
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark, free_pages = z->free_pages - (1 << order) + 1;
 	int o;
 
-	if (gfp_high)
+	if (alloc_flags & ALLOC_HIGH)
 		min -= min / 2;
-	if (can_try_harder)
+	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
 
 	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
@@ -785,14 +788,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	return 1;
 }
 
-static inline int
-should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
+/*
+ * get_page_from_freeliest goes through the zonelist trying to allocate
+ * a page.
+ */
+static struct page *
+get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist, int alloc_flags)
 {
-	if (!z->reclaim_pages)
-		return 0;
-	if (gfp_mask & __GFP_NORECLAIM)
-		return 0;
-	return 1;
+	struct zone **z = zonelist->zones;
+	struct page *page = NULL;
+	int classzone_idx = zone_idx(*z);
+
+	/*
+	 * Go through the zonelist once, looking for a zone with enough free.
+	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+	 */
+	do {
+		if ((alloc_flags & ALLOC_CPUSET) &&
+				!cpuset_zone_allowed(*z, gfp_mask))
+			continue;
+
+		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
+			if (!zone_watermark_ok(*z, order, (*z)->pages_low,
+				    classzone_idx, alloc_flags))
+				continue;
+		}
+
+		page = buffered_rmqueue(*z, order, gfp_mask);
+		if (page) {
+			zone_statistics(zonelist, *z);
+			break;
+		}
+	} while (*(++z) != NULL);
+	return page;
 }
 
 /*
@@ -803,92 +832,60 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
 		struct zonelist *zonelist)
 {
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
-	struct zone **zones, *z;
+	struct zone **z;
 	struct page *page;
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
-	int i;
-	int classzone_idx;
 	int do_retry;
-	int can_try_harder;
+	int alloc_flags;
 	int did_some_progress;
 
 	might_sleep_if(wait);
 
-	/*
-	 * The caller may dip into page reserves a bit more if the caller
-	 * cannot run direct reclaim, or is the caller has realtime scheduling
-	 * policy
-	 */
-	can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
-
-	zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
+	z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 
-	if (unlikely(zones[0] == NULL)) {
+	if (unlikely(*z == NULL)) {
 		/* Should this ever happen?? */
 		return NULL;
 	}
+restart:
+	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+				zonelist, ALLOC_CPUSET);
+	if (page)
+		goto got_pg;
 
-	classzone_idx = zone_idx(zones[0]);
+	do
+		wakeup_kswapd(*z, order);
+	while (*(++z));
 
-restart:
 	/*
-	 * Go through the zonelist once, looking for a zone with enough free.
-	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+	 * OK, we're below the kswapd watermark and have kicked background
+	 * reclaim. Now things get more complex, so set up alloc_flags according
+	 * to how we want to proceed.
+	 *
+	 * The caller may dip into page reserves a bit more if the caller
+	 * cannot run direct reclaim, or if the caller has realtime scheduling
+	 * policy.
 	 */
-	for (i = 0; (z = zones[i]) != NULL; i++) {
-		int do_reclaim = should_reclaim_zone(z, gfp_mask);
-
-		if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-			continue;
-
-		/*
-		 * If the zone is to attempt early page reclaim then this loop
-		 * will try to reclaim pages and check the watermark a second
-		 * time before giving up and falling back to the next zone.
-		 */
-zone_reclaim_retry:
-		if (!zone_watermark_ok(z, order, z->pages_low,
-				       classzone_idx, 0, 0)) {
-			if (!do_reclaim)
-				continue;
-			else {
-				zone_reclaim(z, gfp_mask, order);
-				/* Only try reclaim once */
-				do_reclaim = 0;
-				goto zone_reclaim_retry;
-			}
-		}
-
-		page = buffered_rmqueue(z, order, gfp_mask);
-		if (page)
-			goto got_pg;
-	}
-
-	for (i = 0; (z = zones[i]) != NULL; i++)
-		wakeup_kswapd(z, order);
+	alloc_flags = 0;
+	if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
+		alloc_flags |= ALLOC_HARDER;
+	if (gfp_mask & __GFP_HIGH)
+		alloc_flags |= ALLOC_HIGH;
+	if (wait)
+		alloc_flags |= ALLOC_CPUSET;
 
 	/*
 	 * Go through the zonelist again. Let __GFP_HIGH and allocations
-	 * coming from realtime tasks to go deeper into reserves
+	 * coming from realtime tasks go deeper into reserves.
 	 *
 	 * This is the last chance, in general, before the goto nopage.
 	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	for (i = 0; (z = zones[i]) != NULL; i++) {
-		if (!zone_watermark_ok(z, order, z->pages_min,
-				       classzone_idx, can_try_harder,
-				       gfp_mask & __GFP_HIGH))
-			continue;
-
-		if (wait && !cpuset_zone_allowed(z, gfp_mask))
-			continue;
-
-		page = buffered_rmqueue(z, order, gfp_mask);
-		if (page)
-			goto got_pg;
-	}
+	page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
+	if (page)
+		goto got_pg;
 
 	/* This allocation should allow future memory freeing. */
 
@@ -897,13 +894,10 @@ zone_reclaim_retry:
 		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 nofail_alloc:
 			/* go through the zonelist yet again, ignoring mins */
-			for (i = 0; (z = zones[i]) != NULL; i++) {
-				if (!cpuset_zone_allowed(z, gfp_mask))
-					continue;
-				page = buffered_rmqueue(z, order, gfp_mask);
-				if (page)
-					goto got_pg;
-			}
+			page = get_page_from_freelist(gfp_mask, order,
+				zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
+			if (page)
+				goto got_pg;
 			if (gfp_mask & __GFP_NOFAIL) {
 				blk_congestion_wait(WRITE, HZ/50);
 				goto nofail_alloc;
@@ -924,7 +918,7 @@ rebalance:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zones, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
 
 	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
@@ -932,19 +926,10 @@ rebalance:
 	cond_resched();
 
 	if (likely(did_some_progress)) {
-		for (i = 0; (z = zones[i]) != NULL; i++) {
-			if (!zone_watermark_ok(z, order, z->pages_min,
-					       classzone_idx, can_try_harder,
-					       gfp_mask & __GFP_HIGH))
-				continue;
-
-			if (!cpuset_zone_allowed(z, gfp_mask))
-				continue;
-
-			page = buffered_rmqueue(z, order, gfp_mask);
-			if (page)
-				goto got_pg;
-		}
+		page = get_page_from_freelist(gfp_mask, order,
+						zonelist, alloc_flags);
+		if (page)
+			goto got_pg;
 	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
 		/*
 		 * Go through the zonelist yet one more time, keep
@@ -952,18 +937,10 @@ rebalance:
 		 * a parallel oom killing, we must fail if we're still
 		 * under heavy pressure.
 		 */
-		for (i = 0; (z = zones[i]) != NULL; i++) {
-			if (!zone_watermark_ok(z, order, z->pages_high,
-					       classzone_idx, 0, 0))
-				continue;
-
-			if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-				continue;
-
-			page = buffered_rmqueue(z, order, gfp_mask);
-			if (page)
-				goto got_pg;
-		}
+		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+						zonelist, ALLOC_CPUSET);
+		if (page)
+			goto got_pg;
 
 		out_of_memory(gfp_mask, order);
 		goto restart;
@@ -996,9 +973,7 @@ nopage:
 		dump_stack();
 		show_mem();
 	}
-	return NULL;
 got_pg:
-	zone_statistics(zonelist, z);
 	return page;
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 135bf8ca96ee..28130541270f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1074,7 +1074,7 @@ loop_again:
 					continue;
 
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0, 0)) {
+						zone->pages_high, 0, 0)) {
 					end_zone = i;
 					goto scan;
 				}
@@ -1111,7 +1111,7 @@ scan:
 
 			if (nr_pages == 0) {	/* Not software suspend */
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, end_zone, 0, 0))
+						zone->pages_high, end_zone, 0))
 					all_zones_ok = 0;
 			}
 			zone->temp_priority = priority;
@@ -1259,7 +1259,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 
 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
+	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
-- 
cgit v1.2.3-71-gd317


From 2d6c666e8704cf06267f29a4fa3d2cf823469c38 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Sun, 13 Nov 2005 16:06:44 -0800
Subject: [PATCH] mm: gfp_noreclaim cleanup

Remove last remnant of the defunct early reclaim page logic, the no longer
used __GFP_NORECLAIM flag bit.

Signed-off-by: Paul Jackson <pj@sgi.com>
Acked-by: Martin Hicks <mort@bork.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h     | 5 ++---
 include/linux/pagemap.h | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c3779432a723..23279d8f19b1 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -39,8 +39,7 @@ struct vm_area_struct;
 #define __GFP_COMP	((__force gfp_t)0x4000u)/* Add compound page metadata */
 #define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
-#define __GFP_NORECLAIM  ((__force gfp_t)0x20000u) /* No realy zone reclaim during allocation */
-#define __GFP_HARDWALL   ((__force gfp_t)0x40000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -49,7 +48,7 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_NORECLAIM|__GFP_HARDWALL)
+			__GFP_NOMEMALLOC|__GFP_HARDWALL)
 
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ba6c310a055f..ee700c6eb442 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -53,12 +53,12 @@ void release_pages(struct page **pages, int nr, int cold);
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
-	return alloc_pages(mapping_gfp_mask(x)|__GFP_NORECLAIM, 0);
+	return alloc_pages(mapping_gfp_mask(x), 0);
 }
 
 static inline struct page *page_cache_alloc_cold(struct address_space *x)
 {
-	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD|__GFP_NORECLAIM, 0);
+	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
 }
 
 typedef int filler_t(void *, struct page *);
-- 
cgit v1.2.3-71-gd317


From 7fce260a6bf75080ef61408504add5618f90e41b Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Sun, 13 Nov 2005 16:06:48 -0800
Subject: [PATCH] ppc: add support for new powerbooks

Enablement patch for the new PowerBooks (late 2005 edition).

This enables the ATA controller, Gigabit ethernet and basic AGP setup.
Bluetooth works out-of-the box after running hid2hci.

Still remaining is to get the touchpad to work, the simple change of just
adding the new USB ids isn't enough.

Signed-off-by: Olof Johansson <olof@lixom.net>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ppc/platforms/pmac_feature.c |  8 ++++++++
 drivers/char/agp/uninorth-agp.c   |  4 ++++
 drivers/ide/ppc/pmac.c            | 11 ++++++++---
 drivers/net/sungem.c              |  2 ++
 include/linux/pci_ids.h           |  4 ++++
 5 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/ppc/platforms/pmac_feature.c b/arch/ppc/platforms/pmac_feature.c
index 58884a63ebdb..1e69b0593162 100644
--- a/arch/ppc/platforms/pmac_feature.c
+++ b/arch/ppc/platforms/pmac_feature.c
@@ -2317,6 +2317,14 @@ static struct pmac_mb_def pmac_mb_defs[] = {
 		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
 		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
 	},
+	{	"PowerBook5,8",			"PowerBook G4 15\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,9",			"PowerBook G4 17\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
 	{	"PowerBook6,1",			"PowerBook G4 12\"",
 		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
 		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
index c8255312b8c1..50947e38501a 100644
--- a/drivers/char/agp/uninorth-agp.c
+++ b/drivers/char/agp/uninorth-agp.c
@@ -557,6 +557,10 @@ static struct agp_device_ids uninorth_agp_device_ids[] __devinitdata = {
 		.device_id	= PCI_DEVICE_ID_APPLE_U3H_AGP,
 		.chipset_name	= "U3H",
 	},
+	{
+		.device_id	= PCI_DEVICE_ID_APPLE_IPID2_AGP,
+		.chipset_name	= "UniNorth/Intrepid2",
+	},
 };
 
 static int __devinit agp_uninorth_probe(struct pci_dev *pdev,
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index b3e65a65d202..136911a86e84 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1667,11 +1667,16 @@ static struct macio_driver pmac_ide_macio_driver =
 };
 
 static struct pci_device_id pmac_ide_pci_match[] = {
-	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_UNI_N_ATA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_IPID_ATA100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_K2_ATA100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_UNI_N_ATA,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_IPID_ATA100,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_K2_ATA100,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_SH_ATA,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_IPID2_ATA,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 };
 
 static struct pci_driver pmac_ide_pci_driver = {
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index de399563a9db..081717d01374 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -128,6 +128,8 @@ static struct pci_device_id gem_pci_tbl[] = {
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
 	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_SH_SUNGEM,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
+	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_IPID2_GMAC,
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
 	{0, }
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index d00f8ba7f22b..d4c1c8fd2925 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -805,6 +805,10 @@
 #define PCI_DEVICE_ID_APPLE_SH_SUNGEM   0x0051
 #define PCI_DEVICE_ID_APPLE_U3L_AGP	0x0058
 #define PCI_DEVICE_ID_APPLE_U3H_AGP	0x0059
+#define PCI_DEVICE_ID_APPLE_IPID2_AGP	0x0066
+#define PCI_DEVICE_ID_APPLE_IPID2_ATA	0x0069
+#define PCI_DEVICE_ID_APPLE_IPID2_FW	0x006a
+#define PCI_DEVICE_ID_APPLE_IPID2_GMAC	0x006b
 #define PCI_DEVICE_ID_APPLE_TIGON3	0x1645
 
 #define PCI_VENDOR_ID_YAMAHA		0x1073
-- 
cgit v1.2.3-71-gd317


From a1261f54611ec4ad6a7ab7080f86747e3ac3685b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Date: Sun, 13 Nov 2005 16:06:55 -0800
Subject: [PATCH] m68k: introduce task_thread_info

new helper - task_thread_info(task).  On platforms that have thread_info
allocated separately (i.e.  in default case) it simply returns
task->thread_info.  m68k wants (and for good reasons) to embed its thread_info
into task_struct.  So it will (in later patch) have task_thread_info() of its
own.  For now we just add a macro for generic case and convert existing
instances of its body in core kernel to uses of new macro.  Obviously safe -
all normal architectures get the same preprocessor output they used to get.

Signed-off-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 16 +++++++++-------
 kernel/exit.c         |  2 +-
 kernel/fork.c         |  4 ++--
 kernel/sched.c        |  6 +++---
 4 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2bbf968b23d9..f8650314ba2f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1233,32 +1233,34 @@ static inline void task_unlock(struct task_struct *p)
 	spin_unlock(&p->alloc_lock);
 }
 
+#define task_thread_info(task) (task)->thread_info
+
 /* set thread flags in other task's structures
  * - see asm/thread_info.h for TIF_xxxx flags available
  */
 static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
 {
-	set_ti_thread_flag(tsk->thread_info,flag);
+	set_ti_thread_flag(task_thread_info(tsk), flag);
 }
 
 static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
 {
-	clear_ti_thread_flag(tsk->thread_info,flag);
+	clear_ti_thread_flag(task_thread_info(tsk), flag);
 }
 
 static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
 {
-	return test_and_set_ti_thread_flag(tsk->thread_info,flag);
+	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
 }
 
 static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
 {
-	return test_and_clear_ti_thread_flag(tsk->thread_info,flag);
+	return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
 }
 
 static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
 {
-	return test_ti_thread_flag(tsk->thread_info,flag);
+	return test_ti_thread_flag(task_thread_info(tsk), flag);
 }
 
 static inline void set_tsk_need_resched(struct task_struct *tsk)
@@ -1329,12 +1331,12 @@ extern void signal_wake_up(struct task_struct *t, int resume_stopped);
 
 static inline unsigned int task_cpu(const struct task_struct *p)
 {
-	return p->thread_info->cpu;
+	return task_thread_info(p)->cpu;
 }
 
 static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
-	p->thread_info->cpu = cpu;
+	task_thread_info(p)->cpu = cpu;
 }
 
 #else
diff --git a/kernel/exit.c b/kernel/exit.c
index 452a1d116178..ee515683b92d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -859,7 +859,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (group_dead && tsk->signal->leader)
 		disassociate_ctty(1);
 
-	module_put(tsk->thread_info->exec_domain->module);
+	module_put(task_thread_info(tsk)->exec_domain->module);
 	if (tsk->binfmt)
 		module_put(tsk->binfmt->module);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 158710d22566..7ef352ce347b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -919,7 +919,7 @@ static task_t *copy_process(unsigned long clone_flags,
 	if (nr_threads >= max_threads)
 		goto bad_fork_cleanup_count;
 
-	if (!try_module_get(p->thread_info->exec_domain->module))
+	if (!try_module_get(task_thread_info(p)->exec_domain->module))
 		goto bad_fork_cleanup_count;
 
 	if (p->binfmt && !try_module_get(p->binfmt->module))
@@ -1180,7 +1180,7 @@ bad_fork_cleanup:
 	if (p->binfmt)
 		module_put(p->binfmt->module);
 bad_fork_cleanup_put_domain:
-	module_put(p->thread_info->exec_domain->module);
+	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
 	put_group_info(p->group_info);
 	atomic_dec(&p->user->processes);
diff --git a/kernel/sched.c b/kernel/sched.c
index b6506671b2be..831f7e9d8f1c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1437,7 +1437,7 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 #endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
-	p->thread_info->preempt_count = 1;
+	task_thread_info(p)->preempt_count = 1;
 #endif
 	/*
 	 * Share the timeslice between parent and child, thus the
@@ -4410,9 +4410,9 @@ void __devinit init_idle(task_t *idle, int cpu)
 
 	/* Set the preempt count _outside_ the spinlocks! */
 #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
-	idle->thread_info->preempt_count = (idle->lock_depth >= 0);
+	task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
 #else
-	idle->thread_info->preempt_count = 0;
+	task_thread_info(idle)->preempt_count = 0;
 #endif
 }
 
-- 
cgit v1.2.3-71-gd317


From 10ebffde3d3916026974352b7900e44afe2b243f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Date: Sun, 13 Nov 2005 16:06:56 -0800
Subject: [PATCH] m68k: introduce setup_thread_stack() and end_of_stack()

encapsulates the rest of arch-dependent operations with thread_info access.
Two new helpers - setup_thread_stack() and end_of_stack().  For normal case
the former consists of copying thread_info of parent to new thread_info and
the latter returns pointer immediately past the end of thread_info.

Signed-off-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 11 +++++++++++
 kernel/fork.c         |  3 +--
 kernel/sched.c        |  4 ++--
 3 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f8650314ba2f..e4681256e43e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1235,6 +1235,17 @@ static inline void task_unlock(struct task_struct *p)
 
 #define task_thread_info(task) (task)->thread_info
 
+static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
+{
+	*task_thread_info(p) = *task_thread_info(org);
+	task_thread_info(p)->task = p;
+}
+
+static inline unsigned long *end_of_stack(struct task_struct *p)
+{
+	return (unsigned long *)(p->thread_info + 1);
+}
+
 /* set thread flags in other task's structures
  * - see asm/thread_info.h for TIF_xxxx flags available
  */
diff --git a/kernel/fork.c b/kernel/fork.c
index 7ef352ce347b..2c70c9cdf5dc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -171,10 +171,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		return NULL;
 	}
 
-	*ti = *orig->thread_info;
 	*tsk = *orig;
 	tsk->thread_info = ti;
-	ti->task = tsk;
+	setup_thread_stack(tsk, orig);
 
 	/* One for us, one for whoever does the "release_task()" (usually parent) */
 	atomic_set(&tsk->usage,2);
diff --git a/kernel/sched.c b/kernel/sched.c
index 831f7e9d8f1c..6f46c94cc29e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4327,10 +4327,10 @@ static void show_task(task_t *p)
 #endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	{
-		unsigned long *n = (unsigned long *) (p->thread_info+1);
+		unsigned long *n = end_of_stack(p);
 		while (!*n)
 			n++;
-		free = (unsigned long) n - (unsigned long)(p->thread_info+1);
+		free = (unsigned long)n - (unsigned long)end_of_stack(p);
 	}
 #endif
 	printk("%5lu %5d %6d ", free, p->pid, p->parent->pid);
-- 
cgit v1.2.3-71-gd317


From f037360f2ed111fe89a8f5cb6ba351f4e9934e53 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Date: Sun, 13 Nov 2005 16:06:57 -0800
Subject: [PATCH] m68k: thread_info header cleanup

a) in smp_lock.h #include of sched.h and spinlock.h moved under #ifdef
   CONFIG_LOCK_KERNEL.

b) interrupt.h now explicitly pulls sched.h (not via smp_lock.h from
   hardirq.h as it used to)

c) in three more places we need changes to compensate for (a) - one place
   in arch/sparc needs string.h now, hardirq.h needs forward declaration of
   task_struct and preempt.h needs direct include of thread_info.h.

d) thread_info-related helpers in sched.h and thread_info.h put under
   ifndef __HAVE_THREAD_FUNCTIONS.  Obviously safe.

Signed-off-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/sparc/lib/bitext.c   | 1 +
 include/linux/hardirq.h   | 2 ++
 include/linux/interrupt.h | 1 +
 include/linux/preempt.h   | 1 +
 include/linux/sched.h     | 4 ++++
 include/linux/smp_lock.h  | 3 +--
 6 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c
index 94b05e8c906c..2e168d16547f 100644
--- a/arch/sparc/lib/bitext.c
+++ b/arch/sparc/lib/bitext.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/smp_lock.h>
+#include <linux/string.h>
 #include <linux/bitops.h>
 
 #include <asm/bitext.h>
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 5912874ca83c..71d2b8a723b9 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -90,6 +90,8 @@ extern void synchronize_irq(unsigned int irq);
 #define nmi_enter()		irq_enter()
 #define nmi_exit()		sub_preempt_count(HARDIRQ_OFFSET)
 
+struct task_struct;
+
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 static inline void account_user_vtime(struct task_struct *tsk)
 {
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 0a90205184b0..41f150a3d2dd 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -9,6 +9,7 @@
 #include <linux/preempt.h>
 #include <linux/cpumask.h>
 #include <linux/hardirq.h>
+#include <linux/sched.h>
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index dd98c54a23b4..d9a2f5254a51 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -7,6 +7,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/thread_info.h>
 #include <linux/linkage.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e4681256e43e..41df81395719 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1233,6 +1233,8 @@ static inline void task_unlock(struct task_struct *p)
 	spin_unlock(&p->alloc_lock);
 }
 
+#ifndef __HAVE_THREAD_FUNCTIONS
+
 #define task_thread_info(task) (task)->thread_info
 
 static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
@@ -1246,6 +1248,8 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
 	return (unsigned long *)(p->thread_info + 1);
 }
 
+#endif
+
 /* set thread flags in other task's structures
  * - see asm/thread_info.h for TIF_xxxx flags available
  */
diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h
index b63ce7014093..fa1ff3b165fe 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -2,11 +2,10 @@
 #define __LINUX_SMPLOCK_H
 
 #include <linux/config.h>
+#ifdef CONFIG_LOCK_KERNEL
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 
-#ifdef CONFIG_LOCK_KERNEL
-
 #define kernel_locked()		(current->lock_depth >= 0)
 
 extern int __lockfunc __reacquire_kernel_lock(void);
-- 
cgit v1.2.3-71-gd317


From abd03753bd1532c05eb13231569a5257b007e29c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Date: Sun, 13 Nov 2005 16:06:58 -0800
Subject: [PATCH] m68k: m68k-specific thread_info changes

a) added embedded thread_info [m68k processor.h]

b) added missing symbols in asm-offsets.c

c) task_thread_info() and friends in asm-m68k/thread_info.h

d) made m68k thread_info.h included by m68k processor.h, not the other way
   round.

Signed-off-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/m68k/kernel/asm-offsets.c |  5 +++++
 include/asm-m68k/processor.h   |  2 ++
 include/asm-m68k/thread_info.h | 14 ++++++++++----
 3 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
index cee3317b8665..30beacfef8fb 100644
--- a/arch/m68k/kernel/asm-offsets.c
+++ b/arch/m68k/kernel/asm-offsets.c
@@ -31,6 +31,7 @@ int main(void)
 	DEFINE(TASK_SIGPENDING, offsetof(struct task_struct, thread.work.sigpending));
 	DEFINE(TASK_NOTIFY_RESUME, offsetof(struct task_struct, thread.work.notify_resume));
 	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
+	DEFINE(TASK_INFO, offsetof(struct task_struct, thread.info));
 	DEFINE(TASK_MM, offsetof(struct task_struct, mm));
 	DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
 
@@ -45,6 +46,10 @@ int main(void)
 	DEFINE(THREAD_FPCNTL, offsetof(struct thread_struct, fpcntl));
 	DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fpstate));
 
+	/* offsets into the thread_info struct */
+	DEFINE(TINFO_PREEMPT, offsetof(struct thread_info, preempt_count));
+	DEFINE(TINFO_FLAGS, offsetof(struct thread_info, flags));
+
 	/* offsets into the pt_regs */
 	DEFINE(PT_D0, offsetof(struct pt_regs, d0));
 	DEFINE(PT_ORIG_D0, offsetof(struct pt_regs, orig_d0));
diff --git a/include/asm-m68k/processor.h b/include/asm-m68k/processor.h
index df1575db32af..84b4b26df04c 100644
--- a/include/asm-m68k/processor.h
+++ b/include/asm-m68k/processor.h
@@ -14,6 +14,7 @@
 #define current_text_addr() ({ __label__ _l; _l: &&_l;})
 
 #include <linux/config.h>
+#include <linux/thread_info.h>
 #include <asm/segment.h>
 #include <asm/fpu.h>
 #include <asm/ptrace.h>
@@ -79,6 +80,7 @@ struct thread_struct {
 	unsigned long  fpcntl[3];	/* fp control regs */
 	unsigned char  fpstate[FPSTATESIZE];  /* floating point state */
 	struct task_work work;
+	struct thread_info info;
 };
 
 #define INIT_THREAD  {							\
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h
index 2aed24f6fd2e..4fdbf55f95e5 100644
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -2,7 +2,6 @@
 #define _ASM_M68K_THREAD_INFO_H
 
 #include <asm/types.h>
-#include <asm/processor.h>
 #include <asm/page.h>
 
 struct thread_info {
@@ -35,14 +34,21 @@ struct thread_info {
 #define free_thread_info(ti)  free_pages((unsigned long)(ti),1)
 #endif /* PAGE_SHIFT == 13 */
 
-//#define init_thread_info	(init_task.thread.info)
+#define init_thread_info	(init_task.thread.info)
 #define init_stack		(init_thread_union.stack)
 
-#define current_thread_info()	(current->thread_info)
-
+#define task_thread_info(tsk)	(&(tsk)->thread.info)
+#define current_thread_info()	task_thread_info(current)
 
 #define __HAVE_THREAD_FUNCTIONS
 
+#define setup_thread_stack(p, org) ({			\
+	*(struct task_struct **)(p)->thread_info = (p);	\
+	task_thread_info(p)->task = (p);		\
+})
+
+#define end_of_stack(p) ((unsigned long *)(p)->thread_info + 1)
+
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_DELAYED_TRACE	1	/* single step a syscall */
 #define TIF_NOTIFY_RESUME	2	/* resumption notification requested */
-- 
cgit v1.2.3-71-gd317


From 3b66a1edb01b82269a668a478625765b1fa4936f Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Sun, 13 Nov 2005 16:06:59 -0800
Subject: [PATCH] m68k: convert thread flags to use bit fields

Remove task_work structure, use the standard thread flags functions and use
shifts in entry.S to test the thread flags.  Add a few local labels to entry.S
to allow gas to generate short jumps.

Finally it changes a number of inline functions in thread_info.h to macros to
delay the current_thread_info() usage, which requires on m68k a structure
(task_struct) not yet defined at this point.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Cc: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/m68k/fpsp040/skeleton.S   |  6 ++--
 arch/m68k/ifpsp060/iskeleton.S |  6 ++--
 arch/m68k/kernel/asm-offsets.c |  5 ---
 arch/m68k/kernel/entry.S       | 78 ++++++++++++++++++++--------------------
 arch/m68k/kernel/ptrace.c      | 15 ++++----
 include/asm-m68k/processor.h   | 12 -------
 include/asm-m68k/thread_info.h | 81 +++++-------------------------------------
 include/linux/thread_info.h    | 47 +++++++-----------------
 8 files changed, 72 insertions(+), 178 deletions(-)

(limited to 'include')

diff --git a/arch/m68k/fpsp040/skeleton.S b/arch/m68k/fpsp040/skeleton.S
index 9571a21d6ad4..a1629194e3fd 100644
--- a/arch/m68k/fpsp040/skeleton.S
+++ b/arch/m68k/fpsp040/skeleton.S
@@ -381,10 +381,8 @@ fpsp_done:
 .Lnotkern:
 	SAVE_ALL_INT
 	GET_CURRENT(%d0)
-	tstb	%curptr@(TASK_NEEDRESCHED)
-	jne	ret_from_exception	| deliver signals,
-					| reschedule etc..
-	RESTORE_ALL
+	| deliver signals, reschedule etc..
+	jra	ret_from_exception
 
 |
 |	mem_write --- write to user or supervisor address space
diff --git a/arch/m68k/ifpsp060/iskeleton.S b/arch/m68k/ifpsp060/iskeleton.S
index 4ba2c74da93d..b2dbdf5ee309 100644
--- a/arch/m68k/ifpsp060/iskeleton.S
+++ b/arch/m68k/ifpsp060/iskeleton.S
@@ -75,10 +75,8 @@ _060_isp_done:
 .Lnotkern:
 	SAVE_ALL_INT
 	GET_CURRENT(%d0)
-	tstb	%curptr@(TASK_NEEDRESCHED)
-	jne	ret_from_exception	| deliver signals,
-					| reschedule etc..
-	RESTORE_ALL
+	| deliver signals, reschedule etc..
+	jra	ret_from_exception
 
 |
 | _060_real_chk():
diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
index 30beacfef8fb..c787c5ba9513 100644
--- a/arch/m68k/kernel/asm-offsets.c
+++ b/arch/m68k/kernel/asm-offsets.c
@@ -25,11 +25,6 @@ int main(void)
 	DEFINE(TASK_STATE, offsetof(struct task_struct, state));
 	DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags));
 	DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
-	DEFINE(TASK_WORK, offsetof(struct task_struct, thread.work));
-	DEFINE(TASK_NEEDRESCHED, offsetof(struct task_struct, thread.work.need_resched));
-	DEFINE(TASK_SYSCALL_TRACE, offsetof(struct task_struct, thread.work.syscall_trace));
-	DEFINE(TASK_SIGPENDING, offsetof(struct task_struct, thread.work.sigpending));
-	DEFINE(TASK_NOTIFY_RESUME, offsetof(struct task_struct, thread.work.notify_resume));
 	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
 	DEFINE(TASK_INFO, offsetof(struct task_struct, thread.info));
 	DEFINE(TASK_MM, offsetof(struct task_struct, mm));
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 23ca60a45552..320fde05dc63 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -44,9 +44,7 @@
 
 #include <asm/asm-offsets.h>
 
-.globl system_call, buserr, trap
-.globl resume, ret_from_exception
-.globl ret_from_signal
+.globl system_call, buserr, trap, resume
 .globl inthandler, sys_call_table
 .globl sys_fork, sys_clone, sys_vfork
 .globl ret_from_interrupt, bad_interrupt
@@ -58,7 +56,7 @@ ENTRY(buserr)
 	movel	%sp,%sp@-		| stack frame pointer argument
 	bsrl	buserr_c
 	addql	#4,%sp
-	jra	ret_from_exception
+	jra	.Lret_from_exception
 
 ENTRY(trap)
 	SAVE_ALL_INT
@@ -66,7 +64,7 @@ ENTRY(trap)
 	movel	%sp,%sp@-		| stack frame pointer argument
 	bsrl	trap_c
 	addql	#4,%sp
-	jra	ret_from_exception
+	jra	.Lret_from_exception
 
 	| After a fork we jump here directly from resume,
 	| so that %d1 contains the previous task
@@ -75,30 +73,31 @@ ENTRY(ret_from_fork)
 	movel	%d1,%sp@-
 	jsr	schedule_tail
 	addql	#4,%sp
-	jra	ret_from_exception
+	jra	.Lret_from_exception
 
-badsys:
-	movel	#-ENOSYS,%sp@(PT_D0)
-	jra	ret_from_exception
-
-do_trace:
+do_trace_entry:
 	movel	#-ENOSYS,%sp@(PT_D0)	| needed for strace
 	subql	#4,%sp
 	SAVE_SWITCH_STACK
 	jbsr	syscall_trace
 	RESTORE_SWITCH_STACK
 	addql	#4,%sp
-	movel	%sp@(PT_ORIG_D0),%d1
-	movel	#-ENOSYS,%d0
-	cmpl	#NR_syscalls,%d1
-	jcc	1f
-	jbsr	@(sys_call_table,%d1:l:4)@(0)
-1:	movel	%d0,%sp@(PT_D0)		| save the return value
-	subql	#4,%sp			| dummy return address
+	movel	%sp@(PT_ORIG_D0),%d0
+	cmpl	#NR_syscalls,%d0
+	jcs	syscall
+badsys:
+	movel	#-ENOSYS,%sp@(PT_D0)
+	jra	ret_from_syscall
+
+do_trace_exit:
+	subql	#4,%sp
 	SAVE_SWITCH_STACK
 	jbsr	syscall_trace
+	RESTORE_SWITCH_STACK
+	addql	#4,%sp
+	jra	.Lret_from_exception
 
-ret_from_signal:
+ENTRY(ret_from_signal)
 	RESTORE_SWITCH_STACK
 	addql	#4,%sp
 /* on 68040 complete pending writebacks if any */
@@ -111,7 +110,7 @@ ret_from_signal:
 	addql	#4,%sp
 1:
 #endif
-	jra	ret_from_exception
+	jra	.Lret_from_exception
 
 ENTRY(system_call)
 	SAVE_ALL_SYS
@@ -120,30 +119,34 @@ ENTRY(system_call)
 	| save top of frame
 	movel	%sp,%curptr@(TASK_THREAD+THREAD_ESP0)
 
-	tstb	%curptr@(TASK_SYSCALL_TRACE)
-	jne	do_trace
+	| syscall trace?
+	tstb	%curptr@(TASK_INFO+TINFO_FLAGS+2)
+	jmi	do_trace_entry
 	cmpl	#NR_syscalls,%d0
 	jcc	badsys
+syscall:
 	jbsr	@(sys_call_table,%d0:l:4)@(0)
 	movel	%d0,%sp@(PT_D0)		| save the return value
-
+ret_from_syscall:
 	|oriw	#0x0700,%sr
-	movel	%curptr@(TASK_WORK),%d0
+	movew	%curptr@(TASK_INFO+TINFO_FLAGS+2),%d0
 	jne	syscall_exit_work
 1:	RESTORE_ALL
 
 syscall_exit_work:
 	btst	#5,%sp@(PT_SR)		| check if returning to kernel
 	bnes	1b			| if so, skip resched, signals
-	tstw	%d0
-	jeq	do_signal_return
-	tstb	%d0
-	jne	do_delayed_trace
-
+	lslw	#1,%d0
+	jcs	do_trace_exit
+	jmi	do_delayed_trace
+	lslw	#8,%d0
+	jmi	do_signal_return
 	pea	resume_userspace
-	jmp	schedule
+	jra	schedule
+
 
-ret_from_exception:
+ENTRY(ret_from_exception)
+.Lret_from_exception:
 	btst	#5,%sp@(PT_SR)		| check if returning to kernel
 	bnes	1f			| if so, skip resched, signals
 	| only allow interrupts when we are really the last one on the
@@ -152,19 +155,18 @@ ret_from_exception:
 	andw	#ALLOWINT,%sr
 
 resume_userspace:
-	movel	%curptr@(TASK_WORK),%d0
-	lsrl	#8,%d0
+	moveb	%curptr@(TASK_INFO+TINFO_FLAGS+3),%d0
 	jne	exit_work
 1:	RESTORE_ALL
 
 exit_work:
 	| save top of frame
 	movel	%sp,%curptr@(TASK_THREAD+THREAD_ESP0)
-	tstb	%d0
-	jeq	do_signal_return
-
+	lslb	#1,%d0
+	jmi	do_signal_return
 	pea	resume_userspace
-	jmp	schedule
+	jra	schedule
+
 
 do_signal_return:
 	|andw	#ALLOWINT,%sr
@@ -254,7 +256,7 @@ ret_from_interrupt:
 
 	/* check if we need to do software interrupts */
 	tstl	irq_stat+CPUSTAT_SOFTIRQ_PENDING
-	jeq	ret_from_exception
+	jeq	.Lret_from_exception
 	pea	ret_from_exception
 	jra	do_softirq
 
diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index 7e54422685cf..540638ca81f9 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c
@@ -109,7 +109,7 @@ static inline void singlestep_disable(struct task_struct *child)
 {
 	unsigned long tmp = get_reg(child, PT_SR) & ~(TRACE_BITS << 16);
 	put_reg(child, PT_SR, tmp);
-	child->thread.work.delayed_trace = 0;
+	clear_tsk_thread_flag(child, TIF_DELAYED_TRACE);
 }
 
 /*
@@ -118,7 +118,7 @@ static inline void singlestep_disable(struct task_struct *child)
 void ptrace_disable(struct task_struct *child)
 {
 	singlestep_disable(child);
-	child->thread.work.syscall_trace = 0;
+	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 }
 
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
@@ -198,9 +198,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			goto out_eio;
 
 		if (request == PTRACE_SYSCALL)
-			child->thread.work.syscall_trace = ~0;
+			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		else
-			child->thread.work.syscall_trace = 0;
+			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		child->exit_code = data;
 		singlestep_disable(child);
 		wake_up_process(child);
@@ -223,10 +223,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		if (!valid_signal(data))
 			goto out_eio;
 
-		child->thread.work.syscall_trace = 0;
+		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		tmp = get_reg(child, PT_SR) | (TRACE_BITS << 16);
 		put_reg(child, PT_SR, tmp);
-		child->thread.work.delayed_trace = 1;
+		set_tsk_thread_flag(child, TIF_DELAYED_TRACE);
 
 		child->exit_code = data;
 		/* give it a chance to run. */
@@ -288,9 +288,6 @@ out_eio:
 
 asmlinkage void syscall_trace(void)
 {
-	if (!current->thread.work.delayed_trace &&
-	    !current->thread.work.syscall_trace)
-		return;
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
 				 ? 0x80 : 0));
 	/*
diff --git a/include/asm-m68k/processor.h b/include/asm-m68k/processor.h
index 84b4b26df04c..7982285e84ed 100644
--- a/include/asm-m68k/processor.h
+++ b/include/asm-m68k/processor.h
@@ -56,17 +56,6 @@ static inline void wrusp(unsigned long usp)
 #endif
 #define TASK_UNMAPPED_ALIGN(addr, off)	PAGE_ALIGN(addr)
 
-struct task_work {
-	unsigned char sigpending;
-	unsigned char notify_resume;	/* request for notification on
-					   userspace execution resumption */
-	char          need_resched;
-	unsigned char delayed_trace;	/* single step a syscall */
-	unsigned char syscall_trace;	/* count of syscall interceptors */
-	unsigned char memdie;		/* task was selected to be killed */
-	unsigned char pad[2];
-};
-
 struct thread_struct {
 	unsigned long  ksp;		/* kernel stack pointer */
 	unsigned long  usp;		/* user stack pointer */
@@ -79,7 +68,6 @@ struct thread_struct {
 	unsigned long  fp[8*3];
 	unsigned long  fpcntl[3];	/* fp control regs */
 	unsigned char  fpstate[FPSTATESIZE];  /* floating point state */
-	struct task_work work;
 	struct thread_info info;
 };
 
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h
index 4fdbf55f95e5..9532ca3c45cb 100644
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -6,12 +6,11 @@
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
+	unsigned long		flags;
 	struct exec_domain	*exec_domain;	/* execution domain */
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
 	__u32 cpu; /* should always be 0 on m68k */
 	struct restart_block    restart_block;
-
-	__u8			supervisor_stack[0];
 };
 
 #define PREEMPT_ACTIVE		0x4000000
@@ -49,76 +48,14 @@ struct thread_info {
 
 #define end_of_stack(p) ((unsigned long *)(p)->thread_info + 1)
 
-#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
-#define TIF_DELAYED_TRACE	1	/* single step a syscall */
-#define TIF_NOTIFY_RESUME	2	/* resumption notification requested */
-#define TIF_SIGPENDING		3	/* signal pending */
-#define TIF_NEED_RESCHED	4	/* rescheduling necessary */
-#define TIF_MEMDIE		5
-
-extern int thread_flag_fixme(void);
-
-/*
- * flag set/clear/test wrappers
- * - pass TIF_xxxx constants to these functions
+/* entry.S relies on these definitions!
+ * bits 0-7 are tested at every exception exit
+ * bits 8-15 are also tested at syscall exit
  */
-
-#define __set_tsk_thread_flag(tsk, flag, val) ({	\
-	switch (flag) {					\
-	case TIF_SIGPENDING:				\
-		tsk->thread.work.sigpending = val;	\
-		break;					\
-	case TIF_NEED_RESCHED:				\
-		tsk->thread.work.need_resched = val;	\
-		break;					\
-	case TIF_SYSCALL_TRACE:				\
-		tsk->thread.work.syscall_trace = val;	\
-		break;					\
-	case TIF_MEMDIE:				\
-		tsk->thread.work.memdie = val;		\
-		break;					\
-	default:					\
-		thread_flag_fixme();			\
-	}						\
-})
-
-#define __get_tsk_thread_flag(tsk, flag) ({		\
-	int ___res;					\
-	switch (flag) {					\
-	case TIF_SIGPENDING:				\
-		___res = tsk->thread.work.sigpending;	\
-		break;					\
-	case TIF_NEED_RESCHED:				\
-		___res = tsk->thread.work.need_resched;	\
-		break;					\
-	case TIF_SYSCALL_TRACE:				\
-		___res = tsk->thread.work.syscall_trace;\
-		break;					\
-	case TIF_MEMDIE:				\
-		___res = tsk->thread.work.memdie;\
-		break;					\
-	default:					\
-		___res = thread_flag_fixme();		\
-	}						\
-	___res;						\
-})
-
-#define __get_set_tsk_thread_flag(tsk, flag, val) ({	\
-	int __res = __get_tsk_thread_flag(tsk, flag);	\
-	__set_tsk_thread_flag(tsk, flag, val);		\
-	__res;						\
-})
-
-#define set_tsk_thread_flag(tsk, flag) __set_tsk_thread_flag(tsk, flag, ~0)
-#define clear_tsk_thread_flag(tsk, flag) __set_tsk_thread_flag(tsk, flag, 0)
-#define test_and_set_tsk_thread_flag(tsk, flag) __get_set_tsk_thread_flag(tsk, flag, ~0)
-#define test_tsk_thread_flag(tsk, flag) __get_tsk_thread_flag(tsk, flag)
-
-#define set_thread_flag(flag) set_tsk_thread_flag(current, flag)
-#define clear_thread_flag(flag) clear_tsk_thread_flag(current, flag)
-#define test_thread_flag(flag) test_tsk_thread_flag(current, flag)
-
-#define set_need_resched() set_thread_flag(TIF_NEED_RESCHED)
-#define clear_need_resched() clear_thread_flag(TIF_NEED_RESCHED)
+#define TIF_SIGPENDING		6	/* signal pending */
+#define TIF_NEED_RESCHED	7	/* rescheduling necessary */
+#define TIF_DELAYED_TRACE	14	/* single step a syscall */
+#define TIF_SYSCALL_TRACE	15	/* syscall trace active */
+#define TIF_MEMDIE		16
 
 #endif	/* _ASM_M68K_THREAD_INFO_H */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index d252f45a0f9b..1c4eb41dbd89 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -27,31 +27,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
  * - pass TIF_xxxx constants to these functions
  */
 
-static inline void set_thread_flag(int flag)
-{
-	set_bit(flag,&current_thread_info()->flags);
-}
-
-static inline void clear_thread_flag(int flag)
-{
-	clear_bit(flag,&current_thread_info()->flags);
-}
-
-static inline int test_and_set_thread_flag(int flag)
-{
-	return test_and_set_bit(flag,&current_thread_info()->flags);
-}
-
-static inline int test_and_clear_thread_flag(int flag)
-{
-	return test_and_clear_bit(flag,&current_thread_info()->flags);
-}
-
-static inline int test_thread_flag(int flag)
-{
-	return test_bit(flag,&current_thread_info()->flags);
-}
-
 static inline void set_ti_thread_flag(struct thread_info *ti, int flag)
 {
 	set_bit(flag,&ti->flags);
@@ -77,15 +52,19 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
 	return test_bit(flag,&ti->flags);
 }
 
-static inline void set_need_resched(void)
-{
-	set_thread_flag(TIF_NEED_RESCHED);
-}
-
-static inline void clear_need_resched(void)
-{
-	clear_thread_flag(TIF_NEED_RESCHED);
-}
+#define set_thread_flag(flag) \
+	set_ti_thread_flag(current_thread_info(), flag)
+#define clear_thread_flag(flag) \
+	clear_ti_thread_flag(current_thread_info(), flag)
+#define test_and_set_thread_flag(flag) \
+	test_and_set_ti_thread_flag(current_thread_info(), flag)
+#define test_and_clear_thread_flag(flag) \
+	test_and_clear_ti_thread_flag(current_thread_info(), flag)
+#define test_thread_flag(flag) \
+	test_ti_thread_flag(current_thread_info(), flag)
+
+#define set_need_resched()	set_thread_flag(TIF_NEED_RESCHED)
+#define clear_need_resched()	clear_thread_flag(TIF_NEED_RESCHED)
 
 #endif
 
-- 
cgit v1.2.3-71-gd317


From 66341a905ef5b3e7aea65b5d9bd1b0361b0ccc61 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 13 Nov 2005 16:07:21 -0800
Subject: [PATCH] Shut up per_cpu_ptr() on UP

Currently per_cpu_ptr() doesn't really do anything with 'cpu' in the UP
case.  This is problematic in the cases where this is the only place the
variable is referenced:

  CC      kernel/workqueue.o
  kernel/workqueue.c: In function `current_is_keventd':
  kernel/workqueue.c:460: warning: unused variable `cpu'

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 5451eb1e781d..fb8d2d24e4bb 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -38,7 +38,7 @@ extern void free_percpu(const void *);
 
 #else /* CONFIG_SMP */
 
-#define per_cpu_ptr(ptr, cpu) (ptr)
+#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
 
 static inline void *__alloc_percpu(size_t size, size_t align)
 {
-- 
cgit v1.2.3-71-gd317


From 53e86b91b7ae66d4c2757195cbd42e00d9199cf2 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sun, 13 Nov 2005 16:07:23 -0800
Subject: [PATCH] i386: generic cmpxchg

- Make cmpxchg generally available on the i386 platform.

- Provide emulation of cmpxchg suitable for uniprocessor if built and run on
  386.

From: Christoph Lameter <clameter@sgi.com>

- Cut down patch and small style changes.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/cpu/intel.c | 48 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-i386/system.h    | 42 +++++++++++++++++++++++++++++++++++---
 2 files changed, 87 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 43601de0f633..c28d26fb5f24 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -6,6 +6,7 @@
 #include <linux/bitops.h>
 #include <linux/smp.h>
 #include <linux/thread_info.h>
+#include <linux/module.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -264,5 +265,52 @@ __init int intel_cpu_init(void)
 	return 0;
 }
 
+#ifndef CONFIG_X86_CMPXCHG
+unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
+{
+	u8 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u8 *)ptr;
+	if (prev == old)
+		*(u8 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u8);
+
+unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
+{
+	u16 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u16 *)ptr;
+	if (prev == old)
+		*(u16 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u16);
+
+unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
+{
+	u32 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u32 *)ptr;
+	if (prev == old)
+		*(u32 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u32);
+#endif
+
 // arch_initcall(intel_cpu_init);
 
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index 97d52ac49e46..772f85da1206 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -263,6 +263,10 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 
 #ifdef CONFIG_X86_CMPXCHG
 #define __HAVE_ARCH_CMPXCHG 1
+#define cmpxchg(ptr,o,n)\
+	((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+					(unsigned long)(n),sizeof(*(ptr))))
+#endif
 
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 				      unsigned long new, int size)
@@ -291,10 +295,42 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	return old;
 }
 
-#define cmpxchg(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
-					(unsigned long)(n),sizeof(*(ptr))))
+#ifndef CONFIG_X86_CMPXCHG
+/*
+ * Building a kernel capable running on 80386. It may be necessary to
+ * simulate the cmpxchg on the 80386 CPU. For that purpose we define
+ * a function for each of the sizes we support.
+ */
 
+extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
+extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
+extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
+
+static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	switch (size) {
+	case 1:
+		return cmpxchg_386_u8(ptr, old, new);
+	case 2:
+		return cmpxchg_386_u16(ptr, old, new);
+	case 4:
+		return cmpxchg_386_u32(ptr, old, new);
+	}
+	return old;
+}
+
+#define cmpxchg(ptr,o,n)						\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	if (likely(boot_cpu_data.x86 > 3))				\
+		__ret = __cmpxchg((ptr), (unsigned long)(o),		\
+					(unsigned long)(n), sizeof(*(ptr))); \
+	else								\
+		__ret = cmpxchg_386((ptr), (unsigned long)(o),		\
+					(unsigned long)(n), sizeof(*(ptr))); \
+	__ret;								\
+})
 #endif
 
 #ifdef CONFIG_X86_CMPXCHG64
-- 
cgit v1.2.3-71-gd317


From 4a6dae6d382e9edf3ff440b819e554ed706359bc Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sun, 13 Nov 2005 16:07:24 -0800
Subject: [PATCH] atomic: cmpxchg

Introduce an atomic_cmpxchg operation.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/atomic_ops.txt   | 15 +++++++++++++++
 arch/sparc/lib/atomic32.c      | 21 ++++++++++++++++-----
 include/asm-alpha/atomic.h     |  2 ++
 include/asm-arm/atomic.h       | 31 +++++++++++++++++++++++++++++++
 include/asm-arm26/atomic.h     | 14 ++++++++++++++
 include/asm-cris/atomic.h      | 13 +++++++++++++
 include/asm-frv/atomic.h       |  2 ++
 include/asm-h8300/atomic.h     | 13 +++++++++++++
 include/asm-i386/atomic.h      |  2 ++
 include/asm-ia64/atomic.h      |  2 ++
 include/asm-m68k/atomic.h      |  2 ++
 include/asm-m68knommu/atomic.h |  2 ++
 include/asm-mips/atomic.h      |  2 ++
 include/asm-parisc/atomic.h    |  1 +
 include/asm-powerpc/atomic.h   |  2 ++
 include/asm-s390/atomic.h      |  2 ++
 include/asm-sh/atomic.h        | 14 ++++++++++++++
 include/asm-sh64/atomic.h      | 14 ++++++++++++++
 include/asm-sparc/atomic.h     |  1 +
 include/asm-sparc64/atomic.h   |  2 ++
 include/asm-v850/atomic.h      | 14 ++++++++++++++
 include/asm-x86_64/atomic.h    |  2 ++
 include/asm-xtensa/atomic.h    |  1 +
 23 files changed, 169 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 8eedaa24f5e2..f1744161ef06 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -115,6 +115,21 @@ boolean is return which indicates whether the resulting counter value
 is negative.  It requires explicit memory barrier semantics around the
 operation.
 
+Finally:
+
+	int atomic_cmpxchg(atomic_t *v, int old, int new);
+
+This performs an atomic compare exchange operation on the atomic value v,
+with the given old and new values. Like all atomic_xxx operations,
+atomic_cmpxchg will only satisfy its atomicity semantics as long as all
+other accesses of *v are performed through atomic_xxx operations.
+
+atomic_cmpxchg requires explicit memory barriers around the operation.
+
+The semantics for atomic_cmpxchg are the same as those defined for 'cas'
+below.
+
+
 If a caller requires memory barrier semantics around an atomic_t
 operation which does not return a value, a set of interfaces are
 defined which accomplish this:
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 2e64e8c3e8e5..be46f6545184 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -37,17 +37,28 @@ int __atomic_add_return(int i, atomic_t *v)
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
 	return ret;
 }
+EXPORT_SYMBOL(__atomic_add_return);
 
-void atomic_set(atomic_t *v, int i)
+int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
+	int ret;
 	unsigned long flags;
-	spin_lock_irqsave(ATOMIC_HASH(v), flags);
 
-	v->counter = i;
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);
+	ret = v->counter;
+	if (likely(ret == old))
+		v->counter = new;
 
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
+	return ret;
 }
 
-EXPORT_SYMBOL(__atomic_add_return);
-EXPORT_SYMBOL(atomic_set);
+void atomic_set(atomic_t *v, int i)
+{
+	unsigned long flags;
 
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);
+	v->counter = i;
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
+}
+EXPORT_SYMBOL(atomic_set);
diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h
index 20ac3d95ecd9..a6660809a879 100644
--- a/include/asm-alpha/atomic.h
+++ b/include/asm-alpha/atomic.h
@@ -177,6 +177,8 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
 	return result;
 }
 
+#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic64_dec_return(v) atomic64_sub_return(1,(v))
 
diff --git a/include/asm-arm/atomic.h b/include/asm-arm/atomic.h
index 2885972b0855..8ab1689ef56a 100644
--- a/include/asm-arm/atomic.h
+++ b/include/asm-arm/atomic.h
@@ -80,6 +80,23 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	return result;
 }
 
+static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+{
+	u32 oldval, res;
+
+	do {
+		__asm__ __volatile__("@ atomic_cmpxchg\n"
+		"ldrex	%1, [%2]\n"
+		"teq	%1, %3\n"
+		"strexeq %0, %4, [%2]\n"
+		    : "=&r" (res), "=&r" (oldval)
+		    : "r" (&ptr->counter), "Ir" (old), "r" (new)
+		    : "cc");
+	} while (res);
+
+	return oldval;
+}
+
 static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 {
 	unsigned long tmp, tmp2;
@@ -131,6 +148,20 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	return val;
 }
 
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (likely(ret == old))
+		v->counter = new;
+	local_irq_restore(flags);
+
+	return ret;
+}
+
 static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 {
 	unsigned long flags;
diff --git a/include/asm-arm26/atomic.h b/include/asm-arm26/atomic.h
index 4a88235c0e76..54b24ead7132 100644
--- a/include/asm-arm26/atomic.h
+++ b/include/asm-arm26/atomic.h
@@ -62,6 +62,20 @@ static inline int atomic_sub_return(int i, atomic_t *v)
         return val;
 }
 
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (likely(ret == old))
+		v->counter = new;
+	local_irq_restore(flags);
+
+	return ret;
+}
+
 static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 {
         unsigned long flags;
diff --git a/include/asm-cris/atomic.h b/include/asm-cris/atomic.h
index 8c2e78304523..45891f7de00f 100644
--- a/include/asm-cris/atomic.h
+++ b/include/asm-cris/atomic.h
@@ -123,6 +123,19 @@ static inline int atomic_inc_and_test(volatile atomic_t *v)
 	return retval;
 }
 
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	cris_atomic_save(v, flags);
+	ret = v->counter;
+	if (likely(ret == old))
+		v->counter = new;
+	cris_atomic_restore(v, flags);
+	return ret;
+}
+
 /* Atomic operations are already serializing */
 #define smp_mb__before_atomic_dec()    barrier()
 #define smp_mb__after_atomic_dec()     barrier()
diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h
index e75968463428..55f06a0e949f 100644
--- a/include/asm-frv/atomic.h
+++ b/include/asm-frv/atomic.h
@@ -414,4 +414,6 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
 
 #endif
 
+#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+
 #endif /* _ASM_ATOMIC_H */
diff --git a/include/asm-h8300/atomic.h b/include/asm-h8300/atomic.h
index 7230f6507995..d50439259491 100644
--- a/include/asm-h8300/atomic.h
+++ b/include/asm-h8300/atomic.h
@@ -82,6 +82,19 @@ static __inline__ int atomic_dec_and_test(atomic_t *v)
 	return ret == 0;
 }
 
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (likely(ret == old))
+		v->counter = new;
+	local_irq_restore(flags);
+	return ret;
+}
+
 static __inline__ void atomic_clear_mask(unsigned long mask, unsigned long *v)
 {
 	__asm__ __volatile__("stc ccr,r1l\n\t"
diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h
index 509720be772a..5ff698e9d2c2 100644
--- a/include/asm-i386/atomic.h
+++ b/include/asm-i386/atomic.h
@@ -215,6 +215,8 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v)
 	return atomic_add_return(-i,v);
 }
 
+#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+
 #define atomic_inc_return(v)  (atomic_add_return(1,v))
 #define atomic_dec_return(v)  (atomic_sub_return(1,v))
 
diff --git a/include/asm-ia64/atomic.h b/include/asm-ia64/atomic.h
index 874a6f890e75..593d3da9f3c2 100644
--- a/include/asm-ia64/atomic.h
+++ b/include/asm-ia64/atomic.h
@@ -88,6 +88,8 @@ ia64_atomic64_sub (__s64 i, atomic64_t *v)
 	return new;
 }
 
+#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+
 #define atomic_add_return(i,v)						\
 ({									\
 	int __ia64_aar_i = (i);						\
diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h
index 38f3043e7fe1..b821975a361a 100644
--- a/include/asm-m68k/atomic.h
+++ b/include/asm-m68k/atomic.h
@@ -139,6 +139,8 @@ static inline void atomic_set_mask(unsigned long mask, unsigned long *v)
 	__asm__ __volatile__("orl %1,%0" : "+m" (*v) : "id" (mask));
 }
 
+#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+
 /* Atomic operations are already serializing */
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
diff --git a/include/asm-m68knommu/atomic.h b/include/asm-m68knommu/atomic.h
index a83631ed8c8f..2fd33a56b603 100644
--- a/include/asm-m68knommu/atomic.h
+++ b/include/asm-m68knommu/atomic.h
@@ -128,6 +128,8 @@ static inline int atomic_sub_return(int i, atomic_t * v)
 	return temp;
 }
 
+#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic_inc_return(v) atomic_add_return(1,(v))
 
diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h
index 6202eb8a14b7..4fba0d003c99 100644
--- a/include/asm-mips/atomic.h
+++ b/include/asm-mips/atomic.h
@@ -287,6 +287,8 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 	return result;
 }
 
+#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic_inc_return(v) atomic_add_return(1,(v))
 
diff --git a/include/asm-parisc/atomic.h b/include/asm-parisc/atomic.h
index 048a2c7fd0c0..52c9a45b5f87 100644
--- a/include/asm-parisc/atomic.h
+++ b/include/asm-parisc/atomic.h
@@ -164,6 +164,7 @@ static __inline__ int atomic_read(const atomic_t *v)
 }
 
 /* exported interface */
+#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
 #define atomic_add(i,v)	((void)(__atomic_add_return( ((int)i),(v))))
 #define atomic_sub(i,v)	((void)(__atomic_add_return(-((int)i),(v))))
diff --git a/include/asm-powerpc/atomic.h b/include/asm-powerpc/atomic.h
index 9c0b372a46e1..37205faa9d7c 100644
--- a/include/asm-powerpc/atomic.h
+++ b/include/asm-powerpc/atomic.h
@@ -164,6 +164,8 @@ static __inline__ int atomic_dec_return(atomic_t *v)
 	return t;
 }
 
+#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+
 #define atomic_sub_and_test(a, v)	(atomic_sub_return((a), (v)) == 0)
 #define atomic_dec_and_test(v)		(atomic_dec_return((v)) == 0)
 
diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h
index 9d86ba6f12d0..631014d5de90 100644
--- a/include/asm-s390/atomic.h
+++ b/include/asm-s390/atomic.h
@@ -198,6 +198,8 @@ atomic_compare_and_swap(int expected_oldval,int new_val,atomic_t *v)
         return retval;
 }
 
+#define atomic_cmpxchg(v, o, n) (atomic_compare_and_swap((o), (n), &((v)->counter)))
+
 #define smp_mb__before_atomic_dec()	smp_mb()
 #define smp_mb__after_atomic_dec()	smp_mb()
 #define smp_mb__before_atomic_inc()	smp_mb()
diff --git a/include/asm-sh/atomic.h b/include/asm-sh/atomic.h
index 3c4f805da1ac..a148c762d366 100644
--- a/include/asm-sh/atomic.h
+++ b/include/asm-sh/atomic.h
@@ -87,6 +87,20 @@ static __inline__ int atomic_sub_return(int i, atomic_t * v)
 #define atomic_inc(v) atomic_add(1,(v))
 #define atomic_dec(v) atomic_sub(1,(v))
 
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (likely(ret == old))
+		v->counter = new;
+	local_irq_restore(flags);
+
+	return ret;
+}
+
 static __inline__ void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
 	unsigned long flags;
diff --git a/include/asm-sh64/atomic.h b/include/asm-sh64/atomic.h
index 8c3872d3e65f..6eeb57b015ce 100644
--- a/include/asm-sh64/atomic.h
+++ b/include/asm-sh64/atomic.h
@@ -99,6 +99,20 @@ static __inline__ int atomic_sub_return(int i, atomic_t * v)
 #define atomic_inc(v) atomic_add(1,(v))
 #define atomic_dec(v) atomic_sub(1,(v))
 
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (likely(ret == old))
+		v->counter = new;
+	local_irq_restore(flags);
+
+	return ret;
+}
+
 static __inline__ void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
 	unsigned long flags;
diff --git a/include/asm-sparc/atomic.h b/include/asm-sparc/atomic.h
index 37f6ab601c3d..52bdd1a895fa 100644
--- a/include/asm-sparc/atomic.h
+++ b/include/asm-sparc/atomic.h
@@ -19,6 +19,7 @@ typedef struct { volatile int counter; } atomic_t;
 #define ATOMIC_INIT(i)  { (i) }
 
 extern int __atomic_add_return(int, atomic_t *);
+extern int atomic_cmpxchg(atomic_t *, int, int);
 extern void atomic_set(atomic_t *, int);
 
 #define atomic_read(v)          ((v)->counter)
diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h
index e175afcf2cde..3a0b4383bbac 100644
--- a/include/asm-sparc64/atomic.h
+++ b/include/asm-sparc64/atomic.h
@@ -70,6 +70,8 @@ extern int atomic64_sub_ret(int, atomic64_t *);
 #define atomic_add_negative(i, v) (atomic_add_ret(i, v) < 0)
 #define atomic64_add_negative(i, v) (atomic64_add_ret(i, v) < 0)
 
+#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+
 /* Atomic operations are already serializing */
 #ifdef CONFIG_SMP
 #define smp_mb__before_atomic_dec()	membar_storeload_loadload();
diff --git a/include/asm-v850/atomic.h b/include/asm-v850/atomic.h
index 395268a8c0de..e497166ca42b 100644
--- a/include/asm-v850/atomic.h
+++ b/include/asm-v850/atomic.h
@@ -90,6 +90,20 @@ static __inline__ void atomic_clear_mask (unsigned long mask, unsigned long *add
 #define atomic_dec_and_test(v)		(atomic_sub_return (1, (v)) == 0)
 #define atomic_add_negative(i,v)	(atomic_add_return ((i), (v)) < 0)
 
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (likely(ret == old))
+		v->counter = new;
+	local_irq_restore(flags);
+
+	return ret;
+}
+
 /* Atomic operations are already serializing on ARM */
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h
index fc4c5956e1ea..75c8a1e96737 100644
--- a/include/asm-x86_64/atomic.h
+++ b/include/asm-x86_64/atomic.h
@@ -360,6 +360,8 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v)
 	return atomic_add_return(-i,v);
 }
 
+#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+
 #define atomic_inc_return(v)  (atomic_add_return(1,v))
 #define atomic_dec_return(v)  (atomic_sub_return(1,v))
 
diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h
index 12b5732dc6e5..cd40c5e75160 100644
--- a/include/asm-xtensa/atomic.h
+++ b/include/asm-xtensa/atomic.h
@@ -223,6 +223,7 @@ static inline int atomic_sub_return(int i, atomic_t * v)
  */
 #define atomic_add_negative(i,v) (atomic_add_return((i),(v)) < 0)
 
+#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
 static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
-- 
cgit v1.2.3-71-gd317


From 8426e1f6af0fd7f44d040af7263750c5a52f3cc3 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sun, 13 Nov 2005 16:07:25 -0800
Subject: [PATCH] atomic: inc_not_zero

Introduce an atomic_inc_not_zero operation.  Make this a special case of
atomic_add_unless because lockless pagecache actually wants
atomic_inc_not_negativeone due to its offset refcount.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/atomic_ops.txt   | 14 +++++++++++++-
 arch/sparc/lib/atomic32.c      | 15 +++++++++++++++
 include/asm-alpha/atomic.h     | 10 ++++++++++
 include/asm-arm/atomic.h       | 11 +++++++++++
 include/asm-arm26/atomic.h     | 15 +++++++++++++++
 include/asm-cris/atomic.h      | 14 ++++++++++++++
 include/asm-frv/atomic.h       | 10 ++++++++++
 include/asm-h8300/atomic.h     | 14 ++++++++++++++
 include/asm-i386/atomic.h      | 19 +++++++++++++++++++
 include/asm-ia64/atomic.h      | 10 ++++++++++
 include/asm-m68k/atomic.h      | 10 ++++++++++
 include/asm-m68knommu/atomic.h | 10 ++++++++++
 include/asm-mips/atomic.h      | 19 +++++++++++++++++++
 include/asm-parisc/atomic.h    | 19 +++++++++++++++++++
 include/asm-powerpc/atomic.h   | 25 +++++++++++++++++++++++++
 include/asm-s390/atomic.h      | 10 ++++++++++
 include/asm-sh/atomic.h        | 15 +++++++++++++++
 include/asm-sh64/atomic.h      | 15 +++++++++++++++
 include/asm-sparc/atomic.h     |  3 +++
 include/asm-sparc64/atomic.h   | 10 ++++++++++
 include/asm-v850/atomic.h      | 16 ++++++++++++++++
 include/asm-x86_64/atomic.h    | 19 +++++++++++++++++++
 include/asm-xtensa/atomic.h    | 19 +++++++++++++++++++
 23 files changed, 321 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index f1744161ef06..23a1c2402bcc 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -115,7 +115,7 @@ boolean is return which indicates whether the resulting counter value
 is negative.  It requires explicit memory barrier semantics around the
 operation.
 
-Finally:
+Then:
 
 	int atomic_cmpxchg(atomic_t *v, int old, int new);
 
@@ -129,6 +129,18 @@ atomic_cmpxchg requires explicit memory barriers around the operation.
 The semantics for atomic_cmpxchg are the same as those defined for 'cas'
 below.
 
+Finally:
+
+	int atomic_add_unless(atomic_t *v, int a, int u);
+
+If the atomic value v is not equal to u, this function adds a to v, and
+returns non zero. If v is equal to u then it returns zero. This is done as
+an atomic operation.
+
+atomic_add_unless requires explicit memory barriers around the operation.
+
+atomic_inc_not_zero, equivalent to atomic_add_unless(v, 1, 0)
+
 
 If a caller requires memory barrier semantics around an atomic_t
 operation which does not return a value, a set of interfaces are
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index be46f6545184..cb3cf0f22822 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -53,6 +53,21 @@ int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);
+	ret = v->counter;
+	if (ret != u)
+		v->counter += a;
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
+	return ret != u;
+}
+
+static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
+/* Atomic operations are already serializing */
 void atomic_set(atomic_t *v, int i)
 {
 	unsigned long flags;
diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h
index a6660809a879..36505bb4e8cb 100644
--- a/include/asm-alpha/atomic.h
+++ b/include/asm-alpha/atomic.h
@@ -179,6 +179,16 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic64_dec_return(v) atomic64_sub_return(1,(v))
 
diff --git a/include/asm-arm/atomic.h b/include/asm-arm/atomic.h
index 8ab1689ef56a..75b802719723 100644
--- a/include/asm-arm/atomic.h
+++ b/include/asm-arm/atomic.h
@@ -173,6 +173,17 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 
 #endif /* __LINUX_ARM_ARCH__ */
 
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int c, old;
+
+	c = atomic_read(v);
+	while (c != u && (old = atomic_cmpxchg((v), c, c + a)) != c)
+		c = old;
+	return c != u;
+}
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define atomic_add(i, v)	(void) atomic_add_return(i, v)
 #define atomic_inc(v)		(void) atomic_add_return(1, v)
 #define atomic_sub(i, v)	(void) atomic_sub_return(i, v)
diff --git a/include/asm-arm26/atomic.h b/include/asm-arm26/atomic.h
index 54b24ead7132..a47cadc59686 100644
--- a/include/asm-arm26/atomic.h
+++ b/include/asm-arm26/atomic.h
@@ -76,6 +76,21 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (ret != u)
+		v->counter += a;
+	local_irq_restore(flags);
+
+	return ret != u;
+}
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 {
         unsigned long flags;
diff --git a/include/asm-cris/atomic.h b/include/asm-cris/atomic.h
index 45891f7de00f..683b05a57d88 100644
--- a/include/asm-cris/atomic.h
+++ b/include/asm-cris/atomic.h
@@ -136,6 +136,20 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int ret;
+	unsigned long flags;
+
+	cris_atomic_save(v, flags);
+	ret = v->counter;
+	if (ret != u)
+		v->counter += a;
+	cris_atomic_restore(v, flags);
+	return ret != u;
+}
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 /* Atomic operations are already serializing */
 #define smp_mb__before_atomic_dec()    barrier()
 #define smp_mb__after_atomic_dec()     barrier()
diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h
index 55f06a0e949f..f6539ff569c5 100644
--- a/include/asm-frv/atomic.h
+++ b/include/asm-frv/atomic.h
@@ -416,4 +416,14 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
 
 #define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
 
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #endif /* _ASM_ATOMIC_H */
diff --git a/include/asm-h8300/atomic.h b/include/asm-h8300/atomic.h
index d50439259491..f23d86819ea8 100644
--- a/include/asm-h8300/atomic.h
+++ b/include/asm-h8300/atomic.h
@@ -95,6 +95,20 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (ret != u)
+		v->counter += a;
+	local_irq_restore(flags);
+	return ret != u;
+}
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 static __inline__ void atomic_clear_mask(unsigned long mask, unsigned long *v)
 {
 	__asm__ __volatile__("stc ccr,r1l\n\t"
diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h
index 5ff698e9d2c2..c68557aa04b2 100644
--- a/include/asm-i386/atomic.h
+++ b/include/asm-i386/atomic.h
@@ -217,6 +217,25 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v)
 
 #define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
 
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define atomic_inc_return(v)  (atomic_add_return(1,v))
 #define atomic_dec_return(v)  (atomic_sub_return(1,v))
 
diff --git a/include/asm-ia64/atomic.h b/include/asm-ia64/atomic.h
index 593d3da9f3c2..2fbebf85c31d 100644
--- a/include/asm-ia64/atomic.h
+++ b/include/asm-ia64/atomic.h
@@ -90,6 +90,16 @@ ia64_atomic64_sub (__s64 i, atomic64_t *v)
 
 #define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
 
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define atomic_add_return(i,v)						\
 ({									\
 	int __ia64_aar_i = (i);						\
diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h
index b821975a361a..e3c962eeabf3 100644
--- a/include/asm-m68k/atomic.h
+++ b/include/asm-m68k/atomic.h
@@ -141,6 +141,16 @@ static inline void atomic_set_mask(unsigned long mask, unsigned long *v)
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 /* Atomic operations are already serializing */
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
diff --git a/include/asm-m68knommu/atomic.h b/include/asm-m68knommu/atomic.h
index 2fd33a56b603..3c1cc153c415 100644
--- a/include/asm-m68knommu/atomic.h
+++ b/include/asm-m68knommu/atomic.h
@@ -130,6 +130,16 @@ static inline int atomic_sub_return(int i, atomic_t * v)
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic_inc_return(v) atomic_add_return(1,(v))
 
diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h
index 4fba0d003c99..2c87b41e69ba 100644
--- a/include/asm-mips/atomic.h
+++ b/include/asm-mips/atomic.h
@@ -289,6 +289,25 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic_inc_return(v) atomic_add_return(1,(v))
 
diff --git a/include/asm-parisc/atomic.h b/include/asm-parisc/atomic.h
index 52c9a45b5f87..983e9a2b6042 100644
--- a/include/asm-parisc/atomic.h
+++ b/include/asm-parisc/atomic.h
@@ -166,6 +166,25 @@ static __inline__ int atomic_read(const atomic_t *v)
 /* exported interface */
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define atomic_add(i,v)	((void)(__atomic_add_return( ((int)i),(v))))
 #define atomic_sub(i,v)	((void)(__atomic_add_return(-((int)i),(v))))
 #define atomic_inc(v)	((void)(__atomic_add_return(   1,(v))))
diff --git a/include/asm-powerpc/atomic.h b/include/asm-powerpc/atomic.h
index 37205faa9d7c..ec4b14468959 100644
--- a/include/asm-powerpc/atomic.h
+++ b/include/asm-powerpc/atomic.h
@@ -166,6 +166,31 @@ static __inline__ int atomic_dec_return(atomic_t *v)
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+#define atomic_add_unless(v, a, u)							 \
+({															 \
+	   int c, old;											 \
+	   c = atomic_read(v);									 \
+	   for (;;) {											  \
+			   if (unlikely(c == (u)))						 \
+					   break;								  \
+			   old = atomic_cmpxchg((v), c, c + (a));		  \
+			   if (likely(old == c))						   \
+					   break;								  \
+			   c = old;										\
+	   }													   \
+	   c != (u);											   \
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define atomic_sub_and_test(a, v)	(atomic_sub_return((a), (v)) == 0)
 #define atomic_dec_and_test(v)		(atomic_dec_return((v)) == 0)
 
diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h
index 631014d5de90..b3bd4f679f72 100644
--- a/include/asm-s390/atomic.h
+++ b/include/asm-s390/atomic.h
@@ -200,6 +200,16 @@ atomic_compare_and_swap(int expected_oldval,int new_val,atomic_t *v)
 
 #define atomic_cmpxchg(v, o, n) (atomic_compare_and_swap((o), (n), &((v)->counter)))
 
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define smp_mb__before_atomic_dec()	smp_mb()
 #define smp_mb__after_atomic_dec()	smp_mb()
 #define smp_mb__before_atomic_inc()	smp_mb()
diff --git a/include/asm-sh/atomic.h b/include/asm-sh/atomic.h
index a148c762d366..aabfd334462c 100644
--- a/include/asm-sh/atomic.h
+++ b/include/asm-sh/atomic.h
@@ -101,6 +101,21 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (ret != u)
+		v->counter += a;
+	local_irq_restore(flags);
+
+	return ret != u;
+}
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 static __inline__ void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
 	unsigned long flags;
diff --git a/include/asm-sh64/atomic.h b/include/asm-sh64/atomic.h
index 6eeb57b015ce..927a2bc27b30 100644
--- a/include/asm-sh64/atomic.h
+++ b/include/asm-sh64/atomic.h
@@ -113,6 +113,21 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (ret != u)
+		v->counter += a;
+	local_irq_restore(flags);
+
+	return ret != u;
+}
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 static __inline__ void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
 	unsigned long flags;
diff --git a/include/asm-sparc/atomic.h b/include/asm-sparc/atomic.h
index 52bdd1a895fa..62bec7ad271c 100644
--- a/include/asm-sparc/atomic.h
+++ b/include/asm-sparc/atomic.h
@@ -20,6 +20,7 @@ typedef struct { volatile int counter; } atomic_t;
 
 extern int __atomic_add_return(int, atomic_t *);
 extern int atomic_cmpxchg(atomic_t *, int, int);
+extern int atomic_add_unless(atomic_t *, int, int);
 extern void atomic_set(atomic_t *, int);
 
 #define atomic_read(v)          ((v)->counter)
@@ -49,6 +50,8 @@ extern void atomic_set(atomic_t *, int);
 #define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
 #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
 
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 /* This is the old 24-bit implementation.  It's still used internally
  * by some sparc-specific code, notably the semaphore implementation.
  */
diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h
index 3a0b4383bbac..8198c3d0d007 100644
--- a/include/asm-sparc64/atomic.h
+++ b/include/asm-sparc64/atomic.h
@@ -72,6 +72,16 @@ extern int atomic64_sub_ret(int, atomic64_t *);
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 /* Atomic operations are already serializing */
 #ifdef CONFIG_SMP
 #define smp_mb__before_atomic_dec()	membar_storeload_loadload();
diff --git a/include/asm-v850/atomic.h b/include/asm-v850/atomic.h
index e497166ca42b..bede3172ce7f 100644
--- a/include/asm-v850/atomic.h
+++ b/include/asm-v850/atomic.h
@@ -104,6 +104,22 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = v->counter;
+	if (ret != u)
+		v->counter += a;
+	local_irq_restore(flags);
+
+	return ret != u;
+}
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 /* Atomic operations are already serializing on ARM */
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h
index 75c8a1e96737..0866ef67f198 100644
--- a/include/asm-x86_64/atomic.h
+++ b/include/asm-x86_64/atomic.h
@@ -362,6 +362,25 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v)
 
 #define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
 
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #define atomic_inc_return(v)  (atomic_add_return(1,v))
 #define atomic_dec_return(v)  (atomic_sub_return(1,v))
 
diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h
index cd40c5e75160..3670cc7695da 100644
--- a/include/asm-xtensa/atomic.h
+++ b/include/asm-xtensa/atomic.h
@@ -225,6 +225,25 @@ static inline int atomic_sub_return(int i, atomic_t * v)
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
 {
     unsigned int all_f = -1;
-- 
cgit v1.2.3-71-gd317


From 20dcae32439384b6863c626bb3b2a09bed65b33e Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Sun, 13 Nov 2005 16:07:33 -0800
Subject: [PATCH] aio: remove kioctx from mm_struct

Sync iocbs have a life cycle that don't need a kioctx.  Their retrying, if
any, is done in the context of their owner who has allocated them on the
stack.

The sole user of a sync iocb's ctx reference was aio_complete() checking for
an elevated iocb ref count that could never happen.  No path which grabs an
iocb ref has access to sync iocbs.

If we were to implement sync iocb cancelation it would be done by the owner of
the iocb using its on-stack reference.

Removing this chunk from aio_complete allows us to remove the entire kioctx
instance from mm_struct, reducing its size by a third.  On a i386 testing box
the slab size went from 768 to 504 bytes and from 5 to 8 per page.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Acked-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c                  | 27 +++++++++------------------
 include/linux/aio.h       |  2 +-
 include/linux/init_task.h |  1 -
 include/linux/sched.h     |  1 -
 kernel/fork.c             |  1 -
 5 files changed, 10 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/fs/aio.c b/fs/aio.c
index 20bb919eb195..e7cd40b626b7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -937,28 +937,19 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
 	unsigned long	tail;
 	int		ret;
 
-	/* Special case handling for sync iocbs: events go directly
-	 * into the iocb for fast handling.  Note that this will not 
-	 * work if we allow sync kiocbs to be cancelled. in which
-	 * case the usage count checks will have to move under ctx_lock
-	 * for all cases.
+	/*
+	 * Special case handling for sync iocbs:
+	 *  - events go directly into the iocb for fast handling
+	 *  - the sync task with the iocb in its stack holds the single iocb
+	 *    ref, no other paths have a way to get another ref
+	 *  - the sync task helpfully left a reference to itself in the iocb
 	 */
 	if (is_sync_kiocb(iocb)) {
-		int ret;
-
+		BUG_ON(iocb->ki_users != 1);
 		iocb->ki_user_data = res;
-		if (iocb->ki_users == 1) {
-			iocb->ki_users = 0;
-			ret = 1;
-		} else {
-			spin_lock_irq(&ctx->ctx_lock);
-			iocb->ki_users--;
-			ret = (0 == iocb->ki_users);
-			spin_unlock_irq(&ctx->ctx_lock);
-		}
-		/* sync iocbs put the task here for us */
+		iocb->ki_users = 0;
 		wake_up_process(iocb->ki_obj.tsk);
-		return ret;
+		return 1;
 	}
 
 	info = &ctx->ring_info;
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 403d71dcb7c8..9e0ae8711276 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -124,7 +124,7 @@ struct kiocb {
 		(x)->ki_users = 1;			\
 		(x)->ki_key = KIOCB_SYNC_KEY;		\
 		(x)->ki_filp = (filp);			\
-		(x)->ki_ctx = &tsk->active_mm->default_kioctx;	\
+		(x)->ki_ctx = NULL;			\
 		(x)->ki_cancel = NULL;			\
 		(x)->ki_dtor = NULL;			\
 		(x)->ki_obj.tsk = tsk;			\
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 68ab5f2ab9cd..dcfd2ecccb5d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -51,7 +51,6 @@
 	.page_table_lock =  SPIN_LOCK_UNLOCKED, 		\
 	.mmlist		= LIST_HEAD_INIT(name.mmlist),		\
 	.cpu_vm_mask	= CPU_MASK_ALL,				\
-	.default_kioctx = INIT_KIOCTX(name.default_kioctx, name),	\
 }
 
 #define INIT_SIGNALS(sig) {	\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 41df81395719..2038bd27b041 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -357,7 +357,6 @@ struct mm_struct {
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
-	struct kioctx		default_kioctx;
 };
 
 struct sighand_struct {
diff --git a/kernel/fork.c b/kernel/fork.c
index 2c70c9cdf5dc..e0d0b77343f8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -323,7 +323,6 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
 	spin_lock_init(&mm->page_table_lock);
 	rwlock_init(&mm->ioctx_list_lock);
 	mm->ioctx_list = NULL;
-	mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
 
-- 
cgit v1.2.3-71-gd317


From 5ef1c49f8f9f0d6b5b8d57bb4b66c605a3d65876 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Sun, 13 Nov 2005 16:07:35 -0800
Subject: [PATCH] aio: don't ref kioctx after decref in put_ioctx

put_ioctx's refcount debugging was doing an atomic_read after dropping its
reference when it wasn't the last ref, leaving a tiny race for another freeing
thread to sneak into.  This shifts the debugging before the ops, uses BUG_ON,
and reformats the defines a little.  Sadly, moving to inlines increased the
code size but this change decreases the code size by a whole 9 bytes :)

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/aio.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/aio.h b/include/linux/aio.h
index 9e0ae8711276..49fd37629ee4 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -210,8 +210,15 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id);
 int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 				  struct iocb *iocb));
 
-#define get_ioctx(kioctx)	do { if (unlikely(atomic_read(&(kioctx)->users) <= 0)) BUG(); atomic_inc(&(kioctx)->users); } while (0)
-#define put_ioctx(kioctx)	do { if (unlikely(atomic_dec_and_test(&(kioctx)->users))) __put_ioctx(kioctx); else if (unlikely(atomic_read(&(kioctx)->users) < 0)) BUG(); } while (0)
+#define get_ioctx(kioctx) do {						\
+	BUG_ON(unlikely(atomic_read(&(kioctx)->users) <= 0));		\
+	atomic_inc(&(kioctx)->users);					\
+} while (0)
+#define put_ioctx(kioctx) do {						\
+	BUG_ON(unlikely(atomic_read(&(kioctx)->users) <= 0));		\
+	if (unlikely(atomic_dec_and_test(&(kioctx)->users))) 		\
+		__put_ioctx(kioctx);					\
+} while (0)
 
 #define in_aio() !is_sync_wait(current->io_wait)
 /* may be used for debugging */
-- 
cgit v1.2.3-71-gd317


From ff6ed4063da39e6a30ce904005e4ed17385e2739 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 13 Nov 2005 16:07:38 -0800
Subject: [PATCH] acct.h needs jiffies.h

allnoconfig:

In file included from fs/super.c:28:
include/linux/acct.h:173: warning: `TICK_NSEC' is not defined

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/acct.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/acct.h b/include/linux/acct.h
index 93c5b3cdf951..9a66401073fc 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -16,6 +16,8 @@
 #define _LINUX_ACCT_H
 
 #include <linux/types.h>
+#include <linux/jiffies.h>
+
 #include <asm/param.h>
 #include <asm/byteorder.h>
 
-- 
cgit v1.2.3-71-gd317


From 3f39894d1b5c253b10fcb8fbbbcf65a330f6cdc7 Mon Sep 17 00:00:00 2001
From: George Anzinger <george@mvista.com>
Date: Sun, 13 Nov 2005 16:07:44 -0800
Subject: [PATCH] timespec: normalize off by one errors

It would appear that the timespec normalize code has an off by one error.
Found in three places.  Thanks to Ben for spotting.

Signed-off-by: George Anzinger<george@mvista.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/time.h  |  2 +-
 kernel/posix-timers.c | 10 +++-------
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/time.h b/include/linux/time.h
index 8e83f4e778bb..bfbe92d0767c 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -101,7 +101,7 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 static inline void
 set_normalized_timespec (struct timespec *ts, time_t sec, long nsec)
 {
-	while (nsec > NSEC_PER_SEC) {
+	while (nsec >= NSEC_PER_SEC) {
 		nsec -= NSEC_PER_SEC;
 		++sec;
 	}
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index ea55c7a1cd75..5870efb3e200 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -270,7 +270,7 @@ static void tstojiffie(struct timespec *tp, int res, u64 *jiff)
 	long sec = tp->tv_sec;
 	long nsec = tp->tv_nsec + res - 1;
 
-	if (nsec > NSEC_PER_SEC) {
+	if (nsec >= NSEC_PER_SEC) {
 		sec++;
 		nsec -= NSEC_PER_SEC;
 	}
@@ -1209,13 +1209,9 @@ static int do_posix_clock_monotonic_get(clockid_t clock, struct timespec *tp)
 
 	do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono);
 
-	tp->tv_sec += wall_to_mono.tv_sec;
-	tp->tv_nsec += wall_to_mono.tv_nsec;
+	set_normalized_timespec(tp, tp->tv_sec + wall_to_mono.tv_sec,
+				tp->tv_nsec + wall_to_mono.tv_nsec);
 
-	if ((tp->tv_nsec - NSEC_PER_SEC) > 0) {
-		tp->tv_nsec -= NSEC_PER_SEC;
-		tp->tv_sec++;
-	}
 	return 0;
 }
 
-- 
cgit v1.2.3-71-gd317


From 800d3c6f90b61cc82b09db635b59c00b1c460728 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Date: Sun, 13 Nov 2005 16:07:48 -0800
Subject: [PATCH] v4l: (943) added secam l video standard

- Added SECAM L' video standard
- SECAM L' is a Secam variant that requires special config.

This patch adds support on V4L core. Requires aditional patches
on tuners to support.

Signed-off-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/media/video/tuner-core.c | 11 ++++++++---
 include/linux/videodev2.h        |  1 +
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index 73c4041c35d7..e58abdfcaab8 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -251,7 +251,7 @@ static inline int check_mode(struct tuner *t, char *cmd)
 
 static char pal[] = "-";
 module_param_string(pal, pal, sizeof(pal), 0644);
-static char secam[] = "-";
+static char secam[] = "--";
 module_param_string(secam, secam, sizeof(secam), 0644);
 
 /* get more precise norm info from insmod option */
@@ -307,8 +307,13 @@ static int tuner_fixup_std(struct tuner *t)
 			break;
 		case 'l':
 		case 'L':
-			tuner_dbg ("insmod fixup: SECAM => SECAM-L\n");
-			t->std = V4L2_STD_SECAM_L;
+			if ((secam[1]=='C')||(secam[1]=='c')) {
+				tuner_dbg ("insmod fixup: SECAM => SECAM-L'\n");
+				t->std = V4L2_STD_SECAM_LC;
+			} else {
+				tuner_dbg ("insmod fixup: SECAM => SECAM-L\n");
+				t->std = V4L2_STD_SECAM_L;
+			}
 			break;
 		case '-':
 			/* default parameter, do nothing */
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index a114fff6568b..1cded681eb6d 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -636,6 +636,7 @@ typedef __u64 v4l2_std_id;
 #define V4L2_STD_SECAM_K        ((v4l2_std_id)0x00100000)
 #define V4L2_STD_SECAM_K1       ((v4l2_std_id)0x00200000)
 #define V4L2_STD_SECAM_L        ((v4l2_std_id)0x00400000)
+#define V4L2_STD_SECAM_LC       ((v4l2_std_id)0x00800000)
 
 /* ATSC/HDTV */
 #define V4L2_STD_ATSC_8_VSB     ((v4l2_std_id)0x01000000)
-- 
cgit v1.2.3-71-gd317


From 8069695c9e7da7ab7cd8ee749e8d5aa9e6e0660b Mon Sep 17 00:00:00 2001
From: Ricardo Cerqueira <v4l@cerqueira.org>
Date: Sun, 13 Nov 2005 16:07:49 -0800
Subject: [PATCH] v4l: (935) Moved common IR stuff to ir-common.c

- The pinnacle handler & remote are common to saa7134 PCI boards and em28xx
  USB boards, so the keymap was moved to ir-common and the keyhandler is back
  to ir-kbd-i2c

- request_module("ir-kbd-i2c") is no longer necessary at saa7134-core since
  saa7134.ko now depends on ir-kbd-i2c.ko to get the keyhandler

Signed-off-by: Ricardo Cerqueira <v4l@cerqueira.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/media/common/ir-common.c            |  60 +++++++++++++++
 drivers/media/video/ir-kbd-i2c.c            |  52 +++++++++++++
 drivers/media/video/saa7134/saa7134-core.c  |   2 -
 drivers/media/video/saa7134/saa7134-input.c | 109 ----------------------------
 include/media/ir-common.h                   |   1 +
 include/media/ir-kbd-i2c.h                  |   2 +
 6 files changed, 115 insertions(+), 111 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/ir-common.c b/drivers/media/common/ir-common.c
index 4b71fd6f7aed..7972c73bc14e 100644
--- a/drivers/media/common/ir-common.c
+++ b/drivers/media/common/ir-common.c
@@ -126,6 +126,66 @@ IR_KEYTAB_TYPE ir_codes_winfast[IR_KEYTAB_SIZE] = {
 };
 EXPORT_SYMBOL_GPL(ir_codes_winfast);
 
+IR_KEYTAB_TYPE ir_codes_pinnacle[IR_KEYTAB_SIZE] = {
+	[ 0x59 ] = KEY_MUTE,
+	[ 0x4a ] = KEY_POWER,
+
+	[ 0x18 ] = KEY_TEXT,
+	[ 0x26 ] = KEY_TV,
+	[ 0x3d ] = KEY_PRINT,
+
+	[ 0x48 ] = KEY_RED,
+	[ 0x04 ] = KEY_GREEN,
+	[ 0x11 ] = KEY_YELLOW,
+	[ 0x00 ] = KEY_BLUE,
+
+	[ 0x2d ] = KEY_VOLUMEUP,
+	[ 0x1e ] = KEY_VOLUMEDOWN,
+
+	[ 0x49 ] = KEY_MENU,
+
+	[ 0x16 ] = KEY_CHANNELUP,
+	[ 0x17 ] = KEY_CHANNELDOWN,
+
+	[ 0x20 ] = KEY_UP,
+	[ 0x21 ] = KEY_DOWN,
+	[ 0x22 ] = KEY_LEFT,
+	[ 0x23 ] = KEY_RIGHT,
+	[ 0x0d ] = KEY_SELECT,
+
+
+
+	[ 0x08 ] = KEY_BACK,
+	[ 0x07 ] = KEY_REFRESH,
+
+	[ 0x2f ] = KEY_ZOOM,
+	[ 0x29 ] = KEY_RECORD,
+
+	[ 0x4b ] = KEY_PAUSE,
+	[ 0x4d ] = KEY_REWIND,
+	[ 0x2e ] = KEY_PLAY,
+	[ 0x4e ] = KEY_FORWARD,
+	[ 0x53 ] = KEY_PREVIOUS,
+	[ 0x4c ] = KEY_STOP,
+	[ 0x54 ] = KEY_NEXT,
+
+	[ 0x69 ] = KEY_KP0,
+	[ 0x6a ] = KEY_KP1,
+	[ 0x6b ] = KEY_KP2,
+	[ 0x6c ] = KEY_KP3,
+	[ 0x6d ] = KEY_KP4,
+	[ 0x6e ] = KEY_KP5,
+	[ 0x6f ] = KEY_KP6,
+	[ 0x70 ] = KEY_KP7,
+	[ 0x71 ] = KEY_KP8,
+	[ 0x72 ] = KEY_KP9,
+
+	[ 0x74 ] = KEY_CHANNEL,
+	[ 0x0a ] = KEY_BACKSPACE,
+};
+
+EXPORT_SYMBOL_GPL(ir_codes_pinnacle);
+
 /* empty keytable, can be used as placeholder for not-yet created keytables */
 IR_KEYTAB_TYPE ir_codes_empty[IR_KEYTAB_SIZE] = {
 	[ 42 ] = KEY_COFFEE,
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index 0085567a1421..801c736e9328 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -183,6 +183,58 @@ static int get_key_knc1(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	return 1;
 }
 
+/* The new pinnacle PCTV remote (with the colored buttons)
+ *
+ * Ricardo Cerqueira <v4l@cerqueira.org>
+ */
+
+int get_key_pinnacle(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
+{
+	unsigned char b[4];
+	unsigned int start = 0,parity = 0,code = 0;
+
+	/* poll IR chip */
+	if (4 != i2c_master_recv(&ir->c,b,4)) {
+		dprintk(2,"read error\n");
+		return -EIO;
+	}
+
+	for (start = 0; start<4; start++) {
+		if (b[start] == 0x80) {
+			code=b[(start+3)%4];
+			parity=b[(start+2)%4];
+		}
+	}
+
+	/* Empty Request */
+	if (parity==0)
+		return 0;
+
+	/* Repeating... */
+	if (ir->old == parity)
+		return 0;
+
+
+	ir->old = parity;
+
+	/* Reduce code value to fit inside IR_KEYTAB_SIZE
+	 *
+	 * this is the only value that results in 42 unique
+	 * codes < 128
+	 */
+
+	code %= 0x88;
+
+	*ir_raw = code;
+	*ir_key = code;
+
+	dprintk(1,"Pinnacle PCTV key %02x\n", code);
+
+	return 1;
+}
+
+EXPORT_SYMBOL_GPL(get_key_pinnacle);
+
 /* ----------------------------------------------------------------------- */
 
 static void ir_key_poll(struct IR_i2c *ir)
diff --git a/drivers/media/video/saa7134/saa7134-core.c b/drivers/media/video/saa7134/saa7134-core.c
index 3d89a33289ed..14347854f98c 100644
--- a/drivers/media/video/saa7134/saa7134-core.c
+++ b/drivers/media/video/saa7134/saa7134-core.c
@@ -728,8 +728,6 @@ static int saa7134_hwinit2(struct saa7134_dev *dev)
 		irq2_mask |= (SAA7134_IRQ2_INTE_GPIO18  |
 			      SAA7134_IRQ2_INTE_GPIO18A |
 			      SAA7134_IRQ2_INTE_GPIO16  );
-	else if (dev->has_remote == SAA7134_REMOTE_I2C)
-		request_module("ir-kbd-i2c");
 
 	saa_writel(SAA7134_IRQ1, 0);
 	saa_writel(SAA7134_IRQ2, irq2_mask);
diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c
index 329accda6d45..e648cc3bc96d 100644
--- a/drivers/media/video/saa7134/saa7134-input.c
+++ b/drivers/media/video/saa7134/saa7134-input.c
@@ -485,64 +485,6 @@ static IR_KEYTAB_TYPE ir_codes_purpletv[IR_KEYTAB_SIZE] = {
 
 };
 
-static IR_KEYTAB_TYPE ir_codes_pinnacle[IR_KEYTAB_SIZE] = {
-	[ 0x59 ] = KEY_MUTE,
-	[ 0x4a ] = KEY_POWER,
-
-	[ 0x18 ] = KEY_TEXT,
-	[ 0x26 ] = KEY_TV,
-	[ 0x3d ] = KEY_PRINT,
-
-	[ 0x48 ] = KEY_RED,
-	[ 0x04 ] = KEY_GREEN,
-	[ 0x11 ] = KEY_YELLOW,
-	[ 0x00 ] = KEY_BLUE,
-
-	[ 0x2d ] = KEY_VOLUMEUP,
-	[ 0x1e ] = KEY_VOLUMEDOWN,
-
-	[ 0x49 ] = KEY_MENU,
-
-	[ 0x16 ] = KEY_CHANNELUP,
-	[ 0x17 ] = KEY_CHANNELDOWN,
-
-	[ 0x20 ] = KEY_UP,
-	[ 0x21 ] = KEY_DOWN,
-	[ 0x22 ] = KEY_LEFT,
-	[ 0x23 ] = KEY_RIGHT,
-	[ 0x0d ] = KEY_SELECT,
-
-
-
-	[ 0x08 ] = KEY_BACK,
-	[ 0x07 ] = KEY_REFRESH,
-
-	[ 0x2f ] = KEY_ZOOM,
-	[ 0x29 ] = KEY_RECORD,
-
-	[ 0x4b ] = KEY_PAUSE,
-	[ 0x4d ] = KEY_REWIND,
-	[ 0x2e ] = KEY_PLAY,
-	[ 0x4e ] = KEY_FORWARD,
-	[ 0x53 ] = KEY_PREVIOUS,
-	[ 0x4c ] = KEY_STOP,
-	[ 0x54 ] = KEY_NEXT,
-
-	[ 0x69 ] = KEY_KP0,
-	[ 0x6a ] = KEY_KP1,
-	[ 0x6b ] = KEY_KP2,
-	[ 0x6c ] = KEY_KP3,
-	[ 0x6d ] = KEY_KP4,
-	[ 0x6e ] = KEY_KP5,
-	[ 0x6f ] = KEY_KP6,
-	[ 0x70 ] = KEY_KP7,
-	[ 0x71 ] = KEY_KP8,
-	[ 0x72 ] = KEY_KP9,
-
-	[ 0x74 ] = KEY_CHANNEL,
-	[ 0x0a ] = KEY_BACKSPACE,
-};
-
 /* Mapping for the 28 key remote control as seen at
    http://www.sednacomputer.com/photo/cardbus-tv.jpg
    Pavel Mihaylov <bin@bash.info> */
@@ -635,57 +577,6 @@ static int get_key_purpletv(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	return 1;
 }
 
-/* The new pinnacle PCTV remote (with the colored buttons)
- *
- * Ricardo Cerqueira <v4l@cerqueira.org>
- */
-
-static int get_key_pinnacle(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
-{
-	unsigned char b[4];
-	unsigned int start = 0,parity = 0,code = 0;
-
-	/* poll IR chip */
-	if (4 != i2c_master_recv(&ir->c,b,4)) {
-		i2cdprintk("read error\n");
-		return -EIO;
-	}
-
-	for (start = 0; start<4; start++) {
-		if (b[start] == 0x80) {
-			code=b[(start+3)%4];
-			parity=b[(start+2)%4];
-		}
-	}
-
-	/* Empty Request */
-	if (parity==0)
-		return 0;
-
-	/* Repeating... */
-	if (ir->old == parity)
-		return 0;
-
-
-	ir->old = parity;
-
-	/* Reduce code value to fit inside IR_KEYTAB_SIZE
-	 *
-	 * this is the only value that results in 42 unique
-	 * codes < 128
-	 */
-
-	code %= 0x88;
-
-	*ir_raw = code;
-	*ir_key = code;
-
-	i2cdprintk("Pinnacle PCTV key %02x\n", code);
-
-	return 1;
-}
-
-
 void saa7134_input_irq(struct saa7134_dev *dev)
 {
 	struct saa7134_ir *ir = dev->remote;
diff --git a/include/media/ir-common.h b/include/media/ir-common.h
index 0f1ba95ec8d6..ad3e9bb670c3 100644
--- a/include/media/ir-common.h
+++ b/include/media/ir-common.h
@@ -49,6 +49,7 @@ struct ir_input_state {
 
 extern IR_KEYTAB_TYPE ir_codes_rc5_tv[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_winfast[IR_KEYTAB_SIZE];
+extern IR_KEYTAB_TYPE ir_codes_pinnacle[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_empty[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_hauppauge_new[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_pixelview[IR_KEYTAB_SIZE];
diff --git a/include/media/ir-kbd-i2c.h b/include/media/ir-kbd-i2c.h
index 00fa57eb9fde..730f21ed91db 100644
--- a/include/media/ir-kbd-i2c.h
+++ b/include/media/ir-kbd-i2c.h
@@ -19,4 +19,6 @@ struct IR_i2c {
 	char                   phys[32];
 	int                    (*get_key)(struct IR_i2c*, u32*, u32*);
 };
+
+int get_key_pinnacle(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw);
 #endif
-- 
cgit v1.2.3-71-gd317


From 633323ffffae91c3f22a08e0185fbfd3fae2a825 Mon Sep 17 00:00:00 2001
From: Bill Pechter <pechter@gmail.com>
Date: Sun, 13 Nov 2005 16:07:50 -0800
Subject: [PATCH] v4l:: (936) Support for sabrent bt848 version

Support for Sabrent bt848 version.

Signed-off-by: Bill Pechter <pechter@gmail.com>
Signed-off-by: Nickolay V. Shmyrev <nshmyrev@yandex.ru>
Signed-off-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/video4linux/CARDLIST.bttv  |  1 +
 Documentation/video4linux/CARDLIST.tuner |  1 +
 drivers/media/video/bttv-cards.c         | 19 ++++++++++++++++++-
 drivers/media/video/bttv.h               |  1 +
 drivers/media/video/tuner-simple.c       |  4 +++-
 include/media/tuner.h                    |  1 +
 6 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/video4linux/CARDLIST.bttv b/Documentation/video4linux/CARDLIST.bttv
index 2404099996ac..330246ac80f8 100644
--- a/Documentation/video4linux/CARDLIST.bttv
+++ b/Documentation/video4linux/CARDLIST.bttv
@@ -140,3 +140,4 @@
 139 -> Prolink PixelView PlayTV MPEG2 PV-M4900
 140 -> Osprey 440                                          [0070:ff07]
 141 -> Asound Skyeye PCTV
+142 -> Sabrent TV-FM (bttv version)
diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner
index ec840ca6f455..9d6544ea9f41 100644
--- a/Documentation/video4linux/CARDLIST.tuner
+++ b/Documentation/video4linux/CARDLIST.tuner
@@ -67,3 +67,4 @@ tuner=65 - Ymec TVF66T5-B/DFF
 tuner=66 - LG NTSC (TALN mini series)
 tuner=67 - Philips TD1316 Hybrid Tuner
 tuner=68 - Philips TUV1236D ATSC/NTSC dual in
+tuner=69 - Tena TNF 5335 MF
diff --git a/drivers/media/video/bttv-cards.c b/drivers/media/video/bttv-cards.c
index 3413bace443a..66ed9ea64180 100644
--- a/drivers/media/video/bttv-cards.c
+++ b/drivers/media/video/bttv-cards.c
@@ -2796,7 +2796,24 @@ struct tvcard bttv_tvcards[] = {
 		.tuner_addr	= ADDR_UNSET,
 		.radio_addr     = ADDR_UNSET,
 	},
-
+		/* ---- card 0x8e ---------------------------------- */
+	[BTTV_BOARD_SABRENT_TVFM] = {
+		.name		= "Sabrent TV-FM (bttv version)",
+		.video_inputs	= 3,
+		.audio_inputs	= 1,
+		.tuner		= 0,
+		.svhs		= 2,
+		.gpiomask	= 0x108007,
+		.muxsel		= { 2, 3, 1, 1},
+		.audiomux	= { 100000, 100002, 100002, 100000},
+		.no_msp34xx	= 1,
+		.no_tda9875     = 1,
+		.no_tda7432     = 1,
+		.pll		= PLL_28,
+		.tuner_type	= TUNER_TNF_5335MF,
+		.tuner_addr	= ADDR_UNSET,
+		.has_radio      = 1,
+	},
 };
 
 static const unsigned int bttv_num_tvcards = ARRAY_SIZE(bttv_tvcards);
diff --git a/drivers/media/video/bttv.h b/drivers/media/video/bttv.h
index 124ea41dada4..c1825248beb5 100644
--- a/drivers/media/video/bttv.h
+++ b/drivers/media/video/bttv.h
@@ -162,6 +162,7 @@
 #define BTTV_BOARD_PV_M4900                0x8b
 #define BTTV_BOARD_OSPREY440               0x8c
 #define BTTV_BOARD_ASOUND_SKYEYE	   0x8d
+#define BTTV_BOARD_SABRENT_TVFM   	   0x8e
 
 /* i2c address list */
 #define I2C_TSA5522        0xc2
diff --git a/drivers/media/video/tuner-simple.c b/drivers/media/video/tuner-simple.c
index d832205818f2..e0c9fdb9914a 100644
--- a/drivers/media/video/tuner-simple.c
+++ b/drivers/media/video/tuner-simple.c
@@ -233,7 +233,7 @@ static struct tunertype tuners[] = {
 	{ "Ymec TVision TVF-5533MF", Philips, NTSC,
 	  16*160.00,16*454.00,0x01,0x02,0x04,0x8e,732},
 
-	/* 60-68 */
+	/* 60-69 */
 	{ "Thomson DDT 7611 (ATSC/NTSC)", THOMSON, ATSC,
 	  16*157.25,16*454.00,0x39,0x3a,0x3c,0x8e,732},
 	{ "Tena TNF9533-D/IF/TNF9533-B/DF", Philips, PAL,
@@ -252,6 +252,8 @@ static struct tunertype tuners[] = {
 	  16*160.00,16*442.00,0xa1,0xa2,0xa4,0xc8,623 },
 	{ "Philips TUV1236D ATSC/NTSC dual in", Philips, ATSC,
 	  16*157.25,16*454.00,0x01,0x02,0x04,0xce,732 },
+	{ "Tena TNF 5335 MF", Philips, NTSC,
+	  16*157.25,16*454.00,0x01,0x02,0x04,0x8e,732 },
 };
 
 unsigned const int tuner_count = ARRAY_SIZE(tuners);
diff --git a/include/media/tuner.h b/include/media/tuner.h
index 9184e534b7ef..faa0f8e3091b 100644
--- a/include/media/tuner.h
+++ b/include/media/tuner.h
@@ -113,6 +113,7 @@
 #define TUNER_PHILIPS_TD1316		67
 
 #define TUNER_PHILIPS_TUV1236D		68	/* ATI HDTV Wonder */
+#define TUNER_TNF_5335MF                69	/* Sabrent Bt848   */
 
 #define NOTUNER 0
 #define PAL     1	/* PAL_BG */
-- 
cgit v1.2.3-71-gd317


From b2f0648ffda862d53f04f0a05979f3fa530d63c9 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sun, 13 Nov 2005 16:07:55 -0800
Subject: [PATCH] v4l: (945) adds a new include for internal v4l2 ioctls and
 api

Adds a new include for internal V4L2 ioctls and API

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/media/v4l2-common.h | 110 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 include/media/v4l2-common.h

(limited to 'include')

diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
new file mode 100644
index 000000000000..d3fd48157eb8
--- /dev/null
+++ b/include/media/v4l2-common.h
@@ -0,0 +1,110 @@
+/*
+    v4l2 common internal API header
+
+    This header contains internal shared ioctl definitions for use by the
+    internal low-level v4l2 drivers.
+    Each ioctl begins with VIDIOC_INT_ to clearly mark that it is an internal
+    define,
+
+    Copyright (C) 2005  Hans Verkuil <hverkuil@xs4all.nl>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef V4L2_COMMON_H_
+#define V4L2_COMMON_H_
+
+/* VIDIOC_INT_AUDIO_CLOCK_FREQ */
+enum v4l2_audio_clock_freq {
+	V4L2_AUDCLK_32_KHZ  = 32000,
+	V4L2_AUDCLK_441_KHZ = 44100,
+	V4L2_AUDCLK_48_KHZ  = 48000,
+};
+
+/* VIDIOC_INT_G_REGISTER and VIDIOC_INT_S_REGISTER */
+struct v4l2_register {
+	u32 i2c_id; 		/* I2C driver ID of the I2C chip. 0 for the I2C adapter. */
+	unsigned long reg;
+	u32 val;
+};
+
+/* VIDIOC_INT_DECODE_VBI_LINE */
+struct v4l2_decode_vbi_line {
+	u32 is_second_field;	/* Set to 0 for the first (odd) field,
+				   set to 1 for the second (even) field. */
+	u8 *p; 			/* Pointer to the sliced VBI data from the decoder.
+				   On exit points to the start of the payload. */
+	u32 line;		/* Line number of the sliced VBI data (1-23) */
+	u32 type;		/* VBI service type (V4L2_SLICED_*). 0 if no service found */
+};
+
+/* VIDIOC_INT_G_CHIP_IDENT: identifies the actual chip installed on the board */
+enum v4l2_chip_ident {
+	/* general idents: reserved range 0-49 */
+	V4L2_IDENT_UNKNOWN = 0,
+
+	/* module saa7115: reserved range 100-149 */
+	V4L2_IDENT_SAA7114 = 104,
+	V4L2_IDENT_SAA7115 = 105,
+
+	/* module saa7127: reserved range 150-199 */
+	V4L2_IDENT_SAA7127 = 157,
+	V4L2_IDENT_SAA7129 = 159,
+
+	/* module cx25840: reserved range 200-249 */
+	V4L2_IDENT_CX25840 = 240,
+	V4L2_IDENT_CX25841 = 241,
+	V4L2_IDENT_CX25842 = 242,
+	V4L2_IDENT_CX25843 = 243,
+};
+
+/* only implemented if CONFIG_VIDEO_ADV_DEBUG is defined */
+#define	VIDIOC_INT_S_REGISTER 		_IOR ('d', 100, struct v4l2_register)
+#define	VIDIOC_INT_G_REGISTER 		_IOWR('d', 101, struct v4l2_register)
+
+/* Reset the I2C chip */
+#define VIDIOC_INT_RESET            	_IO  ('d', 102)
+
+/* Set the frequency of the audio clock output.
+   Used to slave an audio processor to the video decoder, ensuring that audio
+   and video remain synchronized. */
+#define VIDIOC_INT_AUDIO_CLOCK_FREQ 	_IOR ('d', 103, enum v4l2_audio_clock_freq)
+
+/* Video decoders that support sliced VBI need to implement this ioctl.
+   Field p of the v4l2_sliced_vbi_line struct is set to the start of the VBI
+   data that was generated by the decoder. The driver then parses the sliced
+   VBI data and sets the other fields in the struct accordingly. The pointer p
+   is updated to point to the start of the payload which can be copied
+   verbatim into the data field of the v4l2_sliced_vbi_data struct. If no
+   valid VBI data was found, then the type field is set to 0 on return. */
+#define VIDIOC_INT_DECODE_VBI_LINE  	_IOWR('d', 104, struct v4l2_decode_vbi_line)
+
+/* Used to generate VBI signals on a video signal. v4l2_sliced_vbi_data is
+   filled with the data packets that should be output. Note that if you set
+   the line field to 0, then that VBI signal is disabled. */
+#define VIDIOC_INT_S_VBI_DATA 		_IOW ('d', 105, struct v4l2_sliced_vbi_data)
+
+/* Used to obtain the sliced VBI packet from a readback register. Not all
+   video decoders support this. If no data is available because the readback
+   register contains invalid or erroneous data -EIO is returned. Note that
+   you must fill in the 'id' member and the 'field' member (to determine
+   whether CC data from the first or second field should be obtained). */
+#define VIDIOC_INT_G_VBI_DATA 		_IOWR('d', 106, struct v4l2_sliced_vbi_data *)
+
+/* Returns the chip identifier or V4L2_IDENT_UNKNOWN if no identification can
+   be made. */
+#define VIDIOC_INT_G_CHIP_IDENT		_IOR ('d', 107, enum v4l2_chip_ident *)
+
+#endif /* V4L2_COMMON_H_ */
-- 
cgit v1.2.3-71-gd317


From aeec46b97a7975fd983219177980c58ed4fd607c Mon Sep 17 00:00:00 2001
From: Martin Waitz <tali@admingilde.org>
Date: Sun, 13 Nov 2005 16:08:13 -0800
Subject: [PATCH] DocBook: allow to mark structure members private

Many structures contain both an internal part and one which is part of the API
to other modules.  With this patch it is possible to only include these public
members in the kernel documentation.

Signed-off-by: Martin Waitz <tali@admingilde.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/usb.h |  6 +++---
 scripts/kernel-doc  | 13 +++++++++++--
 2 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 748d04385256..856d232c7562 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -819,7 +819,7 @@ typedef void (*usb_complete_t)(struct urb *, struct pt_regs *);
  */
 struct urb
 {
-	/* private, usb core and host controller only fields in the urb */
+	/* private: usb core and host controller only fields in the urb */
 	struct kref kref;		/* reference count of the URB */
 	spinlock_t lock;		/* lock for the URB */
 	void *hcpriv;			/* private data for host controller */
@@ -827,7 +827,7 @@ struct urb
 	atomic_t use_count;		/* concurrent submissions counter */
 	u8 reject;			/* submissions will fail */
 
-	/* public, documented fields in the urb that can be used by drivers */
+	/* public: documented fields in the urb that can be used by drivers */
 	struct list_head urb_list;	/* list head for use by the urb's
 					 * current owner */
 	struct usb_device *dev; 	/* (in) pointer to associated device */
@@ -1045,7 +1045,7 @@ struct usb_sg_request {
 	size_t			bytes;
 
 	/* 
-	 * members below are private to usbcore,
+	 * members below are private: to usbcore,
 	 * and are not provided for driver access!
 	 */
 	spinlock_t		lock;
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 8aaf74e64183..2f45fd2969d0 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -117,6 +117,8 @@ use strict;
 # struct my_struct {
 #     int a;
 #     int b;
+# /* private: */
+#     int c;
 # };
 #
 # All descriptions can be multiline, except the short function description.
@@ -1304,6 +1306,12 @@ sub dump_struct($$) {
 	# ignore embedded structs or unions
 	$members =~ s/{.*?}//g;
 
+	# ignore members marked private:
+	$members =~ s/\/\*.*?private:.*?public:.*?\*\///gos;
+	$members =~ s/\/\*.*?private:.*//gos;
+	# strip comments:
+	$members =~ s/\/\*.*?\*\///gos;
+
 	create_parameterlist($members, ';', $file);
 
 	output_declaration($declaration_name,
@@ -1329,6 +1337,7 @@ sub dump_enum($$) {
     my $x = shift;
     my $file = shift;
 
+    $x =~ s@/\*.*?\*/@@gos;	# strip comments.
     if ($x =~ /enum\s+(\w+)\s*{(.*)}/) {
         $declaration_name = $1;
         my $members = $2;
@@ -1365,6 +1374,7 @@ sub dump_typedef($$) {
     my $x = shift;
     my $file = shift;
 
+    $x =~ s@/\*.*?\*/@@gos;	# strip comments.
     while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) {
         $x =~ s/\(*.\)\s*;$/;/;
 	$x =~ s/\[*.\]\s*;$/;/;
@@ -1420,7 +1430,7 @@ sub create_parameterlist($$$) {
 	    $type = $arg;
 	    $type =~ s/([^\(]+\(\*)$param/$1/;
 	    push_parameter($param, $type, $file);
-	} else {
+	} elsif ($arg) {
 	    $arg =~ s/\s*:\s*/:/g;
 	    $arg =~ s/\s*\[/\[/g;
 
@@ -1628,7 +1638,6 @@ sub process_state3_type($$) {
     my $x = shift;
     my $file = shift;
 
-    $x =~ s@/\*.*?\*/@@gos;	# strip comments.
     $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's.
     $x =~ s@^\s+@@gos; # strip leading spaces
     $x =~ s@\s+$@@gos; # strip trailing spaces
-- 
cgit v1.2.3-71-gd317


From 5cd16ee934eafca74a6bb790328950cec68a8b78 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 11 Nov 2005 14:25:24 +1100
Subject: [PATCH] powerpc: Merge page.h

Merge asm-ppc/page.h and asm-ppc64/page.h into asm-powerpc/page.h,
asm-powerpc/page_32.h and asm-powerpc/page_64.h

Built for PPC (common_defconfig), with ARCH=powerpc, mostly built with
ARCH=ppc (other things break the build). Built and booted on P5 LPAR
for PPC64 with ARCH=ppc/powerpc (pseries_defconfig). Mostly built for
iSeries powerpc.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/asm-powerpc/page.h    | 179 ++++++++++++++++++++++++++++++++++++++++++
 include/asm-powerpc/page_32.h |  38 +++++++++
 include/asm-powerpc/page_64.h | 174 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 391 insertions(+)
 create mode 100644 include/asm-powerpc/page.h
 create mode 100644 include/asm-powerpc/page_32.h
 create mode 100644 include/asm-powerpc/page_64.h

(limited to 'include')

diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h
new file mode 100644
index 000000000000..18c1e5ee81a3
--- /dev/null
+++ b/include/asm-powerpc/page.h
@@ -0,0 +1,179 @@
+#ifndef _ASM_POWERPC_PAGE_H
+#define _ASM_POWERPC_PAGE_H
+
+/*
+ * Copyright (C) 2001,2005 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+#include <asm/asm-compat.h>
+
+/*
+ * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
+ * page size. When using 64K pages however, whether we are really supporting
+ * 64K pages in HW or not is irrelevant to those definitions.
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+#define PAGE_SHIFT		16
+#else
+#define PAGE_SHIFT		12
+#endif
+
+#define PAGE_SIZE		(ASM_CONST(1) << PAGE_SHIFT)
+
+/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
+#define __HAVE_ARCH_GATE_AREA		1
+
+/*
+ * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
+ * assign PAGE_MASK to a larger type it gets extended the way we want
+ * (i.e. with 1s in the high bits)
+ */
+#define PAGE_MASK      (~((1 << PAGE_SHIFT) - 1))
+
+#define PAGE_OFFSET     ASM_CONST(CONFIG_KERNEL_START)
+#define KERNELBASE      PAGE_OFFSET
+
+#ifdef CONFIG_DISCONTIGMEM
+#define page_to_pfn(page)	discontigmem_page_to_pfn(page)
+#define pfn_to_page(pfn)	discontigmem_pfn_to_page(pfn)
+#define pfn_valid(pfn)		discontigmem_pfn_valid(pfn)
+#endif
+
+#ifdef CONFIG_FLATMEM
+#define pfn_to_page(pfn)	(mem_map + (pfn))
+#define page_to_pfn(page)	((unsigned long)((page) - mem_map))
+#define pfn_valid(pfn)		((pfn) < max_mapnr)
+#endif
+
+#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE))
+#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET)
+
+/*
+ * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
+ * and needs to be executable.  This means the whole heap ends
+ * up being executable.
+ */
+#define VM_DATA_DEFAULT_FLAGS32	(VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS64	(VM_READ | VM_WRITE | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#ifdef __powerpc64__
+#include <asm/page_64.h>
+#else
+#include <asm/page_32.h>
+#endif
+
+/* align addr on a size boundary - adjust address up/down if needed */
+#define _ALIGN_UP(addr,size)	(((addr)+((size)-1))&(~((size)-1)))
+#define _ALIGN_DOWN(addr,size)	((addr)&(~((size)-1)))
+
+/* align addr on a size boundary - adjust address up if needed */
+#define _ALIGN(addr,size)     _ALIGN_UP(addr,size)
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)	_ALIGN(addr, PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+
+#undef STRICT_MM_TYPECHECKS
+
+#ifdef STRICT_MM_TYPECHECKS
+/* These are used to make use of C type-checking. */
+
+/* PTE level */
+typedef struct { pte_basic_t pte; } pte_t;
+#define pte_val(x)	((x).pte)
+#define __pte(x)	((pte_t) { (x) })
+
+/* 64k pages additionally define a bigger "real PTE" type that gathers
+ * the "second half" part of the PTE for pseudo 64k pages
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+
+/* PMD level */
+typedef struct { unsigned long pmd; } pmd_t;
+#define pmd_val(x)	((x).pmd)
+#define __pmd(x)	((pmd_t) { (x) })
+
+/* PUD level exusts only on 4k pages */
+#ifndef CONFIG_PPC_64K_PAGES
+typedef struct { unsigned long pud; } pud_t;
+#define pud_val(x)	((x).pud)
+#define __pud(x)	((pud_t) { (x) })
+#endif
+
+/* PGD level */
+typedef struct { unsigned long pgd; } pgd_t;
+#define pgd_val(x)	((x).pgd)
+#define __pgd(x)	((pgd_t) { (x) })
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) })
+
+#else
+
+/*
+ * .. while these make it easier on the compiler
+ */
+
+typedef pte_basic_t pte_t;
+#define pte_val(x)	(x)
+#define __pte(x)	(x)
+
+#ifdef CONFIG_PPC_64K_PAGES
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef unsigned long real_pte_t;
+#endif
+
+
+typedef unsigned long pmd_t;
+#define pmd_val(x)	(x)
+#define __pmd(x)	(x)
+
+#ifndef CONFIG_PPC_64K_PAGES
+typedef unsigned long pud_t;
+#define pud_val(x)	(x)
+#define __pud(x)	(x)
+#endif
+
+typedef unsigned long pgd_t;
+#define pgd_val(x)	(x)
+#define pgprot_val(x)	(x)
+
+typedef unsigned long pgprot_t;
+#define __pgd(x)	(x)
+#define __pgprot(x)	(x)
+
+#endif
+
+struct page;
+extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
+extern void copy_user_page(void *to, void *from, unsigned long vaddr,
+		struct page *p);
+extern int page_is_ram(unsigned long pfn);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_POWERPC_PAGE_H */
diff --git a/include/asm-powerpc/page_32.h b/include/asm-powerpc/page_32.h
new file mode 100644
index 000000000000..35221300a2ce
--- /dev/null
+++ b/include/asm-powerpc/page_32.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_POWERPC_PAGE_32_H
+#define _ASM_POWERPC_PAGE_32_H
+
+#define VM_DATA_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS32
+
+#ifndef __ASSEMBLY__
+/*
+ * The basic type of a PTE - 64 bits for those CPUs with > 32 bit
+ * physical addressing.  For now this just the IBM PPC440.
+ */
+#ifdef CONFIG_PTE_64BIT
+typedef unsigned long long pte_basic_t;
+#define PTE_SHIFT	(PAGE_SHIFT - 3)	/* 512 ptes per page */
+#define PTE_FMT		"%16Lx"
+#else
+typedef unsigned long pte_basic_t;
+#define PTE_SHIFT	(PAGE_SHIFT - 2)	/* 1024 ptes per page */
+#define PTE_FMT		"%.8lx"
+#endif
+
+struct page;
+extern void clear_pages(void *page, int order);
+static inline void clear_page(void *page) { clear_pages(page, 0); }
+extern void copy_page(void *to, void *from);
+
+/* Pure 2^n version of get_order */
+extern __inline__ int get_order(unsigned long size)
+{
+	int lz;
+
+	size = (size-1) >> PAGE_SHIFT;
+	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size));
+	return 32 - lz;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_PAGE_32_H */
diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h
new file mode 100644
index 000000000000..c16f106b5373
--- /dev/null
+++ b/include/asm-powerpc/page_64.h
@@ -0,0 +1,174 @@
+#ifndef _ASM_POWERPC_PAGE_64_H
+#define _ASM_POWERPC_PAGE_64_H
+
+/*
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * We always define HW_PAGE_SHIFT to 12 as use of 64K pages remains Linux
+ * specific, every notion of page number shared with the firmware, TCEs,
+ * iommu, etc... still uses a page size of 4K.
+ */
+#define HW_PAGE_SHIFT		12
+#define HW_PAGE_SIZE		(ASM_CONST(1) << HW_PAGE_SHIFT)
+#define HW_PAGE_MASK		(~(HW_PAGE_SIZE-1))
+
+/*
+ * PAGE_FACTOR is the number of bits factor between PAGE_SHIFT and
+ * HW_PAGE_SHIFT, that is 4K pages.
+ */
+#define PAGE_FACTOR		(PAGE_SHIFT - HW_PAGE_SHIFT)
+
+#define REGION_SIZE   4UL
+#define REGION_SHIFT  60UL
+#define REGION_MASK   (((1UL<<REGION_SIZE)-1UL)<<REGION_SHIFT)
+
+#define VMALLOCBASE		ASM_CONST(0xD000000000000000)
+#define VMALLOC_REGION_ID	(VMALLOCBASE >> REGION_SHIFT)
+#define KERNEL_REGION_ID	(KERNELBASE >> REGION_SHIFT)
+#define USER_REGION_ID		(0UL)
+#define REGION_ID(ea)		(((unsigned long)(ea)) >> REGION_SHIFT)
+
+/* Segment size */
+#define SID_SHIFT		28
+#define SID_MASK		0xfffffffffUL
+#define ESID_MASK		0xfffffffff0000000UL
+#define GET_ESID(x)		(((x) >> SID_SHIFT) & SID_MASK)
+
+#ifndef __ASSEMBLY__
+#include <asm/cache.h>
+
+typedef unsigned long pte_basic_t;
+
+static __inline__ void clear_page(void *addr)
+{
+	unsigned long lines, line_size;
+
+	line_size = ppc64_caches.dline_size;
+	lines = ppc64_caches.dlines_per_page;
+
+	__asm__ __volatile__(
+	"mtctr	%1	# clear_page\n\
+1:      dcbz	0,%0\n\
+	add	%0,%0,%3\n\
+	bdnz+	1b"
+        : "=r" (addr)
+        : "r" (lines), "0" (addr), "r" (line_size)
+	: "ctr", "memory");
+}
+
+extern void copy_4K_page(void *to, void *from);
+
+#ifdef CONFIG_PPC_64K_PAGES
+static inline void copy_page(void *to, void *from)
+{
+	unsigned int i;
+	for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) {
+		copy_4K_page(to, from);
+		to += 4096;
+		from += 4096;
+	}
+}
+#else /* CONFIG_PPC_64K_PAGES */
+static inline void copy_page(void *to, void *from)
+{
+	copy_4K_page(to, from);
+}
+#endif /* CONFIG_PPC_64K_PAGES */
+
+/* Log 2 of page table size */
+extern u64 ppc64_pft_size;
+
+/* Large pages size */
+extern unsigned int HPAGE_SHIFT;
+#define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_HUGETLB_PAGE
+
+#define HTLB_AREA_SHIFT		40
+#define HTLB_AREA_SIZE		(1UL << HTLB_AREA_SHIFT)
+#define GET_HTLB_AREA(x)	((x) >> HTLB_AREA_SHIFT)
+
+#define LOW_ESID_MASK(addr, len)    (((1U << (GET_ESID(addr+len-1)+1)) \
+		                      - (1U << GET_ESID(addr))) & 0xffff)
+#define HTLB_AREA_MASK(addr, len)   (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
+		                      - (1U << GET_HTLB_AREA(addr))) & 0xffff)
+
+#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
+#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
+#define ARCH_HAS_SETCLEAR_HUGE_PTE
+
+#define touches_hugepage_low_range(mm, addr, len) \
+	(LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas)
+#define touches_hugepage_high_range(mm, addr, len) \
+	(HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas)
+
+#define __within_hugepage_low_range(addr, len, segmask) \
+	((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask))
+#define within_hugepage_low_range(addr, len) \
+	__within_hugepage_low_range((addr), (len), \
+				    current->mm->context.low_htlb_areas)
+#define __within_hugepage_high_range(addr, len, zonemask) \
+	((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask))
+#define within_hugepage_high_range(addr, len) \
+	__within_hugepage_high_range((addr), (len), \
+				    current->mm->context.high_htlb_areas)
+
+#define is_hugepage_only_range(mm, addr, len) \
+	(touches_hugepage_high_range((mm), (addr), (len)) || \
+	  touches_hugepage_low_range((mm), (addr), (len)))
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+
+#define in_hugepage_area(context, addr) \
+	(cpu_has_feature(CPU_FTR_16M_PAGE) && \
+	 ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \
+	   ( ((addr) < 0x100000000L) && \
+	     ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) )
+
+#else /* !CONFIG_HUGETLB_PAGE */
+
+#define in_hugepage_area(mm, addr)	0
+
+#endif /* !CONFIG_HUGETLB_PAGE */
+
+#ifdef MODULE
+#define __page_aligned __attribute__((__aligned__(PAGE_SIZE)))
+#else
+#define __page_aligned \
+	__attribute__((__aligned__(PAGE_SIZE), \
+		__section__(".data.page_aligned")))
+#endif
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(test_thread_flag(TIF_32BIT) ? \
+	 VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
+
+/*
+ * This is the default if a program doesn't have a PT_GNU_STACK
+ * program header entry. The PPC64 ELF ABI has a non executable stack
+ * stack by default, so in the absense of a PT_GNU_STACK program header
+ * we turn execute permission off.
+ */
+#define VM_STACK_DEFAULT_FLAGS32	(VM_READ | VM_WRITE | VM_EXEC | \
+					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_STACK_DEFAULT_FLAGS64	(VM_READ | VM_WRITE | \
+					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_STACK_DEFAULT_FLAGS \
+	(test_thread_flag(TIF_32BIT) ? \
+	 VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
+
+#include <asm-generic/page.h>
+
+#endif /* _ASM_POWERPC_PAGE_64_H */
-- 
cgit v1.2.3-71-gd317


From c5e24354efae9f962e0e369d875d45f47e0bb9aa Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Sat, 12 Nov 2005 00:06:05 +1100
Subject: [PATCH] powerpc: Turn cpu_irq_down into kexec_cpu_down

We currently have a ppc_md member called cpu_irq_down, which disables IRQs
for the cpu in question. The only caller of cpu_irq_down is the kexec code.

On pSeries we need to do more than just teardown IRQs at kexec time, so rename
the ppc_md member to kexec_cpu_down and expand it. The pSeries code needs to
know, and other platforms might too, whether we're doing a crash shutdown (ie.
panicking) or a regular kexec, so add a flag for that.

The pSeries implementation of kexec_cpu_down does an unregister VPA call, which
tells the Hypervisor to stop writing stuff into our pacas. Without this we can
get weird memory corruption bugs when we kexec, caused by the Hypervisor
writing into the first kernel's pacas which happens to be somewhere interesting
in the second kernel's memory.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/pseries/setup.c | 26 ++++++++++++++++++++++++--
 arch/ppc64/kernel/machine_kexec.c      | 12 ++++++------
 include/asm-powerpc/machdep.h          |  4 +++-
 3 files changed, 33 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 31990829310c..b9d9732b2e06 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -200,14 +200,12 @@ static void __init pSeries_setup_arch(void)
 	if (ppc64_interrupt_controller == IC_OPEN_PIC) {
 		ppc_md.init_IRQ       = pSeries_init_mpic;
 		ppc_md.get_irq        = mpic_get_irq;
-	 	ppc_md.cpu_irq_down   = mpic_teardown_this_cpu;
 		/* Allocate the mpic now, so that find_and_init_phbs() can
 		 * fill the ISUs */
 		pSeries_setup_mpic();
 	} else {
 		ppc_md.init_IRQ       = xics_init_IRQ;
 		ppc_md.get_irq        = xics_get_irq;
-		ppc_md.cpu_irq_down   = xics_teardown_cpu;
 	}
 
 #ifdef CONFIG_SMP
@@ -595,6 +593,27 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus)
 	return PCI_PROBE_NORMAL;
 }
 
+#ifdef CONFIG_KEXEC
+static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+	/* Don't risk a hypervisor call if we're crashing */
+	if (!crash_shutdown) {
+		unsigned long vpa = __pa(&get_paca()->lppaca);
+
+		if (unregister_vpa(hard_smp_processor_id(), vpa)) {
+			printk("VPA deregistration of cpu %u (hw_cpu_id %d) "
+					"failed\n", smp_processor_id(),
+					hard_smp_processor_id());
+		}
+	}
+
+	if (ppc64_interrupt_controller == IC_OPEN_PIC)
+		mpic_teardown_this_cpu(secondary);
+	else
+		xics_teardown_cpu(secondary);
+}
+#endif
+
 struct machdep_calls __initdata pSeries_md = {
 	.probe			= pSeries_probe,
 	.setup_arch		= pSeries_setup_arch,
@@ -617,4 +636,7 @@ struct machdep_calls __initdata pSeries_md = {
 	.check_legacy_ioport	= pSeries_check_legacy_ioport,
 	.system_reset_exception = pSeries_system_reset_exception,
 	.machine_check_exception = pSeries_machine_check_exception,
+#ifdef CONFIG_KEXEC
+	.kexec_cpu_down		= pseries_kexec_cpu_down,
+#endif
 };
diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c
index 07ea03598c00..203f1d5e6f10 100644
--- a/arch/ppc64/kernel/machine_kexec.c
+++ b/arch/ppc64/kernel/machine_kexec.c
@@ -185,8 +185,8 @@ void kexec_copy_flush(struct kimage *image)
  */
 void kexec_smp_down(void *arg)
 {
-	if (ppc_md.cpu_irq_down)
-		ppc_md.cpu_irq_down(1);
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 1);
 
 	local_irq_disable();
 	kexec_smp_wait();
@@ -233,8 +233,8 @@ static void kexec_prepare_cpus(void)
 	}
 
 	/* after we tell the others to go down */
-	if (ppc_md.cpu_irq_down)
-		ppc_md.cpu_irq_down(0);
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 0);
 
 	put_cpu();
 
@@ -255,8 +255,8 @@ static void kexec_prepare_cpus(void)
 	 * UP to an SMP kernel.
 	 */
 	smp_release_cpus();
-	if (ppc_md.cpu_irq_down)
-		ppc_md.cpu_irq_down(0);
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 0);
 	local_irq_disable();
 }
 
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index 5670f0cd6143..c011abb8b600 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -93,7 +93,9 @@ struct machdep_calls {
 
 	void		(*init_IRQ)(void);
 	int		(*get_irq)(struct pt_regs *);
-	void		(*cpu_irq_down)(int secondary);
+#ifdef CONFIG_KEXEC
+	void		(*kexec_cpu_down)(int crash_shutdown, int secondary);
+#endif
 
 	/* PCI stuff */
 	/* Called after scanning the bus, before allocating resources */
-- 
cgit v1.2.3-71-gd317


From 593e537b93193d1696809817533ce5ad510445b1 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Sat, 12 Nov 2005 00:06:06 +1100
Subject: [PATCH] powerpc: Export htab start/end via device tree

The userspace kexec-tools need to know the location of the htab on non-lpar
machines, as well as the end of the kernel. Export via the device tree.

NB. This patch has been updated to use "linux,x" property names. You may
need to update your kexec-tools to match.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/setup_64.c    |  5 ++++
 arch/ppc64/kernel/machine_kexec.c | 51 +++++++++++++++++++++++++++++++++++++++
 include/asm-powerpc/kexec.h       |  1 +
 3 files changed, 57 insertions(+)

(limited to 'include')

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index fdbd9f9122f2..608fee7c7e20 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -59,6 +59,7 @@
 #include <asm/firmware.h>
 #include <asm/xmon.h>
 #include <asm/udbg.h>
+#include <asm/kexec.h>
 
 #include "setup.h"
 
@@ -415,6 +416,10 @@ void __init setup_system(void)
 	 */
 	unflatten_device_tree();
 
+#ifdef CONFIG_KEXEC
+	kexec_setup();	/* requires unflattened device tree. */
+#endif
+
 	/*
 	 * Fill the ppc64_caches & systemcfg structures with informations
 	 * retreived from the device-tree. Need to be called before
diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c
index 203f1d5e6f10..97c51e452be7 100644
--- a/arch/ppc64/kernel/machine_kexec.c
+++ b/arch/ppc64/kernel/machine_kexec.c
@@ -305,3 +305,54 @@ void machine_kexec(struct kimage *image)
 			ppc_md.hpte_clear_all);
 	/* NOTREACHED */
 }
+
+/* Values we need to export to the second kernel via the device tree. */
+static unsigned long htab_base, htab_size, kernel_end;
+
+static struct property htab_base_prop = {
+	.name = "linux,htab-base",
+	.length = sizeof(unsigned long),
+	.value = (unsigned char *)&htab_base,
+};
+
+static struct property htab_size_prop = {
+	.name = "linux,htab-size",
+	.length = sizeof(unsigned long),
+	.value = (unsigned char *)&htab_size,
+};
+
+static struct property kernel_end_prop = {
+	.name = "linux,kernel-end",
+	.length = sizeof(unsigned long),
+	.value = (unsigned char *)&kernel_end,
+};
+
+static void __init export_htab_values(void)
+{
+	struct device_node *node;
+
+	node = of_find_node_by_path("/chosen");
+	if (!node)
+		return;
+
+	kernel_end = __pa(_end);
+	prom_add_property(node, &kernel_end_prop);
+
+	/* On machines with no htab htab_address is NULL */
+	if (NULL == htab_address)
+		goto out;
+
+	htab_base = __pa(htab_address);
+	prom_add_property(node, &htab_base_prop);
+
+	htab_size = 1UL << ppc64_pft_size;
+	prom_add_property(node, &htab_size_prop);
+
+ out:
+	of_node_put(node);
+}
+
+void __init kexec_setup(void)
+{
+	export_htab_values();
+}
diff --git a/include/asm-powerpc/kexec.h b/include/asm-powerpc/kexec.h
index 062ab9ba68eb..c72ffc709ea8 100644
--- a/include/asm-powerpc/kexec.h
+++ b/include/asm-powerpc/kexec.h
@@ -40,6 +40,7 @@ extern note_buf_t crash_notes[];
 #ifdef __powerpc64__
 extern void kexec_smp_wait(void);	/* get and clear naca physid, wait for
 					  master to copy new code to 0 */
+extern void __init kexec_setup(void);
 #else
 struct kimage;
 extern void machine_kexec_simple(struct kimage *image);
-- 
cgit v1.2.3-71-gd317


From 0c37ec2aa88bd8a6aaeb284ff5c86f4c6d8e8469 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 14 Nov 2005 14:55:58 +1100
Subject: [PATCH] powerpc: vdso fixes (take #2)

This fixes various errors in the new functions added in the vDSO's,
I've now verified all functions on both 32 and 64 bits vDSOs. It also
fix a sign extension bug getting the initial time of day at boot that
could cause the monotonic clock value to be completely on bogus for
64 bits applications (with either the vDSO or the syscall) on
powermacs.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/asm-offsets.c         |  6 ++++--
 arch/powerpc/kernel/vdso32/datapage.S     |  3 ++-
 arch/powerpc/kernel/vdso32/gettimeofday.S | 12 ++++++++----
 arch/powerpc/kernel/vdso64/datapage.S     |  1 +
 arch/powerpc/kernel/vdso64/gettimeofday.S | 31 +++++++++++++++++++------------
 arch/powerpc/platforms/powermac/time.c    |  8 ++++----
 include/asm-powerpc/vdso_datapage.h       |  2 +-
 7 files changed, 39 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4550eb4f4fbd..91538d2445bf 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -270,13 +270,15 @@ int main(void)
 	DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
 	DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
 	DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
+	DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec));
+	DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec));
 	DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
 	DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
 #else
 	DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
 	DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
-	DEFINE(TSPEC32_TV_SEC, offsetof(struct timespec, tv_sec));
-	DEFINE(TSPEC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
+	DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec));
+	DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
 #endif
 	/* timeval/timezone offsets for use by vdso */
 	DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index a08c26e87835..f6b38472318d 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -77,8 +77,9 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
 	mflr	r12
   .cfi_register lr,r12
 	bl	__get_datapage@local
-	lwz	r3,CFG_TB_TICKS_PER_SEC(r3)
 	lwz	r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
+	lwz	r3,CFG_TB_TICKS_PER_SEC(r3)
 	mtlr	r12
+	blr
   .cfi_endproc
 V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index aeb5fc9b87b3..0a32a41d50b0 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -83,7 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	/* Check for supported clock IDs */
 	cmpli	cr0,r3,CLOCK_REALTIME
 	cmpli	cr1,r3,CLOCK_MONOTONIC
-	cror	cr0,cr0,cr1
+	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
 	bne	cr0,99f
 
 	mflr	r12			/* r12 saves lr */
@@ -91,7 +91,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	mr	r10,r3			/* r10 saves id */
 	mr	r11,r4			/* r11 saves tp */
 	bl	__get_datapage@local	/* get data page */
-	mr	r9, r3			/* datapage ptr in r9 */
+	mr	r9,r3			/* datapage ptr in r9 */
 	beq	cr1,50f			/* if monotonic -> jump there */
 
 	/*
@@ -173,10 +173,14 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	add	r4,r4,r7
 	lis	r5,NSEC_PER_SEC@h
 	ori	r5,r5,NSEC_PER_SEC@l
-	cmpli	cr0,r4,r5
+	cmpl	cr0,r4,r5
+	cmpli	cr1,r4,0
 	blt	1f
 	subf	r4,r5,r4
 	addi	r3,r3,1
+1:	bge	cr1,1f
+	addi	r3,r3,-1
+	add	r4,r4,r5
 1:	stw	r3,TSPC32_TV_SEC(r11)
 	stw	r4,TSPC32_TV_NSEC(r11)
 
@@ -210,7 +214,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
 	/* Check for supported clock IDs */
 	cmpwi	cr0,r3,CLOCK_REALTIME
 	cmpwi	cr1,r3,CLOCK_MONOTONIC
-	cror	cr0,cr0,cr1
+	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
 	bne	cr0,99f
 
 	li	r3,0
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index e67eda0f8cda..6393e4137bc7 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -80,5 +80,6 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
 	bl	V_LOCAL_FUNC(__get_datapage)
 	ld	r3,CFG_TB_TICKS_PER_SEC(r3)
 	mtlr	r12
+	blr
   .cfi_endproc
 V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index d371c02a8c0e..1a89094715cc 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -1,4 +1,5 @@
-/*
+
+	/*
  * Userland implementation of gettimeofday() for 64 bits processes in a
  * ppc64 kernel for use in the vDSO
  *
@@ -68,7 +69,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	/* Check for supported clock IDs */
 	cmpwi	cr0,r3,CLOCK_REALTIME
 	cmpwi	cr1,r3,CLOCK_MONOTONIC
-	cror	cr0,cr0,cr1
+	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
 	bne	cr0,99f
 
 	mflr	r12			/* r12 saves lr */
@@ -84,16 +85,17 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 
 	bl	V_LOCAL_FUNC(__do_get_xsec)	/* get xsec from tb & kernel */
 
-	lis     r7,0x3b9a		/* r7 = 1000000000 = NSEC_PER_SEC */
-	ori     r7,r7,0xca00
+	lis     r7,15			/* r7 = 1000000 = USEC_PER_SEC */
+	ori     r7,r7,16960
 	rldicl  r5,r4,44,20		/* r5 = sec = xsec / XSEC_PER_SEC */
 	rldicr  r6,r5,20,43		/* r6 = sec * XSEC_PER_SEC */
 	std	r5,TSPC64_TV_SEC(r11)	/* store sec in tv */
 	subf	r0,r6,r4		/* r0 = xsec = (xsec - r6) */
-	mulld   r0,r0,r7		/* nsec = (xsec * NSEC_PER_SEC) /
+	mulld   r0,r0,r7		/* usec = (xsec * USEC_PER_SEC) /
 					 * XSEC_PER_SEC
 					 */
 	rldicl  r0,r0,44,20
+	mulli	r0,r0,1000		/* nsec = usec * 1000 */
 	std	r0,TSPC64_TV_NSEC(r11)	/* store nsec in tp */
 
 	mtlr	r12
@@ -106,15 +108,16 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 
 50:	bl	V_LOCAL_FUNC(__do_get_xsec)	/* get xsec from tb & kernel */
 
-	lis     r7,0x3b9a		/* r7 = 1000000000 = NSEC_PER_SEC */
-	ori     r7,r7,0xca00
+	lis     r7,15			/* r7 = 1000000 = USEC_PER_SEC */
+	ori     r7,r7,16960
 	rldicl  r5,r4,44,20		/* r5 = sec = xsec / XSEC_PER_SEC */
 	rldicr  r6,r5,20,43		/* r6 = sec * XSEC_PER_SEC */
 	subf	r0,r6,r4		/* r0 = xsec = (xsec - r6) */
-	mulld   r0,r0,r7		/* nsec = (xsec * NSEC_PER_SEC) /
+	mulld   r0,r0,r7		/* usec = (xsec * USEC_PER_SEC) /
 					 * XSEC_PER_SEC
 					 */
 	rldicl  r6,r0,44,20
+	mulli	r6,r6,1000		/* nsec = usec * 1000 */
 
 	/* now we must fixup using wall to monotonic. We need to snapshot
 	 * that value and do the counter trick again. Fortunately, we still
@@ -123,8 +126,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	 * can be used
 	 */
 
-	lwz	r4,WTOM_CLOCK_SEC(r9)
-	lwz	r7,WTOM_CLOCK_NSEC(r9)
+	lwa	r4,WTOM_CLOCK_SEC(r3)
+	lwa	r7,WTOM_CLOCK_NSEC(r3)
 
 	/* We now have our result in r4,r7. We create a fake dependency
 	 * on that result and re-check the counter
@@ -144,10 +147,14 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	add	r7,r7,r6
 	lis	r9,NSEC_PER_SEC@h
 	ori	r9,r9,NSEC_PER_SEC@l
-	cmpli	cr0,r7,r9
+	cmpl	cr0,r7,r9
+	cmpli	cr1,r7,0
 	blt	1f
 	subf	r7,r9,r7
 	addi	r4,r4,1
+1:	bge	cr1,1f
+	addi	r4,r4,-1
+	add	r7,r7,r9
 1:	std	r4,TSPC64_TV_SEC(r11)
 	std	r7,TSPC64_TV_NSEC(r11)
 
@@ -181,7 +188,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
 	/* Check for supported clock IDs */
 	cmpwi	cr0,r3,CLOCK_REALTIME
 	cmpwi	cr1,r3,CLOCK_MONOTONIC
-	cror	cr0,cr0,cr1
+	cror	cr0*4+eq,cr0*4+eq,cr1*4+eq
 	bne	cr0,99f
 
 	li	r3,0
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 5947b21a8588..4c7682a65227 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -102,7 +102,7 @@ static unsigned long from_rtc_time(struct rtc_time *tm)
 static unsigned long cuda_get_time(void)
 {
 	struct adb_request req;
-	unsigned long now;
+	unsigned int now;
 
 	if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
 		return 0;
@@ -113,7 +113,7 @@ static unsigned long cuda_get_time(void)
 		       req.reply_len);
 	now = (req.reply[3] << 24) + (req.reply[4] << 16)
 		+ (req.reply[5] << 8) + req.reply[6];
-	return now - RTC_OFFSET;
+	return ((unsigned long)now) - RTC_OFFSET;
 }
 
 #define cuda_get_rtc_time(tm)	to_rtc_time(cuda_get_time(), (tm))
@@ -146,7 +146,7 @@ static int cuda_set_rtc_time(struct rtc_time *tm)
 static unsigned long pmu_get_time(void)
 {
 	struct adb_request req;
-	unsigned long now;
+	unsigned int now;
 
 	if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
 		return 0;
@@ -156,7 +156,7 @@ static unsigned long pmu_get_time(void)
 		       req.reply_len);
 	now = (req.reply[0] << 24) + (req.reply[1] << 16)
 		+ (req.reply[2] << 8) + req.reply[3];
-	return now - RTC_OFFSET;
+	return ((unsigned long)now) - RTC_OFFSET;
 }
 
 #define pmu_get_rtc_time(tm)	to_rtc_time(pmu_get_time(), (tm))
diff --git a/include/asm-powerpc/vdso_datapage.h b/include/asm-powerpc/vdso_datapage.h
index fc323b51366b..411832d5bbdb 100644
--- a/include/asm-powerpc/vdso_datapage.h
+++ b/include/asm-powerpc/vdso_datapage.h
@@ -73,7 +73,7 @@ struct vdso_data {
 	/* those additional ones don't have to be located anywhere
 	 * special as they were not part of the original systemcfg
 	 */
-	__s64 wtom_clock_sec;			/* Wall to monotonic clock */
+	__s32 wtom_clock_sec;			/* Wall to monotonic clock */
 	__s32 wtom_clock_nsec;
    	__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls  */
    	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
-- 
cgit v1.2.3-71-gd317


From c55377ee73f6efeb373ae06f6e918d87660b4852 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 14 Nov 2005 17:22:01 +1100
Subject: powerpc: Move a bunch of ppc64 headers to include/asm-powerpc

... and also delete some that are no longer used because we already
had an include/asm-powerpc version of the header.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/asm-powerpc/btext.h     |  27 +++
 include/asm-powerpc/delay.h     |  55 ++++++
 include/asm-powerpc/eeh.h       | 364 ++++++++++++++++++++++++++++++++++++++++
 include/asm-powerpc/floppy.h    | 105 ++++++++++++
 include/asm-powerpc/hvconsole.h |  49 ++++++
 include/asm-powerpc/hvcserver.h |  57 +++++++
 include/asm-powerpc/nvram.h     | 120 +++++++++++++
 include/asm-powerpc/serial.h    |  18 ++
 include/asm-ppc/nvram.h         |  73 --------
 include/asm-ppc64/btext.h       |  27 ---
 include/asm-ppc64/delay.h       |  48 ------
 include/asm-ppc64/eeh.h         | 364 ----------------------------------------
 include/asm-ppc64/floppy.h      | 106 ------------
 include/asm-ppc64/hvconsole.h   |  49 ------
 include/asm-ppc64/hvcserver.h   |  57 -------
 include/asm-ppc64/nvram.h       | 116 -------------
 include/asm-ppc64/prom.h        | 220 ------------------------
 include/asm-ppc64/serial.h      |  23 ---
 include/asm-ppc64/system.h      | 310 ----------------------------------
 19 files changed, 795 insertions(+), 1393 deletions(-)
 create mode 100644 include/asm-powerpc/btext.h
 create mode 100644 include/asm-powerpc/delay.h
 create mode 100644 include/asm-powerpc/eeh.h
 create mode 100644 include/asm-powerpc/floppy.h
 create mode 100644 include/asm-powerpc/hvconsole.h
 create mode 100644 include/asm-powerpc/hvcserver.h
 create mode 100644 include/asm-powerpc/nvram.h
 create mode 100644 include/asm-powerpc/serial.h
 delete mode 100644 include/asm-ppc/nvram.h
 delete mode 100644 include/asm-ppc64/btext.h
 delete mode 100644 include/asm-ppc64/delay.h
 delete mode 100644 include/asm-ppc64/eeh.h
 delete mode 100644 include/asm-ppc64/floppy.h
 delete mode 100644 include/asm-ppc64/hvconsole.h
 delete mode 100644 include/asm-ppc64/hvcserver.h
 delete mode 100644 include/asm-ppc64/nvram.h
 delete mode 100644 include/asm-ppc64/prom.h
 delete mode 100644 include/asm-ppc64/serial.h
 delete mode 100644 include/asm-ppc64/system.h

(limited to 'include')

diff --git a/include/asm-powerpc/btext.h b/include/asm-powerpc/btext.h
new file mode 100644
index 000000000000..71cce36bc630
--- /dev/null
+++ b/include/asm-powerpc/btext.h
@@ -0,0 +1,27 @@
+/*
+ * Definitions for using the procedures in btext.c.
+ *
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ */
+#ifndef __PPC_BTEXT_H
+#define __PPC_BTEXT_H
+#ifdef __KERNEL__
+
+extern void btext_clearscreen(void);
+extern void btext_flushscreen(void);
+
+extern int boot_text_mapped;
+
+extern int btext_initialize(struct device_node *np);
+
+extern void map_boot_text(void);
+extern void init_boot_display(void);
+extern void btext_update_display(unsigned long phys, int width, int height,
+				 int depth, int pitch);
+
+extern void btext_drawchar(char c);
+extern void btext_drawstring(const char *str);
+extern void btext_drawhex(unsigned long v);
+
+#endif /* __KERNEL__ */
+#endif /* __PPC_BTEXT_H */
diff --git a/include/asm-powerpc/delay.h b/include/asm-powerpc/delay.h
new file mode 100644
index 000000000000..1492aa9ab716
--- /dev/null
+++ b/include/asm-powerpc/delay.h
@@ -0,0 +1,55 @@
+#ifndef _ASM_POWERPC_DELAY_H
+#define _ASM_POWERPC_DELAY_H
+
+/*
+ * Copyright 1996, Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * PPC64 Support added by Dave Engebretsen, Todd Inglett, Mike Corrigan,
+ * Anton Blanchard.
+ */
+
+extern unsigned long tb_ticks_per_usec;
+
+#ifdef CONFIG_PPC64
+/* define these here to prevent circular dependencies */
+/* these instructions control the thread priority on multi-threaded cpus */
+#define __HMT_low()	asm volatile("or 1,1,1")
+#define __HMT_medium()	asm volatile("or 2,2,2")
+#else
+#define __HMT_low()
+#define __HMT_medium()
+#endif
+
+#define __barrier()	asm volatile("" ::: "memory")
+
+static inline unsigned long __get_tb(void)
+{
+	unsigned long rval;
+
+	asm volatile("mftb %0" : "=r" (rval));
+	return rval;
+}
+
+static inline void __delay(unsigned long loops)
+{
+	unsigned long start = __get_tb();
+
+	while((__get_tb() - start) < loops)
+		__HMT_low();
+	__HMT_medium();
+	__barrier();
+}
+
+static inline void udelay(unsigned long usecs)
+{
+	unsigned long loops = tb_ticks_per_usec * usecs;
+
+	__delay(loops);
+}
+
+#endif /* _ASM_POWERPC_DELAY_H */
diff --git a/include/asm-powerpc/eeh.h b/include/asm-powerpc/eeh.h
new file mode 100644
index 000000000000..89f26ab31908
--- /dev/null
+++ b/include/asm-powerpc/eeh.h
@@ -0,0 +1,364 @@
+/*
+ * eeh.h
+ * Copyright (C) 2001  Dave Engebretsen & Todd Inglett IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef _PPC64_EEH_H
+#define _PPC64_EEH_H
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/string.h>
+
+struct pci_dev;
+struct device_node;
+
+#ifdef CONFIG_EEH
+
+/* Values for eeh_mode bits in device_node */
+#define EEH_MODE_SUPPORTED	(1<<0)
+#define EEH_MODE_NOCHECK	(1<<1)
+#define EEH_MODE_ISOLATED	(1<<2)
+
+/* Max number of EEH freezes allowed before we consider the device
+ * to be permanently disabled. */
+#define EEH_MAX_ALLOWED_FREEZES 5
+
+void __init eeh_init(void);
+unsigned long eeh_check_failure(const volatile void __iomem *token,
+				unsigned long val);
+int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev);
+void __init pci_addr_cache_build(void);
+
+/**
+ * eeh_add_device_early
+ * eeh_add_device_late
+ *
+ * Perform eeh initialization for devices added after boot.
+ * Call eeh_add_device_early before doing any i/o to the
+ * device (including config space i/o).  Call eeh_add_device_late
+ * to finish the eeh setup for this device.
+ */
+void eeh_add_device_early(struct device_node *);
+void eeh_add_device_late(struct pci_dev *);
+
+/**
+ * eeh_remove_device - undo EEH setup for the indicated pci device
+ * @dev: pci device to be removed
+ *
+ * This routine should be called when a device is removed from
+ * a running system (e.g. by hotplug or dlpar).  It unregisters
+ * the PCI device from the EEH subsystem.  I/O errors affecting
+ * this device will no longer be detected after this call; thus,
+ * i/o errors affecting this slot may leave this device unusable.
+ */
+void eeh_remove_device(struct pci_dev *);
+
+/**
+ * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
+ *
+ * If this macro yields TRUE, the caller relays to eeh_check_failure()
+ * which does further tests out of line.
+ */
+#define EEH_POSSIBLE_ERROR(val, type)	((val) == (type)~0)
+
+/*
+ * Reads from a device which has been isolated by EEH will return
+ * all 1s.  This macro gives an all-1s value of the given size (in
+ * bytes: 1, 2, or 4) for comparing with the result of a read.
+ */
+#define EEH_IO_ERROR_VALUE(size)	(~0U >> ((4 - (size)) * 8))
+
+#else /* !CONFIG_EEH */
+static inline void eeh_init(void) { }
+
+static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+{
+	return val;
+}
+
+static inline int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
+{
+	return 0;
+}
+
+static inline void pci_addr_cache_build(void) { }
+
+static inline void eeh_add_device_early(struct device_node *dn) { }
+
+static inline void eeh_add_device_late(struct pci_dev *dev) { }
+
+static inline void eeh_remove_device(struct pci_dev *dev) { }
+
+#define EEH_POSSIBLE_ERROR(val, type) (0)
+#define EEH_IO_ERROR_VALUE(size) (-1UL)
+#endif /* CONFIG_EEH */
+
+/*
+ * MMIO read/write operations with EEH support.
+ */
+static inline u8 eeh_readb(const volatile void __iomem *addr)
+{
+	u8 val = in_8(addr);
+	if (EEH_POSSIBLE_ERROR(val, u8))
+		return eeh_check_failure(addr, val);
+	return val;
+}
+static inline void eeh_writeb(u8 val, volatile void __iomem *addr)
+{
+	out_8(addr, val);
+}
+
+static inline u16 eeh_readw(const volatile void __iomem *addr)
+{
+	u16 val = in_le16(addr);
+	if (EEH_POSSIBLE_ERROR(val, u16))
+		return eeh_check_failure(addr, val);
+	return val;
+}
+static inline void eeh_writew(u16 val, volatile void __iomem *addr)
+{
+	out_le16(addr, val);
+}
+static inline u16 eeh_raw_readw(const volatile void __iomem *addr)
+{
+	u16 val = in_be16(addr);
+	if (EEH_POSSIBLE_ERROR(val, u16))
+		return eeh_check_failure(addr, val);
+	return val;
+}
+static inline void eeh_raw_writew(u16 val, volatile void __iomem *addr) {
+	volatile u16 __iomem *vaddr = (volatile u16 __iomem *) addr;
+	out_be16(vaddr, val);
+}
+
+static inline u32 eeh_readl(const volatile void __iomem *addr)
+{
+	u32 val = in_le32(addr);
+	if (EEH_POSSIBLE_ERROR(val, u32))
+		return eeh_check_failure(addr, val);
+	return val;
+}
+static inline void eeh_writel(u32 val, volatile void __iomem *addr)
+{
+	out_le32(addr, val);
+}
+static inline u32 eeh_raw_readl(const volatile void __iomem *addr)
+{
+	u32 val = in_be32(addr);
+	if (EEH_POSSIBLE_ERROR(val, u32))
+		return eeh_check_failure(addr, val);
+	return val;
+}
+static inline void eeh_raw_writel(u32 val, volatile void __iomem *addr)
+{
+	out_be32(addr, val);
+}
+
+static inline u64 eeh_readq(const volatile void __iomem *addr)
+{
+	u64 val = in_le64(addr);
+	if (EEH_POSSIBLE_ERROR(val, u64))
+		return eeh_check_failure(addr, val);
+	return val;
+}
+static inline void eeh_writeq(u64 val, volatile void __iomem *addr)
+{
+	out_le64(addr, val);
+}
+static inline u64 eeh_raw_readq(const volatile void __iomem *addr)
+{
+	u64 val = in_be64(addr);
+	if (EEH_POSSIBLE_ERROR(val, u64))
+		return eeh_check_failure(addr, val);
+	return val;
+}
+static inline void eeh_raw_writeq(u64 val, volatile void __iomem *addr)
+{
+	out_be64(addr, val);
+}
+
+#define EEH_CHECK_ALIGN(v,a) \
+	((((unsigned long)(v)) & ((a) - 1)) == 0)
+
+static inline void eeh_memset_io(volatile void __iomem *addr, int c,
+				 unsigned long n)
+{
+	void *p = (void __force *)addr;
+	u32 lc = c;
+	lc |= lc << 8;
+	lc |= lc << 16;
+
+	while(n && !EEH_CHECK_ALIGN(p, 4)) {
+		*((volatile u8 *)p) = c;
+		p++;
+		n--;
+	}
+	while(n >= 4) {
+		*((volatile u32 *)p) = lc;
+		p += 4;
+		n -= 4;
+	}
+	while(n) {
+		*((volatile u8 *)p) = c;
+		p++;
+		n--;
+	}
+	__asm__ __volatile__ ("sync" : : : "memory");
+}
+static inline void eeh_memcpy_fromio(void *dest, const volatile void __iomem *src,
+				     unsigned long n)
+{
+	void *vsrc = (void __force *) src;
+	void *destsave = dest;
+	unsigned long nsave = n;
+
+	while(n && (!EEH_CHECK_ALIGN(vsrc, 4) || !EEH_CHECK_ALIGN(dest, 4))) {
+		*((u8 *)dest) = *((volatile u8 *)vsrc);
+		__asm__ __volatile__ ("eieio" : : : "memory");
+		vsrc++;
+		dest++;
+		n--;
+	}
+	while(n > 4) {
+		*((u32 *)dest) = *((volatile u32 *)vsrc);
+		__asm__ __volatile__ ("eieio" : : : "memory");
+		vsrc += 4;
+		dest += 4;
+		n -= 4;
+	}
+	while(n) {
+		*((u8 *)dest) = *((volatile u8 *)vsrc);
+		__asm__ __volatile__ ("eieio" : : : "memory");
+		vsrc++;
+		dest++;
+		n--;
+	}
+	__asm__ __volatile__ ("sync" : : : "memory");
+
+	/* Look for ffff's here at dest[n].  Assume that at least 4 bytes
+	 * were copied. Check all four bytes.
+	 */
+	if ((nsave >= 4) &&
+		(EEH_POSSIBLE_ERROR((*((u32 *) destsave+nsave-4)), u32))) {
+		eeh_check_failure(src, (*((u32 *) destsave+nsave-4)));
+	}
+}
+
+static inline void eeh_memcpy_toio(volatile void __iomem *dest, const void *src,
+				   unsigned long n)
+{
+	void *vdest = (void __force *) dest;
+
+	while(n && (!EEH_CHECK_ALIGN(vdest, 4) || !EEH_CHECK_ALIGN(src, 4))) {
+		*((volatile u8 *)vdest) = *((u8 *)src);
+		src++;
+		vdest++;
+		n--;
+	}
+	while(n > 4) {
+		*((volatile u32 *)vdest) = *((volatile u32 *)src);
+		src += 4;
+		vdest += 4;
+		n-=4;
+	}
+	while(n) {
+		*((volatile u8 *)vdest) = *((u8 *)src);
+		src++;
+		vdest++;
+		n--;
+	}
+	__asm__ __volatile__ ("sync" : : : "memory");
+}
+
+#undef EEH_CHECK_ALIGN
+
+static inline u8 eeh_inb(unsigned long port)
+{
+	u8 val;
+	if (!_IO_IS_VALID(port))
+		return ~0;
+	val = in_8((u8 __iomem *)(port+pci_io_base));
+	if (EEH_POSSIBLE_ERROR(val, u8))
+		return eeh_check_failure((void __iomem *)(port), val);
+	return val;
+}
+
+static inline void eeh_outb(u8 val, unsigned long port)
+{
+	if (_IO_IS_VALID(port))
+		out_8((u8 __iomem *)(port+pci_io_base), val);
+}
+
+static inline u16 eeh_inw(unsigned long port)
+{
+	u16 val;
+	if (!_IO_IS_VALID(port))
+		return ~0;
+	val = in_le16((u16 __iomem *)(port+pci_io_base));
+	if (EEH_POSSIBLE_ERROR(val, u16))
+		return eeh_check_failure((void __iomem *)(port), val);
+	return val;
+}
+
+static inline void eeh_outw(u16 val, unsigned long port)
+{
+	if (_IO_IS_VALID(port))
+		out_le16((u16 __iomem *)(port+pci_io_base), val);
+}
+
+static inline u32 eeh_inl(unsigned long port)
+{
+	u32 val;
+	if (!_IO_IS_VALID(port))
+		return ~0;
+	val = in_le32((u32 __iomem *)(port+pci_io_base));
+	if (EEH_POSSIBLE_ERROR(val, u32))
+		return eeh_check_failure((void __iomem *)(port), val);
+	return val;
+}
+
+static inline void eeh_outl(u32 val, unsigned long port)
+{
+	if (_IO_IS_VALID(port))
+		out_le32((u32 __iomem *)(port+pci_io_base), val);
+}
+
+/* in-string eeh macros */
+static inline void eeh_insb(unsigned long port, void * buf, int ns)
+{
+	_insb((u8 __iomem *)(port+pci_io_base), buf, ns);
+	if (EEH_POSSIBLE_ERROR((*(((u8*)buf)+ns-1)), u8))
+		eeh_check_failure((void __iomem *)(port), *(u8*)buf);
+}
+
+static inline void eeh_insw_ns(unsigned long port, void * buf, int ns)
+{
+	_insw_ns((u16 __iomem *)(port+pci_io_base), buf, ns);
+	if (EEH_POSSIBLE_ERROR((*(((u16*)buf)+ns-1)), u16))
+		eeh_check_failure((void __iomem *)(port), *(u16*)buf);
+}
+
+static inline void eeh_insl_ns(unsigned long port, void * buf, int nl)
+{
+	_insl_ns((u32 __iomem *)(port+pci_io_base), buf, nl);
+	if (EEH_POSSIBLE_ERROR((*(((u32*)buf)+nl-1)), u32))
+		eeh_check_failure((void __iomem *)(port), *(u32*)buf);
+}
+
+#endif /* _PPC64_EEH_H */
diff --git a/include/asm-powerpc/floppy.h b/include/asm-powerpc/floppy.h
new file mode 100644
index 000000000000..64276a3f6153
--- /dev/null
+++ b/include/asm-powerpc/floppy.h
@@ -0,0 +1,105 @@
+/*
+ * Architecture specific parts of the Floppy driver
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995
+ */
+#ifndef __ASM_POWERPC_FLOPPY_H
+#define __ASM_POWERPC_FLOPPY_H
+
+#include <linux/config.h>
+#include <asm/machdep.h>
+
+#define fd_inb(port)		inb_p(port)
+#define fd_outb(value,port)	outb_p(value,port)
+
+#define fd_enable_dma()         enable_dma(FLOPPY_DMA)
+#define fd_disable_dma()        disable_dma(FLOPPY_DMA)
+#define fd_request_dma()        request_dma(FLOPPY_DMA, "floppy")
+#define fd_free_dma()           free_dma(FLOPPY_DMA)
+#define fd_clear_dma_ff()       clear_dma_ff(FLOPPY_DMA)
+#define fd_set_dma_mode(mode)   set_dma_mode(FLOPPY_DMA, mode)
+#define fd_set_dma_count(count) set_dma_count(FLOPPY_DMA, count)
+#define fd_enable_irq()         enable_irq(FLOPPY_IRQ)
+#define fd_disable_irq()        disable_irq(FLOPPY_IRQ)
+#define fd_cacheflush(addr,size) /* nothing */
+#define fd_request_irq()        request_irq(FLOPPY_IRQ, floppy_interrupt, \
+					    SA_INTERRUPT|SA_SAMPLE_RANDOM, \
+				            "floppy", NULL)
+#define fd_free_irq()           free_irq(FLOPPY_IRQ, NULL);
+
+#ifdef CONFIG_PCI
+
+#include <linux/pci.h>
+
+#define fd_dma_setup(addr,size,mode,io) powerpc_fd_dma_setup(addr,size,mode,io)
+
+static __inline__ int powerpc_fd_dma_setup(char *addr, unsigned long size,
+					   int mode, int io)
+{
+	static unsigned long prev_size;
+	static dma_addr_t bus_addr = 0;
+	static char *prev_addr;
+	static int prev_dir;
+	int dir;
+
+	dir = (mode == DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
+
+	if (bus_addr 
+	    && (addr != prev_addr || size != prev_size || dir != prev_dir)) {
+		/* different from last time -- unmap prev */
+		pci_unmap_single(NULL, bus_addr, prev_size, prev_dir);
+		bus_addr = 0;
+	}
+
+	if (!bus_addr)	/* need to map it */
+		bus_addr = pci_map_single(NULL, addr, size, dir);
+
+	/* remember this one as prev */
+	prev_addr = addr;
+	prev_size = size;
+	prev_dir = dir;
+
+	fd_clear_dma_ff();
+	fd_cacheflush(addr, size);
+	fd_set_dma_mode(mode);
+	set_dma_addr(FLOPPY_DMA, bus_addr);
+	fd_set_dma_count(size);
+	virtual_dma_port = io;
+	fd_enable_dma();
+
+	return 0;
+}
+
+#endif /* CONFIG_PCI */
+
+__inline__ void virtual_dma_init(void)
+{
+	/* Nothing to do on PowerPC */
+}
+
+static int FDC1 = 0x3f0;
+static int FDC2 = -1;
+
+/*
+ * Again, the CMOS information not available
+ */
+#define FLOPPY0_TYPE 6
+#define FLOPPY1_TYPE 0
+
+#define N_FDC 2			/* Don't change this! */
+#define N_DRIVE 8
+
+#define FLOPPY_MOTOR_MASK 0xf0
+
+/*
+ * The PowerPC has no problems with floppy DMA crossing 64k borders.
+ */
+#define CROSS_64KB(a,s)	(0)
+
+#define EXTRA_FLOPPY_PARAMS
+
+#endif /* __ASM_POWERPC_FLOPPY_H */
diff --git a/include/asm-powerpc/hvconsole.h b/include/asm-powerpc/hvconsole.h
new file mode 100644
index 000000000000..6da93ce74dc0
--- /dev/null
+++ b/include/asm-powerpc/hvconsole.h
@@ -0,0 +1,49 @@
+/*
+ * hvconsole.h
+ * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
+ *
+ * LPAR console support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef _PPC64_HVCONSOLE_H
+#define _PPC64_HVCONSOLE_H
+
+/*
+ * This is the max number of console adapters that can/will be found as
+ * console devices on first stage console init.  Any number beyond this range
+ * can't be used as a console device but is still a valid tty device.
+ */
+#define MAX_NR_HVC_CONSOLES	16
+
+/* implemented by a low level driver */
+struct hv_ops {
+	int (*get_chars)(uint32_t vtermno, char *buf, int count);
+	int (*put_chars)(uint32_t vtermno, const char *buf, int count);
+};
+extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
+extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
+
+struct hvc_struct;
+
+/* Register a vterm and a slot index for use as a console (console_init) */
+extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops);
+/* register a vterm for hvc tty operation (module_init or hotplug add) */
+extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq,
+						 struct hv_ops *ops);
+/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */
+extern int __devexit hvc_remove(struct hvc_struct *hp);
+#endif /* _PPC64_HVCONSOLE_H */
diff --git a/include/asm-powerpc/hvcserver.h b/include/asm-powerpc/hvcserver.h
new file mode 100644
index 000000000000..aecba9665796
--- /dev/null
+++ b/include/asm-powerpc/hvcserver.h
@@ -0,0 +1,57 @@
+/*
+ * hvcserver.h
+ * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
+ *
+ * PPC64 virtual I/O console server support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef _PPC64_HVCSERVER_H
+#define _PPC64_HVCSERVER_H
+
+#include <linux/list.h>
+
+/* Converged Location Code length */
+#define HVCS_CLC_LENGTH	79
+
+/**
+ * hvcs_partner_info - an element in a list of partner info
+ * @node: list_head denoting this partner_info struct's position in the list of
+ *	partner info.
+ * @unit_address: The partner unit address of this entry.
+ * @partition_ID: The partner partition ID of this entry.
+ * @location_code: The converged location code of this entry + 1 char for the
+ *	null-term.
+ *
+ * This structure outlines the format that partner info is presented to a caller
+ * of the hvcs partner info fetching functions.  These are strung together into
+ * a list using linux kernel lists.
+ */
+struct hvcs_partner_info {
+	struct list_head node;
+	uint32_t unit_address;
+	uint32_t partition_ID;
+	char location_code[HVCS_CLC_LENGTH + 1]; /* CLC + 1 null-term char */
+};
+
+extern int hvcs_free_partner_info(struct list_head *head);
+extern int hvcs_get_partner_info(uint32_t unit_address,
+		struct list_head *head, unsigned long *pi_buff);
+extern int hvcs_register_connection(uint32_t unit_address,
+		uint32_t p_partition_ID, uint32_t p_unit_address);
+extern int hvcs_free_connection(uint32_t unit_address);
+
+#endif /* _PPC64_HVCSERVER_H */
diff --git a/include/asm-powerpc/nvram.h b/include/asm-powerpc/nvram.h
new file mode 100644
index 000000000000..1858244ced32
--- /dev/null
+++ b/include/asm-powerpc/nvram.h
@@ -0,0 +1,120 @@
+/*
+ * NVRAM definitions and access functions.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_NVRAM_H
+#define _ASM_POWERPC_NVRAM_H
+
+#define NVRW_CNT 0x20
+#define NVRAM_HEADER_LEN 16 /* sizeof(struct nvram_header) */
+#define NVRAM_BLOCK_LEN 16
+#define NVRAM_MAX_REQ (2080/NVRAM_BLOCK_LEN)
+#define NVRAM_MIN_REQ (1056/NVRAM_BLOCK_LEN)
+
+#define NVRAM_AS0  0x74
+#define NVRAM_AS1  0x75
+#define NVRAM_DATA 0x77
+
+
+/* RTC Offsets */
+
+#define MOTO_RTC_SECONDS	0x1FF9
+#define MOTO_RTC_MINUTES	0x1FFA
+#define MOTO_RTC_HOURS		0x1FFB
+#define MOTO_RTC_DAY_OF_WEEK	0x1FFC
+#define MOTO_RTC_DAY_OF_MONTH	0x1FFD
+#define MOTO_RTC_MONTH		0x1FFE
+#define MOTO_RTC_YEAR		0x1FFF
+#define MOTO_RTC_CONTROLA       0x1FF8
+#define MOTO_RTC_CONTROLB       0x1FF9
+
+#define NVRAM_SIG_SP	0x02	/* support processor */
+#define NVRAM_SIG_OF	0x50	/* open firmware config */
+#define NVRAM_SIG_FW	0x51	/* general firmware */
+#define NVRAM_SIG_HW	0x52	/* hardware (VPD) */
+#define NVRAM_SIG_FLIP	0x5a	/* Apple flip/flop header */
+#define NVRAM_SIG_APPL	0x5f	/* Apple "system" (???) */
+#define NVRAM_SIG_SYS	0x70	/* system env vars */
+#define NVRAM_SIG_CFG	0x71	/* config data */
+#define NVRAM_SIG_ELOG	0x72	/* error log */
+#define NVRAM_SIG_VEND	0x7e	/* vendor defined */
+#define NVRAM_SIG_FREE	0x7f	/* Free space */
+#define NVRAM_SIG_OS	0xa0	/* OS defined */
+#define NVRAM_SIG_PANIC	0xa1	/* Apple OSX "panic" */
+
+/* If change this size, then change the size of NVNAME_LEN */
+struct nvram_header {
+	unsigned char signature;
+	unsigned char checksum;
+	unsigned short length;
+	char name[12];
+};
+
+struct nvram_partition {
+	struct list_head partition;
+	struct nvram_header header;
+	unsigned int index;
+};
+
+
+extern int nvram_write_error_log(char * buff, int length, unsigned int err_type);
+extern int nvram_read_error_log(char * buff, int length, unsigned int * err_type);
+extern int nvram_clear_error_log(void);
+extern struct nvram_partition *nvram_find_partition(int sig, const char *name);
+
+extern int pSeries_nvram_init(void);
+extern int pmac_nvram_init(void);
+extern int mmio_nvram_init(void);
+
+/* PowerMac specific nvram stuffs */
+
+enum {
+	pmac_nvram_OF,		/* Open Firmware partition */
+	pmac_nvram_XPRAM,	/* MacOS XPRAM partition */
+	pmac_nvram_NR		/* MacOS Name Registry partition */
+};
+
+/* Return partition offset in nvram */
+extern int	pmac_get_partition(int partition);
+
+/* Direct access to XPRAM on PowerMacs */
+extern u8	pmac_xpram_read(int xpaddr);
+extern void	pmac_xpram_write(int xpaddr, u8 data);
+
+/* Synchronize NVRAM */
+extern void	nvram_sync(void);
+
+/* Normal access to NVRAM */
+extern unsigned char nvram_read_byte(int i);
+extern void nvram_write_byte(unsigned char c, int i);
+
+/* Some offsets in XPRAM */
+#define PMAC_XPRAM_MACHINE_LOC	0xe4
+#define PMAC_XPRAM_SOUND_VOLUME	0x08
+
+/* Machine location structure in PowerMac XPRAM */
+struct pmac_machine_location {
+	unsigned int	latitude;	/* 2+30 bit Fractional number */
+	unsigned int	longitude;	/* 2+30 bit Fractional number */
+	unsigned int	delta;		/* mix of GMT delta and DLS */
+};
+
+/*
+ * /dev/nvram ioctls
+ *
+ * Note that PMAC_NVRAM_GET_OFFSET is still supported, but is
+ * definitely obsolete. Do not use it if you can avoid it
+ */
+
+#define OBSOLETE_PMAC_NVRAM_GET_OFFSET \
+				_IOWR('p', 0x40, int)
+
+#define IOC_NVRAM_GET_OFFSET	_IOWR('p', 0x42, int)	/* Get NVRAM partition offset */
+#define IOC_NVRAM_SYNC		_IO('p', 0x43)		/* Sync NVRAM image */
+
+#endif /* _ASM_POWERPC_NVRAM_H */
diff --git a/include/asm-powerpc/serial.h b/include/asm-powerpc/serial.h
new file mode 100644
index 000000000000..b273d630b32f
--- /dev/null
+++ b/include/asm-powerpc/serial.h
@@ -0,0 +1,18 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_POWERPC_SERIAL_H
+#define _ASM_POWERPC_SERIAL_H
+
+/*
+ * Serial ports are not listed here, because they are discovered
+ * through the device tree.
+ */
+
+/* Default baud base if not found in device-tree */
+#define BASE_BAUD ( 1843200 / 16 )
+
+#endif /* _PPC64_SERIAL_H */
diff --git a/include/asm-ppc/nvram.h b/include/asm-ppc/nvram.h
deleted file mode 100644
index 31ef16e3fc4f..000000000000
--- a/include/asm-ppc/nvram.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * PreP compliant NVRAM access
- */
-
-#ifdef __KERNEL__
-#ifndef _PPC_NVRAM_H
-#define _PPC_NVRAM_H
-
-#define NVRAM_AS0  0x74
-#define NVRAM_AS1  0x75
-#define NVRAM_DATA 0x77
-
-
-/* RTC Offsets */
-
-#define MOTO_RTC_SECONDS		0x1FF9
-#define MOTO_RTC_MINUTES		0x1FFA
-#define MOTO_RTC_HOURS		0x1FFB
-#define MOTO_RTC_DAY_OF_WEEK		0x1FFC
-#define MOTO_RTC_DAY_OF_MONTH	0x1FFD
-#define MOTO_RTC_MONTH		0x1FFE
-#define MOTO_RTC_YEAR		0x1FFF
-#define MOTO_RTC_CONTROLA            0x1FF8
-#define MOTO_RTC_CONTROLB            0x1FF9
-
-/* PowerMac specific nvram stuffs */
-
-enum {
-	pmac_nvram_OF,		/* Open Firmware partition */
-	pmac_nvram_XPRAM,	/* MacOS XPRAM partition */
-	pmac_nvram_NR		/* MacOS Name Registry partition */
-};
-
-/* Return partition offset in nvram */
-extern int	pmac_get_partition(int partition);
-
-/* Direct access to XPRAM on PowerMacs */
-extern u8	pmac_xpram_read(int xpaddr);
-extern void	pmac_xpram_write(int xpaddr, u8 data);
-
-/* Synchronize NVRAM */
-extern void	nvram_sync(void);
-
-/* Normal access to NVRAM */
-extern unsigned char nvram_read_byte(int i);
-extern void nvram_write_byte(unsigned char c, int i);
-
-/* Some offsets in XPRAM */
-#define PMAC_XPRAM_MACHINE_LOC	0xe4
-#define PMAC_XPRAM_SOUND_VOLUME	0x08
-
-/* Machine location structure in PowerMac XPRAM */
-struct pmac_machine_location {
-	unsigned int	latitude;	/* 2+30 bit Fractional number */
-	unsigned int	longitude;	/* 2+30 bit Fractional number */
-	unsigned int	delta;		/* mix of GMT delta and DLS */
-};
-
-/*
- * /dev/nvram ioctls
- *
- * Note that PMAC_NVRAM_GET_OFFSET is still supported, but is
- * definitely obsolete. Do not use it if you can avoid it
- */
-
-#define OBSOLETE_PMAC_NVRAM_GET_OFFSET \
-				_IOWR('p', 0x40, int)
-
-#define IOC_NVRAM_GET_OFFSET	_IOWR('p', 0x42, int)	/* Get NVRAM partition offset */
-#define IOC_NVRAM_SYNC		_IO('p', 0x43)		/* Sync NVRAM image */
-
-#endif
-#endif /* __KERNEL__ */
diff --git a/include/asm-ppc64/btext.h b/include/asm-ppc64/btext.h
deleted file mode 100644
index 71cce36bc630..000000000000
--- a/include/asm-ppc64/btext.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Definitions for using the procedures in btext.c.
- *
- * Benjamin Herrenschmidt <benh@kernel.crashing.org>
- */
-#ifndef __PPC_BTEXT_H
-#define __PPC_BTEXT_H
-#ifdef __KERNEL__
-
-extern void btext_clearscreen(void);
-extern void btext_flushscreen(void);
-
-extern int boot_text_mapped;
-
-extern int btext_initialize(struct device_node *np);
-
-extern void map_boot_text(void);
-extern void init_boot_display(void);
-extern void btext_update_display(unsigned long phys, int width, int height,
-				 int depth, int pitch);
-
-extern void btext_drawchar(char c);
-extern void btext_drawstring(const char *str);
-extern void btext_drawhex(unsigned long v);
-
-#endif /* __KERNEL__ */
-#endif /* __PPC_BTEXT_H */
diff --git a/include/asm-ppc64/delay.h b/include/asm-ppc64/delay.h
deleted file mode 100644
index 05f198cf73d9..000000000000
--- a/include/asm-ppc64/delay.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef _PPC64_DELAY_H
-#define _PPC64_DELAY_H
-
-/*
- * Copyright 1996, Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * PPC64 Support added by Dave Engebretsen, Todd Inglett, Mike Corrigan,
- * Anton Blanchard.
- */
-
-extern unsigned long tb_ticks_per_usec;
-
-/* define these here to prevent circular dependencies */ 
-#define __HMT_low()	asm volatile("or 1,1,1")
-#define __HMT_medium()	asm volatile("or 2,2,2")
-#define __barrier()	asm volatile("":::"memory")
-
-static inline unsigned long __get_tb(void)
-{
-	unsigned long rval;
-
-	asm volatile("mftb %0" : "=r" (rval));
-	return rval;
-}
-
-static inline void __delay(unsigned long loops)
-{
-	unsigned long start = __get_tb();
-
-	while((__get_tb()-start) < loops)
-		__HMT_low();
-	__HMT_medium();
-	__barrier();
-}
-
-static inline void udelay(unsigned long usecs)
-{
-	unsigned long loops = tb_ticks_per_usec * usecs;
-
-	__delay(loops);
-}
-
-#endif /* _PPC64_DELAY_H */
diff --git a/include/asm-ppc64/eeh.h b/include/asm-ppc64/eeh.h
deleted file mode 100644
index 89f26ab31908..000000000000
--- a/include/asm-ppc64/eeh.h
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- * eeh.h
- * Copyright (C) 2001  Dave Engebretsen & Todd Inglett IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#ifndef _PPC64_EEH_H
-#define _PPC64_EEH_H
-
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/string.h>
-
-struct pci_dev;
-struct device_node;
-
-#ifdef CONFIG_EEH
-
-/* Values for eeh_mode bits in device_node */
-#define EEH_MODE_SUPPORTED	(1<<0)
-#define EEH_MODE_NOCHECK	(1<<1)
-#define EEH_MODE_ISOLATED	(1<<2)
-
-/* Max number of EEH freezes allowed before we consider the device
- * to be permanently disabled. */
-#define EEH_MAX_ALLOWED_FREEZES 5
-
-void __init eeh_init(void);
-unsigned long eeh_check_failure(const volatile void __iomem *token,
-				unsigned long val);
-int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev);
-void __init pci_addr_cache_build(void);
-
-/**
- * eeh_add_device_early
- * eeh_add_device_late
- *
- * Perform eeh initialization for devices added after boot.
- * Call eeh_add_device_early before doing any i/o to the
- * device (including config space i/o).  Call eeh_add_device_late
- * to finish the eeh setup for this device.
- */
-void eeh_add_device_early(struct device_node *);
-void eeh_add_device_late(struct pci_dev *);
-
-/**
- * eeh_remove_device - undo EEH setup for the indicated pci device
- * @dev: pci device to be removed
- *
- * This routine should be called when a device is removed from
- * a running system (e.g. by hotplug or dlpar).  It unregisters
- * the PCI device from the EEH subsystem.  I/O errors affecting
- * this device will no longer be detected after this call; thus,
- * i/o errors affecting this slot may leave this device unusable.
- */
-void eeh_remove_device(struct pci_dev *);
-
-/**
- * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
- *
- * If this macro yields TRUE, the caller relays to eeh_check_failure()
- * which does further tests out of line.
- */
-#define EEH_POSSIBLE_ERROR(val, type)	((val) == (type)~0)
-
-/*
- * Reads from a device which has been isolated by EEH will return
- * all 1s.  This macro gives an all-1s value of the given size (in
- * bytes: 1, 2, or 4) for comparing with the result of a read.
- */
-#define EEH_IO_ERROR_VALUE(size)	(~0U >> ((4 - (size)) * 8))
-
-#else /* !CONFIG_EEH */
-static inline void eeh_init(void) { }
-
-static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
-{
-	return val;
-}
-
-static inline int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
-{
-	return 0;
-}
-
-static inline void pci_addr_cache_build(void) { }
-
-static inline void eeh_add_device_early(struct device_node *dn) { }
-
-static inline void eeh_add_device_late(struct pci_dev *dev) { }
-
-static inline void eeh_remove_device(struct pci_dev *dev) { }
-
-#define EEH_POSSIBLE_ERROR(val, type) (0)
-#define EEH_IO_ERROR_VALUE(size) (-1UL)
-#endif /* CONFIG_EEH */
-
-/*
- * MMIO read/write operations with EEH support.
- */
-static inline u8 eeh_readb(const volatile void __iomem *addr)
-{
-	u8 val = in_8(addr);
-	if (EEH_POSSIBLE_ERROR(val, u8))
-		return eeh_check_failure(addr, val);
-	return val;
-}
-static inline void eeh_writeb(u8 val, volatile void __iomem *addr)
-{
-	out_8(addr, val);
-}
-
-static inline u16 eeh_readw(const volatile void __iomem *addr)
-{
-	u16 val = in_le16(addr);
-	if (EEH_POSSIBLE_ERROR(val, u16))
-		return eeh_check_failure(addr, val);
-	return val;
-}
-static inline void eeh_writew(u16 val, volatile void __iomem *addr)
-{
-	out_le16(addr, val);
-}
-static inline u16 eeh_raw_readw(const volatile void __iomem *addr)
-{
-	u16 val = in_be16(addr);
-	if (EEH_POSSIBLE_ERROR(val, u16))
-		return eeh_check_failure(addr, val);
-	return val;
-}
-static inline void eeh_raw_writew(u16 val, volatile void __iomem *addr) {
-	volatile u16 __iomem *vaddr = (volatile u16 __iomem *) addr;
-	out_be16(vaddr, val);
-}
-
-static inline u32 eeh_readl(const volatile void __iomem *addr)
-{
-	u32 val = in_le32(addr);
-	if (EEH_POSSIBLE_ERROR(val, u32))
-		return eeh_check_failure(addr, val);
-	return val;
-}
-static inline void eeh_writel(u32 val, volatile void __iomem *addr)
-{
-	out_le32(addr, val);
-}
-static inline u32 eeh_raw_readl(const volatile void __iomem *addr)
-{
-	u32 val = in_be32(addr);
-	if (EEH_POSSIBLE_ERROR(val, u32))
-		return eeh_check_failure(addr, val);
-	return val;
-}
-static inline void eeh_raw_writel(u32 val, volatile void __iomem *addr)
-{
-	out_be32(addr, val);
-}
-
-static inline u64 eeh_readq(const volatile void __iomem *addr)
-{
-	u64 val = in_le64(addr);
-	if (EEH_POSSIBLE_ERROR(val, u64))
-		return eeh_check_failure(addr, val);
-	return val;
-}
-static inline void eeh_writeq(u64 val, volatile void __iomem *addr)
-{
-	out_le64(addr, val);
-}
-static inline u64 eeh_raw_readq(const volatile void __iomem *addr)
-{
-	u64 val = in_be64(addr);
-	if (EEH_POSSIBLE_ERROR(val, u64))
-		return eeh_check_failure(addr, val);
-	return val;
-}
-static inline void eeh_raw_writeq(u64 val, volatile void __iomem *addr)
-{
-	out_be64(addr, val);
-}
-
-#define EEH_CHECK_ALIGN(v,a) \
-	((((unsigned long)(v)) & ((a) - 1)) == 0)
-
-static inline void eeh_memset_io(volatile void __iomem *addr, int c,
-				 unsigned long n)
-{
-	void *p = (void __force *)addr;
-	u32 lc = c;
-	lc |= lc << 8;
-	lc |= lc << 16;
-
-	while(n && !EEH_CHECK_ALIGN(p, 4)) {
-		*((volatile u8 *)p) = c;
-		p++;
-		n--;
-	}
-	while(n >= 4) {
-		*((volatile u32 *)p) = lc;
-		p += 4;
-		n -= 4;
-	}
-	while(n) {
-		*((volatile u8 *)p) = c;
-		p++;
-		n--;
-	}
-	__asm__ __volatile__ ("sync" : : : "memory");
-}
-static inline void eeh_memcpy_fromio(void *dest, const volatile void __iomem *src,
-				     unsigned long n)
-{
-	void *vsrc = (void __force *) src;
-	void *destsave = dest;
-	unsigned long nsave = n;
-
-	while(n && (!EEH_CHECK_ALIGN(vsrc, 4) || !EEH_CHECK_ALIGN(dest, 4))) {
-		*((u8 *)dest) = *((volatile u8 *)vsrc);
-		__asm__ __volatile__ ("eieio" : : : "memory");
-		vsrc++;
-		dest++;
-		n--;
-	}
-	while(n > 4) {
-		*((u32 *)dest) = *((volatile u32 *)vsrc);
-		__asm__ __volatile__ ("eieio" : : : "memory");
-		vsrc += 4;
-		dest += 4;
-		n -= 4;
-	}
-	while(n) {
-		*((u8 *)dest) = *((volatile u8 *)vsrc);
-		__asm__ __volatile__ ("eieio" : : : "memory");
-		vsrc++;
-		dest++;
-		n--;
-	}
-	__asm__ __volatile__ ("sync" : : : "memory");
-
-	/* Look for ffff's here at dest[n].  Assume that at least 4 bytes
-	 * were copied. Check all four bytes.
-	 */
-	if ((nsave >= 4) &&
-		(EEH_POSSIBLE_ERROR((*((u32 *) destsave+nsave-4)), u32))) {
-		eeh_check_failure(src, (*((u32 *) destsave+nsave-4)));
-	}
-}
-
-static inline void eeh_memcpy_toio(volatile void __iomem *dest, const void *src,
-				   unsigned long n)
-{
-	void *vdest = (void __force *) dest;
-
-	while(n && (!EEH_CHECK_ALIGN(vdest, 4) || !EEH_CHECK_ALIGN(src, 4))) {
-		*((volatile u8 *)vdest) = *((u8 *)src);
-		src++;
-		vdest++;
-		n--;
-	}
-	while(n > 4) {
-		*((volatile u32 *)vdest) = *((volatile u32 *)src);
-		src += 4;
-		vdest += 4;
-		n-=4;
-	}
-	while(n) {
-		*((volatile u8 *)vdest) = *((u8 *)src);
-		src++;
-		vdest++;
-		n--;
-	}
-	__asm__ __volatile__ ("sync" : : : "memory");
-}
-
-#undef EEH_CHECK_ALIGN
-
-static inline u8 eeh_inb(unsigned long port)
-{
-	u8 val;
-	if (!_IO_IS_VALID(port))
-		return ~0;
-	val = in_8((u8 __iomem *)(port+pci_io_base));
-	if (EEH_POSSIBLE_ERROR(val, u8))
-		return eeh_check_failure((void __iomem *)(port), val);
-	return val;
-}
-
-static inline void eeh_outb(u8 val, unsigned long port)
-{
-	if (_IO_IS_VALID(port))
-		out_8((u8 __iomem *)(port+pci_io_base), val);
-}
-
-static inline u16 eeh_inw(unsigned long port)
-{
-	u16 val;
-	if (!_IO_IS_VALID(port))
-		return ~0;
-	val = in_le16((u16 __iomem *)(port+pci_io_base));
-	if (EEH_POSSIBLE_ERROR(val, u16))
-		return eeh_check_failure((void __iomem *)(port), val);
-	return val;
-}
-
-static inline void eeh_outw(u16 val, unsigned long port)
-{
-	if (_IO_IS_VALID(port))
-		out_le16((u16 __iomem *)(port+pci_io_base), val);
-}
-
-static inline u32 eeh_inl(unsigned long port)
-{
-	u32 val;
-	if (!_IO_IS_VALID(port))
-		return ~0;
-	val = in_le32((u32 __iomem *)(port+pci_io_base));
-	if (EEH_POSSIBLE_ERROR(val, u32))
-		return eeh_check_failure((void __iomem *)(port), val);
-	return val;
-}
-
-static inline void eeh_outl(u32 val, unsigned long port)
-{
-	if (_IO_IS_VALID(port))
-		out_le32((u32 __iomem *)(port+pci_io_base), val);
-}
-
-/* in-string eeh macros */
-static inline void eeh_insb(unsigned long port, void * buf, int ns)
-{
-	_insb((u8 __iomem *)(port+pci_io_base), buf, ns);
-	if (EEH_POSSIBLE_ERROR((*(((u8*)buf)+ns-1)), u8))
-		eeh_check_failure((void __iomem *)(port), *(u8*)buf);
-}
-
-static inline void eeh_insw_ns(unsigned long port, void * buf, int ns)
-{
-	_insw_ns((u16 __iomem *)(port+pci_io_base), buf, ns);
-	if (EEH_POSSIBLE_ERROR((*(((u16*)buf)+ns-1)), u16))
-		eeh_check_failure((void __iomem *)(port), *(u16*)buf);
-}
-
-static inline void eeh_insl_ns(unsigned long port, void * buf, int nl)
-{
-	_insl_ns((u32 __iomem *)(port+pci_io_base), buf, nl);
-	if (EEH_POSSIBLE_ERROR((*(((u32*)buf)+nl-1)), u32))
-		eeh_check_failure((void __iomem *)(port), *(u32*)buf);
-}
-
-#endif /* _PPC64_EEH_H */
diff --git a/include/asm-ppc64/floppy.h b/include/asm-ppc64/floppy.h
deleted file mode 100644
index 5c497b588e54..000000000000
--- a/include/asm-ppc64/floppy.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Architecture specific parts of the Floppy driver
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1995
- */
-#ifndef __ASM_PPC64_FLOPPY_H
-#define __ASM_PPC64_FLOPPY_H
-
-#include <linux/config.h>
-#include <asm/machdep.h>
-
-#define fd_inb(port)			inb_p(port)
-#define fd_outb(value,port)		outb_p(value,port)
-
-#define fd_enable_dma()         enable_dma(FLOPPY_DMA)
-#define fd_disable_dma()        disable_dma(FLOPPY_DMA)
-#define fd_request_dma()        request_dma(FLOPPY_DMA,"floppy")
-#define fd_free_dma()           free_dma(FLOPPY_DMA)
-#define fd_clear_dma_ff()       clear_dma_ff(FLOPPY_DMA)
-#define fd_set_dma_mode(mode)   set_dma_mode(FLOPPY_DMA,mode)
-#define fd_set_dma_count(count) set_dma_count(FLOPPY_DMA,count)
-#define fd_enable_irq()         enable_irq(FLOPPY_IRQ)
-#define fd_disable_irq()        disable_irq(FLOPPY_IRQ)
-#define fd_cacheflush(addr,size) /* nothing */
-#define fd_request_irq()        request_irq(FLOPPY_IRQ, floppy_interrupt, \
-					    SA_INTERRUPT|SA_SAMPLE_RANDOM, \
-				            "floppy", NULL)
-#define fd_free_irq()           free_irq(FLOPPY_IRQ, NULL);
-
-#ifdef CONFIG_PCI
-
-#include <linux/pci.h>
-
-#define fd_dma_setup(addr,size,mode,io) ppc64_fd_dma_setup(addr,size,mode,io)
-
-static __inline__ int 
-ppc64_fd_dma_setup(char *addr, unsigned long size, int mode, int io)
-{
-	static unsigned long prev_size;
-	static dma_addr_t bus_addr = 0;
-	static char *prev_addr;
-	static int prev_dir;
-	int dir;
-
-	dir = (mode == DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
-
-	if (bus_addr 
-	    && (addr != prev_addr || size != prev_size || dir != prev_dir)) {
-		/* different from last time -- unmap prev */
-		pci_unmap_single(NULL, bus_addr, prev_size, prev_dir);
-		bus_addr = 0;
-	}
-
-	if (!bus_addr)	/* need to map it */ {
-		bus_addr = pci_map_single(NULL, addr, size, dir);
-	}
-
-	/* remember this one as prev */
-	prev_addr = addr;
-	prev_size = size;
-	prev_dir = dir;
-
-	fd_clear_dma_ff();
-	fd_cacheflush(addr, size);
-	fd_set_dma_mode(mode);
-	set_dma_addr(FLOPPY_DMA, bus_addr);
-	fd_set_dma_count(size);
-	virtual_dma_port = io;
-	fd_enable_dma();
-
-	return 0;
-}
-
-#endif /* CONFIG_PCI */
-
-__inline__ void virtual_dma_init(void)
-{
-	/* Nothing to do on PowerPC */
-}
-
-static int FDC1 = 0x3f0;
-static int FDC2 = -1;
-
-/*
- * Again, the CMOS information not available
- */
-#define FLOPPY0_TYPE 6
-#define FLOPPY1_TYPE 0
-
-#define N_FDC 2			/* Don't change this! */
-#define N_DRIVE 8
-
-#define FLOPPY_MOTOR_MASK 0xf0
-
-/*
- * The PowerPC has no problems with floppy DMA crossing 64k borders.
- */
-#define CROSS_64KB(a,s)	(0)
-
-#define EXTRA_FLOPPY_PARAMS
-
-#endif /* __ASM_PPC64_FLOPPY_H */
diff --git a/include/asm-ppc64/hvconsole.h b/include/asm-ppc64/hvconsole.h
deleted file mode 100644
index 6da93ce74dc0..000000000000
--- a/include/asm-ppc64/hvconsole.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * hvconsole.h
- * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
- *
- * LPAR console support.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#ifndef _PPC64_HVCONSOLE_H
-#define _PPC64_HVCONSOLE_H
-
-/*
- * This is the max number of console adapters that can/will be found as
- * console devices on first stage console init.  Any number beyond this range
- * can't be used as a console device but is still a valid tty device.
- */
-#define MAX_NR_HVC_CONSOLES	16
-
-/* implemented by a low level driver */
-struct hv_ops {
-	int (*get_chars)(uint32_t vtermno, char *buf, int count);
-	int (*put_chars)(uint32_t vtermno, const char *buf, int count);
-};
-extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
-extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
-
-struct hvc_struct;
-
-/* Register a vterm and a slot index for use as a console (console_init) */
-extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops);
-/* register a vterm for hvc tty operation (module_init or hotplug add) */
-extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq,
-						 struct hv_ops *ops);
-/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */
-extern int __devexit hvc_remove(struct hvc_struct *hp);
-#endif /* _PPC64_HVCONSOLE_H */
diff --git a/include/asm-ppc64/hvcserver.h b/include/asm-ppc64/hvcserver.h
deleted file mode 100644
index aecba9665796..000000000000
--- a/include/asm-ppc64/hvcserver.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * hvcserver.h
- * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
- *
- * PPC64 virtual I/O console server support.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#ifndef _PPC64_HVCSERVER_H
-#define _PPC64_HVCSERVER_H
-
-#include <linux/list.h>
-
-/* Converged Location Code length */
-#define HVCS_CLC_LENGTH	79
-
-/**
- * hvcs_partner_info - an element in a list of partner info
- * @node: list_head denoting this partner_info struct's position in the list of
- *	partner info.
- * @unit_address: The partner unit address of this entry.
- * @partition_ID: The partner partition ID of this entry.
- * @location_code: The converged location code of this entry + 1 char for the
- *	null-term.
- *
- * This structure outlines the format that partner info is presented to a caller
- * of the hvcs partner info fetching functions.  These are strung together into
- * a list using linux kernel lists.
- */
-struct hvcs_partner_info {
-	struct list_head node;
-	uint32_t unit_address;
-	uint32_t partition_ID;
-	char location_code[HVCS_CLC_LENGTH + 1]; /* CLC + 1 null-term char */
-};
-
-extern int hvcs_free_partner_info(struct list_head *head);
-extern int hvcs_get_partner_info(uint32_t unit_address,
-		struct list_head *head, unsigned long *pi_buff);
-extern int hvcs_register_connection(uint32_t unit_address,
-		uint32_t p_partition_ID, uint32_t p_unit_address);
-extern int hvcs_free_connection(uint32_t unit_address);
-
-#endif /* _PPC64_HVCSERVER_H */
diff --git a/include/asm-ppc64/nvram.h b/include/asm-ppc64/nvram.h
deleted file mode 100644
index def47d720d3d..000000000000
--- a/include/asm-ppc64/nvram.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * PreP compliant NVRAM access
- * This needs to be updated for PPC64
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _PPC64_NVRAM_H
-#define _PPC64_NVRAM_H
-
-#define NVRW_CNT 0x20
-#define NVRAM_HEADER_LEN 16 /* sizeof(struct nvram_header) */
-#define NVRAM_BLOCK_LEN 16
-#define NVRAM_MAX_REQ (2080/NVRAM_BLOCK_LEN)
-#define NVRAM_MIN_REQ (1056/NVRAM_BLOCK_LEN)
-
-#define NVRAM_AS0  0x74
-#define NVRAM_AS1  0x75
-#define NVRAM_DATA 0x77
-
-
-/* RTC Offsets */
-
-#define MOTO_RTC_SECONDS	0x1FF9
-#define MOTO_RTC_MINUTES	0x1FFA
-#define MOTO_RTC_HOURS		0x1FFB
-#define MOTO_RTC_DAY_OF_WEEK	0x1FFC
-#define MOTO_RTC_DAY_OF_MONTH	0x1FFD
-#define MOTO_RTC_MONTH		0x1FFE
-#define MOTO_RTC_YEAR		0x1FFF
-#define MOTO_RTC_CONTROLA       0x1FF8
-#define MOTO_RTC_CONTROLB       0x1FF9
-
-#define NVRAM_SIG_SP	0x02	/* support processor */
-#define NVRAM_SIG_OF	0x50	/* open firmware config */
-#define NVRAM_SIG_FW	0x51	/* general firmware */
-#define NVRAM_SIG_HW	0x52	/* hardware (VPD) */
-#define NVRAM_SIG_FLIP	0x5a	/* Apple flip/flop header */
-#define NVRAM_SIG_APPL	0x5f	/* Apple "system" (???) */
-#define NVRAM_SIG_SYS	0x70	/* system env vars */
-#define NVRAM_SIG_CFG	0x71	/* config data */
-#define NVRAM_SIG_ELOG	0x72	/* error log */
-#define NVRAM_SIG_VEND	0x7e	/* vendor defined */
-#define NVRAM_SIG_FREE	0x7f	/* Free space */
-#define NVRAM_SIG_OS	0xa0	/* OS defined */
-#define NVRAM_SIG_PANIC	0xa1	/* Apple OSX "panic" */
-
-/* If change this size, then change the size of NVNAME_LEN */
-struct nvram_header {
-	unsigned char signature;
-	unsigned char checksum;
-	unsigned short length;
-	char name[12];
-};
-
-struct nvram_partition {
-	struct list_head partition;
-	struct nvram_header header;
-	unsigned int index;
-};
-
-
-extern int nvram_write_error_log(char * buff, int length, unsigned int err_type);
-extern int nvram_read_error_log(char * buff, int length, unsigned int * err_type);
-extern int nvram_clear_error_log(void);
-extern struct nvram_partition *nvram_find_partition(int sig, const char *name);
-
-extern int pSeries_nvram_init(void);
-extern int pmac_nvram_init(void);
-extern int mmio_nvram_init(void);
-
-/* PowerMac specific nvram stuffs */
-
-enum {
-	pmac_nvram_OF,		/* Open Firmware partition */
-	pmac_nvram_XPRAM,	/* MacOS XPRAM partition */
-	pmac_nvram_NR		/* MacOS Name Registry partition */
-};
-
-/* Return partition offset in nvram */
-extern int	pmac_get_partition(int partition);
-
-/* Direct access to XPRAM on PowerMacs */
-extern u8	pmac_xpram_read(int xpaddr);
-extern void	pmac_xpram_write(int xpaddr, u8 data);
-
-/* Synchronize NVRAM */
-extern int	nvram_sync(void);
-
-/* Some offsets in XPRAM */
-#define PMAC_XPRAM_MACHINE_LOC	0xe4
-#define PMAC_XPRAM_SOUND_VOLUME	0x08
-
-/* Machine location structure in PowerMac XPRAM */
-struct pmac_machine_location {
-	unsigned int	latitude;	/* 2+30 bit Fractional number */
-	unsigned int	longitude;	/* 2+30 bit Fractional number */
-	unsigned int	delta;		/* mix of GMT delta and DLS */
-};
-
-/*
- * /dev/nvram ioctls
- *
- * Note that PMAC_NVRAM_GET_OFFSET is still supported, but is
- * definitely obsolete. Do not use it if you can avoid it
- */
-
-#define OBSOLETE_PMAC_NVRAM_GET_OFFSET \
-				_IOWR('p', 0x40, int)
-
-#define IOC_NVRAM_GET_OFFSET	_IOWR('p', 0x42, int)	/* Get NVRAM partition offset */
-
-#endif /* _PPC64_NVRAM_H */
diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h
deleted file mode 100644
index ddfe186589fa..000000000000
--- a/include/asm-ppc64/prom.h
+++ /dev/null
@@ -1,220 +0,0 @@
-#ifndef _PPC64_PROM_H
-#define _PPC64_PROM_H
-
-/*
- * Definitions for talking to the Open Firmware PROM on
- * Power Macintosh computers.
- *
- * Copyright (C) 1996 Paul Mackerras.
- *
- * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/config.h>
-#include <linux/proc_fs.h>
-#include <asm/atomic.h>
-
-#define PTRRELOC(x)     ((typeof(x))((unsigned long)(x) - offset))
-#define PTRUNRELOC(x)   ((typeof(x))((unsigned long)(x) + offset))
-#define RELOC(x)        (*PTRRELOC(&(x)))
-
-/* Definitions used by the flattened device tree */
-#define OF_DT_HEADER		0xd00dfeed	/* marker */
-#define OF_DT_BEGIN_NODE	0x1		/* Start of node, full name */
-#define OF_DT_END_NODE		0x2		/* End node */
-#define OF_DT_PROP		0x3		/* Property: name off, size,
-						 * content */
-#define OF_DT_NOP		0x4		/* nop */
-#define OF_DT_END		0x9
-
-#define OF_DT_VERSION		0x10
-
-/*
- * This is what gets passed to the kernel by prom_init or kexec
- *
- * The dt struct contains the device tree structure, full pathes and
- * property contents. The dt strings contain a separate block with just
- * the strings for the property names, and is fully page aligned and
- * self contained in a page, so that it can be kept around by the kernel,
- * each property name appears only once in this page (cheap compression)
- *
- * the mem_rsvmap contains a map of reserved ranges of physical memory,
- * passing it here instead of in the device-tree itself greatly simplifies
- * the job of everybody. It's just a list of u64 pairs (base/size) that
- * ends when size is 0
- */
-struct boot_param_header
-{
-	u32	magic;			/* magic word OF_DT_HEADER */
-	u32	totalsize;		/* total size of DT block */
-	u32	off_dt_struct;		/* offset to structure */
-	u32	off_dt_strings;		/* offset to strings */
-	u32	off_mem_rsvmap;		/* offset to memory reserve map */
-	u32	version;		/* format version */
-	u32	last_comp_version;	/* last compatible version */
-	/* version 2 fields below */
-	u32	boot_cpuid_phys;	/* Physical CPU id we're booting on */
-	/* version 3 fields below */
-	u32	dt_strings_size;	/* size of the DT strings block */
-};
-
-
-
-typedef u32 phandle;
-typedef u32 ihandle;
-
-struct address_range {
-	unsigned long space;
-	unsigned long address;
-	unsigned long size;
-};
-
-struct interrupt_info {
-	int	line;
-	int	sense;		/* +ve/-ve logic, edge or level, etc. */
-};
-
-struct pci_address {
-	u32 a_hi;
-	u32 a_mid;
-	u32 a_lo;
-};
-
-struct isa_address {
-	u32 a_hi;
-	u32 a_lo;
-};
-
-struct isa_range {
-	struct isa_address isa_addr;
-	struct pci_address pci_addr;
-	unsigned int size;
-};
-
-struct reg_property {
-	unsigned long address;
-	unsigned long size;
-};
-
-struct reg_property32 {
-	unsigned int address;
-	unsigned int size;
-};
-
-struct reg_property64 {
-	unsigned long address;
-	unsigned long size;
-};
-
-struct property {
-	char	*name;
-	int	length;
-	unsigned char *value;
-	struct property *next;
-};
-
-struct device_node {
-	char	*name;
-	char	*type;
-	phandle	node;
-	phandle linux_phandle;
-	int	n_addrs;
-	struct	address_range *addrs;
-	int	n_intrs;
-	struct	interrupt_info *intrs;
-	char	*full_name;
-
-	struct	property *properties;
-	struct	device_node *parent;
-	struct	device_node *child;
-	struct	device_node *sibling;
-	struct	device_node *next;	/* next device of same type */
-	struct	device_node *allnext;	/* next in list of all nodes */
-	struct  proc_dir_entry *pde;	/* this node's proc directory */
-	struct  kref kref;
-	unsigned long _flags;
-	void	*data;
-#ifdef CONFIG_PPC_ISERIES
-	struct list_head Device_List;
-#endif
-};
-
-extern struct device_node *of_chosen;
-
-/* flag descriptions */
-#define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */
-
-#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
-#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
-
-/*
- * Until 32-bit ppc can add proc_dir_entries to its device_node
- * definition, we cannot refer to pde, name_link, and addr_link
- * in arch-independent code.
- */
-#define HAVE_ARCH_DEVTREE_FIXUPS
-
-static inline void set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de)
-{
-	dn->pde = de;
-}
-
-
-/* OBSOLETE: Old stlye node lookup */
-extern struct device_node *find_devices(const char *name);
-extern struct device_node *find_type_devices(const char *type);
-extern struct device_node *find_path_device(const char *path);
-extern struct device_node *find_compatible_devices(const char *type,
-						   const char *compat);
-extern struct device_node *find_all_nodes(void);
-
-/* New style node lookup */
-extern struct device_node *of_find_node_by_name(struct device_node *from,
-	const char *name);
-extern struct device_node *of_find_node_by_type(struct device_node *from,
-	const char *type);
-extern struct device_node *of_find_compatible_node(struct device_node *from,
-	const char *type, const char *compat);
-extern struct device_node *of_find_node_by_path(const char *path);
-extern struct device_node *of_find_node_by_phandle(phandle handle);
-extern struct device_node *of_find_all_nodes(struct device_node *prev);
-extern struct device_node *of_get_parent(const struct device_node *node);
-extern struct device_node *of_get_next_child(const struct device_node *node,
-					     struct device_node *prev);
-extern struct device_node *of_node_get(struct device_node *node);
-extern void of_node_put(struct device_node *node);
-
-/* For scanning the flat device-tree at boot time */
-int __init of_scan_flat_dt(int (*it)(unsigned long node,
-				     const char *uname, int depth,
-				     void *data),
-			   void *data);
-void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
-				 unsigned long *size);
-
-/* For updating the device tree at runtime */
-extern void of_attach_node(struct device_node *);
-extern void of_detach_node(const struct device_node *);
-
-/* Other Prototypes */
-extern unsigned long prom_init(unsigned long, unsigned long, unsigned long,
-	unsigned long, unsigned long);
-extern void finish_device_tree(void);
-extern void unflatten_device_tree(void);
-extern void early_init_devtree(void *);
-extern int device_is_compatible(struct device_node *device, const char *);
-extern int machine_is_compatible(const char *compat);
-extern unsigned char *get_property(struct device_node *node, const char *name,
-				   int *lenp);
-extern void print_properties(struct device_node *node);
-extern int prom_n_addr_cells(struct device_node* np);
-extern int prom_n_size_cells(struct device_node* np);
-extern int prom_n_intr_cells(struct device_node* np);
-extern void prom_get_irq_senses(unsigned char *senses, int off, int max);
-extern int prom_add_property(struct device_node* np, struct property* prop);
-
-#endif /* _PPC64_PROM_H */
diff --git a/include/asm-ppc64/serial.h b/include/asm-ppc64/serial.h
deleted file mode 100644
index d6bcb79b7d7b..000000000000
--- a/include/asm-ppc64/serial.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * include/asm-ppc64/serial.h
- */
-#ifndef _PPC64_SERIAL_H
-#define _PPC64_SERIAL_H
-
-/*
- * This assumes you have a 1.8432 MHz clock for your UART.
- *
- * It'd be nice if someone built a serial card with a 24.576 MHz
- * clock, since the 16550A is capable of handling a top speed of 1.5
- * megabits/second; but this requires the faster clock.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/* Default baud base if not found in device-tree */
-#define BASE_BAUD ( 1843200 / 16 )
-
-#endif /* _PPC64_SERIAL_H */
diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h
deleted file mode 100644
index bf9a6aba19c9..000000000000
--- a/include/asm-ppc64/system.h
+++ /dev/null
@@ -1,310 +0,0 @@
-#ifndef __PPC64_SYSTEM_H
-#define __PPC64_SYSTEM_H
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/config.h>
-#include <linux/compiler.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-#include <asm/hw_irq.h>
-#include <asm/synch.h>
-
-/*
- * Memory barrier.
- * The sync instruction guarantees that all memory accesses initiated
- * by this processor have been performed (with respect to all other
- * mechanisms that access memory).  The eieio instruction is a barrier
- * providing an ordering (separately) for (a) cacheable stores and (b)
- * loads and stores to non-cacheable memory (e.g. I/O devices).
- *
- * mb() prevents loads and stores being reordered across this point.
- * rmb() prevents loads being reordered across this point.
- * wmb() prevents stores being reordered across this point.
- * read_barrier_depends() prevents data-dependent loads being reordered
- *	across this point (nop on PPC).
- *
- * We have to use the sync instructions for mb(), since lwsync doesn't
- * order loads with respect to previous stores.  Lwsync is fine for
- * rmb(), though.
- * For wmb(), we use sync since wmb is used in drivers to order
- * stores to system memory with respect to writes to the device.
- * However, smp_wmb() can be a lighter-weight eieio barrier on
- * SMP since it is only used to order updates to system memory.
- */
-#define mb()   __asm__ __volatile__ ("sync" : : : "memory")
-#define rmb()  __asm__ __volatile__ ("lwsync" : : : "memory")
-#define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
-#define read_barrier_depends()  do { } while(0)
-
-#define set_mb(var, value)	do { var = value; smp_mb(); } while (0)
-#define set_wmb(var, value)	do { var = value; smp_wmb(); } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#define smp_rmb()	rmb()
-#define smp_wmb()	eieio()
-#define smp_read_barrier_depends()  read_barrier_depends()
-#else
-#define smp_mb()	__asm__ __volatile__("": : :"memory")
-#define smp_rmb()	__asm__ __volatile__("": : :"memory")
-#define smp_wmb()	__asm__ __volatile__("": : :"memory")
-#define smp_read_barrier_depends()  do { } while(0)
-#endif /* CONFIG_SMP */
-
-#ifdef __KERNEL__
-struct task_struct;
-struct pt_regs;
-
-#ifdef CONFIG_DEBUGGER
-
-extern int (*__debugger)(struct pt_regs *regs);
-extern int (*__debugger_ipi)(struct pt_regs *regs);
-extern int (*__debugger_bpt)(struct pt_regs *regs);
-extern int (*__debugger_sstep)(struct pt_regs *regs);
-extern int (*__debugger_iabr_match)(struct pt_regs *regs);
-extern int (*__debugger_dabr_match)(struct pt_regs *regs);
-extern int (*__debugger_fault_handler)(struct pt_regs *regs);
-
-#define DEBUGGER_BOILERPLATE(__NAME) \
-static inline int __NAME(struct pt_regs *regs) \
-{ \
-	if (unlikely(__ ## __NAME)) \
-		return __ ## __NAME(regs); \
-	return 0; \
-}
-
-DEBUGGER_BOILERPLATE(debugger)
-DEBUGGER_BOILERPLATE(debugger_ipi)
-DEBUGGER_BOILERPLATE(debugger_bpt)
-DEBUGGER_BOILERPLATE(debugger_sstep)
-DEBUGGER_BOILERPLATE(debugger_iabr_match)
-DEBUGGER_BOILERPLATE(debugger_dabr_match)
-DEBUGGER_BOILERPLATE(debugger_fault_handler)
-
-#ifdef CONFIG_XMON
-extern void xmon_init(int enable);
-#endif
-
-#else
-static inline int debugger(struct pt_regs *regs) { return 0; }
-static inline int debugger_ipi(struct pt_regs *regs) { return 0; }
-static inline int debugger_bpt(struct pt_regs *regs) { return 0; }
-static inline int debugger_sstep(struct pt_regs *regs) { return 0; }
-static inline int debugger_iabr_match(struct pt_regs *regs) { return 0; }
-static inline int debugger_dabr_match(struct pt_regs *regs) { return 0; }
-static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
-#endif
-
-extern int set_dabr(unsigned long dabr);
-extern void _exception(int signr, struct pt_regs *regs, int code,
-		       unsigned long addr);
-extern int fix_alignment(struct pt_regs *regs);
-extern void bad_page_fault(struct pt_regs *regs, unsigned long address,
-			   int sig);
-extern void show_regs(struct pt_regs * regs);
-extern void low_hash_fault(struct pt_regs *regs, unsigned long address);
-extern int die(const char *str, struct pt_regs *regs, long err);
-
-extern int _get_PVR(void);
-extern void giveup_fpu(struct task_struct *);
-extern void disable_kernel_fp(void);
-extern void flush_fp_to_thread(struct task_struct *);
-extern void enable_kernel_fp(void);
-extern void giveup_altivec(struct task_struct *);
-extern void disable_kernel_altivec(void);
-extern void enable_kernel_altivec(void);
-extern int emulate_altivec(struct pt_regs *);
-extern void cvt_fd(float *from, double *to, struct thread_struct *thread);
-extern void cvt_df(double *from, float *to, struct thread_struct *thread);
-
-#ifdef CONFIG_ALTIVEC
-extern void flush_altivec_to_thread(struct task_struct *);
-#else
-static inline void flush_altivec_to_thread(struct task_struct *t)
-{
-}
-#endif
-
-static inline void flush_spe_to_thread(struct task_struct *t)
-{
-}
-
-extern int mem_init_done;	/* set on boot once kmalloc can be called */
-extern unsigned long memory_limit;
-
-/* EBCDIC -> ASCII conversion for [0-9A-Z] on iSeries */
-extern unsigned char e2a(unsigned char);
-
-extern struct task_struct *__switch_to(struct task_struct *,
-				       struct task_struct *);
-#define switch_to(prev, next, last)	((last) = __switch_to((prev), (next)))
-
-struct thread_struct;
-extern struct task_struct * _switch(struct thread_struct *prev,
-				    struct thread_struct *next);
-
-extern unsigned long klimit;
-
-extern int powersave_nap;	/* set if nap mode can be used in idle loop */
-
-/*
- * Atomic exchange
- *
- * Changes the memory location '*ptr' to be val and returns
- * the previous value stored there.
- *
- * Inline asm pulled from arch/ppc/kernel/misc.S so ppc64
- * is more like most of the other architectures.
- */
-static __inline__ unsigned long
-__xchg_u32(volatile unsigned int *m, unsigned long val)
-{
-	unsigned long dummy;
-
-	__asm__ __volatile__(
-	EIEIO_ON_SMP
-"1:	lwarx %0,0,%3		# __xchg_u32\n\
-	stwcx. %2,0,%3\n\
-2:	bne- 1b"
-	ISYNC_ON_SMP
- 	: "=&r" (dummy), "=m" (*m)
-	: "r" (val), "r" (m)
-	: "cc", "memory");
-
-	return (dummy);
-}
-
-static __inline__ unsigned long
-__xchg_u64(volatile long *m, unsigned long val)
-{
-	unsigned long dummy;
-
-	__asm__ __volatile__(
-	EIEIO_ON_SMP
-"1:	ldarx %0,0,%3		# __xchg_u64\n\
-	stdcx. %2,0,%3\n\
-2:	bne- 1b"
-	ISYNC_ON_SMP
-	: "=&r" (dummy), "=m" (*m)
-	: "r" (val), "r" (m)
-	: "cc", "memory");
-
-	return (dummy);
-}
-
-/*
- * This function doesn't exist, so you'll get a linker error
- * if something tries to do an invalid xchg().
- */
-extern void __xchg_called_with_bad_pointer(void);
-
-static __inline__ unsigned long
-__xchg(volatile void *ptr, unsigned long x, unsigned int size)
-{
-	switch (size) {
-	case 4:
-		return __xchg_u32(ptr, x);
-	case 8:
-		return __xchg_u64(ptr, x);
-	}
-	__xchg_called_with_bad_pointer();
-	return x;
-}
-
-#define xchg(ptr,x)							     \
-  ({									     \
-     __typeof__(*(ptr)) _x_ = (x);					     \
-     (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
-  })
-
-#define tas(ptr) (xchg((ptr),1))
-
-#define __HAVE_ARCH_CMPXCHG	1
-
-static __inline__ unsigned long
-__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
-{
-	unsigned int prev;
-
-	__asm__ __volatile__ (
-	EIEIO_ON_SMP
-"1:	lwarx	%0,0,%2		# __cmpxchg_u32\n\
-	cmpw	0,%0,%3\n\
-	bne-	2f\n\
-	stwcx.	%4,0,%2\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	"\n\
-2:"
-	: "=&r" (prev), "=m" (*p)
-	: "r" (p), "r" (old), "r" (new), "m" (*p)
-	: "cc", "memory");
-
-	return prev;
-}
-
-static __inline__ unsigned long
-__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
-{
-	unsigned long prev;
-
-	__asm__ __volatile__ (
-	EIEIO_ON_SMP
-"1:	ldarx	%0,0,%2		# __cmpxchg_u64\n\
-	cmpd	0,%0,%3\n\
-	bne-	2f\n\
-	stdcx.	%4,0,%2\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	"\n\
-2:"
-	: "=&r" (prev), "=m" (*p)
-	: "r" (p), "r" (old), "r" (new), "m" (*p)
-	: "cc", "memory");
-
-	return prev;
-}
-
-/* This function doesn't exist, so you'll get a linker error
-   if something tries to do an invalid cmpxchg().  */
-extern void __cmpxchg_called_with_bad_pointer(void);
-
-static __inline__ unsigned long
-__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
-	  unsigned int size)
-{
-	switch (size) {
-	case 4:
-		return __cmpxchg_u32(ptr, old, new);
-	case 8:
-		return __cmpxchg_u64(ptr, old, new);
-	}
-	__cmpxchg_called_with_bad_pointer();
-	return old;
-}
-
-#define cmpxchg(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
-	(unsigned long)(n),sizeof(*(ptr))))
-
-/*
- * We handle most unaligned accesses in hardware. On the other hand 
- * unaligned DMA can be very expensive on some ppc64 IO chips (it does
- * powers of 2 writes until it reaches sufficient alignment).
- *
- * Based on this we disable the IP header alignment in network drivers.
- */
-#define NET_IP_ALIGN   0
-
-#define arch_align_stack(x) (x)
-
-extern unsigned long reloc_offset(void);
-
-#endif /* __KERNEL__ */
-#endif
-- 
cgit v1.2.3-71-gd317


From 7568cb4ef6c507164b65b01f972a3bd026898ae1 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 14 Nov 2005 17:30:17 +1100
Subject: powerpc: Move most remaining ppc64 files over to arch/powerpc

Also deletes files in arch/ppc64 that are no longer used now that
we don't compile with ARCH=ppc64 any more.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/Kconfig                       |    1 +
 arch/powerpc/kernel/Makefile               |   18 +-
 arch/powerpc/kernel/dma_64.c               |  151 ++
 arch/powerpc/kernel/iomap.c                |  146 ++
 arch/powerpc/kernel/iommu.c                |  572 ++++++++
 arch/powerpc/kernel/kprobes.c              |  459 +++++++
 arch/powerpc/kernel/machine_kexec_64.c     |  358 +++++
 arch/powerpc/kernel/module_64.c            |  455 ++++++
 arch/powerpc/kernel/pci_64.c               | 1319 ++++++++++++++++++
 arch/powerpc/kernel/pci_direct_iommu.c     |   94 ++
 arch/powerpc/kernel/pci_dn.c               |  230 ++++
 arch/powerpc/kernel/pci_iommu.c            |  128 ++
 arch/powerpc/platforms/pseries/Makefile    |    5 +-
 arch/powerpc/platforms/pseries/hvconsole.c |   74 +
 arch/powerpc/platforms/pseries/hvcserver.c |  251 ++++
 arch/ppc64/Kconfig                         |  520 -------
 arch/ppc64/kernel/Makefile                 |   40 +-
 arch/ppc64/kernel/asm-offsets.c            |  195 ---
 arch/ppc64/kernel/btext.c                  |  792 -----------
 arch/ppc64/kernel/dma.c                    |  151 --
 arch/ppc64/kernel/head.S                   | 2007 ---------------------------
 arch/ppc64/kernel/hvconsole.c              |   74 -
 arch/ppc64/kernel/hvcserver.c              |  251 ----
 arch/ppc64/kernel/iomap.c                  |  146 --
 arch/ppc64/kernel/iommu.c                  |  572 --------
 arch/ppc64/kernel/kprobes.c                |  459 -------
 arch/ppc64/kernel/machine_kexec.c          |  358 -----
 arch/ppc64/kernel/misc.S                   |  940 -------------
 arch/ppc64/kernel/module.c                 |  455 ------
 arch/ppc64/kernel/pci.c                    | 1319 ------------------
 arch/ppc64/kernel/pci_direct_iommu.c       |   94 --
 arch/ppc64/kernel/pci_dn.c                 |  230 ----
 arch/ppc64/kernel/pci_iommu.c              |  128 --
 arch/ppc64/kernel/ppc_ksyms.c              |   76 --
 arch/ppc64/kernel/prom.c                   | 1956 --------------------------
 arch/ppc64/kernel/prom_init.c              | 2051 ----------------------------
 arch/ppc64/kernel/semaphore.c              |  136 --
 arch/ppc64/kernel/vdso.c                   |  625 ---------
 arch/ppc64/kernel/vmlinux.lds.S            |  151 --
 arch/ppc64/xmon/privinst.h                 |   64 -
 include/asm-ppc64/page.h                   |  328 -----
 41 files changed, 4256 insertions(+), 14123 deletions(-)
 create mode 100644 arch/powerpc/kernel/dma_64.c
 create mode 100644 arch/powerpc/kernel/iomap.c
 create mode 100644 arch/powerpc/kernel/iommu.c
 create mode 100644 arch/powerpc/kernel/kprobes.c
 create mode 100644 arch/powerpc/kernel/machine_kexec_64.c
 create mode 100644 arch/powerpc/kernel/module_64.c
 create mode 100644 arch/powerpc/kernel/pci_64.c
 create mode 100644 arch/powerpc/kernel/pci_direct_iommu.c
 create mode 100644 arch/powerpc/kernel/pci_dn.c
 create mode 100644 arch/powerpc/kernel/pci_iommu.c
 create mode 100644 arch/powerpc/platforms/pseries/hvconsole.c
 create mode 100644 arch/powerpc/platforms/pseries/hvcserver.c
 delete mode 100644 arch/ppc64/Kconfig
 delete mode 100644 arch/ppc64/kernel/asm-offsets.c
 delete mode 100644 arch/ppc64/kernel/btext.c
 delete mode 100644 arch/ppc64/kernel/dma.c
 delete mode 100644 arch/ppc64/kernel/head.S
 delete mode 100644 arch/ppc64/kernel/hvconsole.c
 delete mode 100644 arch/ppc64/kernel/hvcserver.c
 delete mode 100644 arch/ppc64/kernel/iomap.c
 delete mode 100644 arch/ppc64/kernel/iommu.c
 delete mode 100644 arch/ppc64/kernel/kprobes.c
 delete mode 100644 arch/ppc64/kernel/machine_kexec.c
 delete mode 100644 arch/ppc64/kernel/misc.S
 delete mode 100644 arch/ppc64/kernel/module.c
 delete mode 100644 arch/ppc64/kernel/pci.c
 delete mode 100644 arch/ppc64/kernel/pci_direct_iommu.c
 delete mode 100644 arch/ppc64/kernel/pci_dn.c
 delete mode 100644 arch/ppc64/kernel/pci_iommu.c
 delete mode 100644 arch/ppc64/kernel/ppc_ksyms.c
 delete mode 100644 arch/ppc64/kernel/prom.c
 delete mode 100644 arch/ppc64/kernel/prom_init.c
 delete mode 100644 arch/ppc64/kernel/semaphore.c
 delete mode 100644 arch/ppc64/kernel/vdso.c
 delete mode 100644 arch/ppc64/kernel/vmlinux.lds.S
 delete mode 100644 arch/ppc64/xmon/privinst.h
 delete mode 100644 include/asm-ppc64/page.h

(limited to 'include')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c523029674e6..c5c3f4213cd9 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -932,6 +932,7 @@ source "arch/powerpc/oprofile/Kconfig"
 
 config KPROBES
 	bool "Kprobes (EXPERIMENTAL)"
+	depends on PPC64
 	help
 	  Kprobes allows you to trap at almost any kernel address and
 	  execute a callback function.  register_kprobe() establishes
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 046b4bf1f21e..4970e3721a84 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -49,12 +49,23 @@ extra-y				+= vmlinux.lds
 obj-y				+= process.o init_task.o time.o \
 				   prom.o traps.o setup-common.o
 obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o misc_32.o systbl.o
-obj-$(CONFIG_PPC64)		+= misc_64.o
+obj-$(CONFIG_PPC64)		+= misc_64.o dma_64.o iommu.o
 obj-$(CONFIG_PPC_OF)		+= prom_init.o
 obj-$(CONFIG_MODULES)		+= ppc_ksyms.o
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
 obj-$(CONFIG_6xx)		+= idle_6xx.o
 obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
+
+module-$(CONFIG_PPC64)		+= module_64.o
+obj-$(CONFIG_MODULES)		+= $(module-y)
+
+pci64-$(CONFIG_PPC64)		+= pci_64.o pci_dn.o pci_iommu.o \
+				   pci_direct_iommu.o iomap.o
+obj-$(CONFIG_PCI)		+= $(pci64-y)
+
+kexec64-$(CONFIG_PPC64)		+= machine_kexec_64.o
+obj-$(CONFIG_KEXEC)		+= $(kexec64-y)
 
 ifeq ($(CONFIG_PPC_ISERIES),y)
 $(obj)/head_64.o: $(obj)/lparmap.s
@@ -62,11 +73,8 @@ AFLAGS_head_64.o += -I$(obj)
 endif
 
 else
-# stuff used from here for ARCH=ppc or ARCH=ppc64
+# stuff used from here for ARCH=ppc
 smpobj-$(CONFIG_SMP)		+= smp.o
-obj-$(CONFIG_PPC64)		+= traps.o process.o init_task.o time.o \
-				   setup-common.o $(smpobj-y)
-
 
 endif
 
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
new file mode 100644
index 000000000000..7c3419656ccc
--- /dev/null
+++ b/arch/powerpc/kernel/dma_64.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Implements the generic device dma API for ppc64. Handles
+ * the pci and vio busses
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+/* Include the busses we support */
+#include <linux/pci.h>
+#include <asm/vio.h>
+#include <asm/scatterlist.h>
+#include <asm/bug.h>
+
+static struct dma_mapping_ops *get_dma_ops(struct device *dev)
+{
+#ifdef CONFIG_PCI
+	if (dev->bus == &pci_bus_type)
+		return &pci_dma_ops;
+#endif
+#ifdef CONFIG_IBMVIO
+	if (dev->bus == &vio_bus_type)
+		return &vio_dma_ops;
+#endif
+	return NULL;
+}
+
+int dma_supported(struct device *dev, u64 mask)
+{
+	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops)
+		return dma_ops->dma_supported(dev, mask);
+	BUG();
+	return 0;
+}
+EXPORT_SYMBOL(dma_supported);
+
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+#ifdef CONFIG_PCI
+	if (dev->bus == &pci_bus_type)
+		return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
+#endif
+#ifdef CONFIG_IBMVIO
+	if (dev->bus == &vio_bus_type)
+		return -EIO;
+#endif /* CONFIG_IBMVIO */
+	BUG();
+	return 0;
+}
+EXPORT_SYMBOL(dma_set_mask);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+		dma_addr_t *dma_handle, gfp_t flag)
+{
+	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops)
+		return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
+	BUG();
+	return NULL;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t dma_handle)
+{
+	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops)
+		dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
+	else
+		BUG();
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+		enum dma_data_direction direction)
+{
+	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops)
+		return dma_ops->map_single(dev, cpu_addr, size, direction);
+	BUG();
+	return (dma_addr_t)0;
+}
+EXPORT_SYMBOL(dma_map_single);
+
+void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+		enum dma_data_direction direction)
+{
+	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops)
+		dma_ops->unmap_single(dev, dma_addr, size, direction);
+	else
+		BUG();
+}
+EXPORT_SYMBOL(dma_unmap_single);
+
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+		unsigned long offset, size_t size,
+		enum dma_data_direction direction)
+{
+	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops)
+		return dma_ops->map_single(dev,
+				(page_address(page) + offset), size, direction);
+	BUG();
+	return (dma_addr_t)0;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+		enum dma_data_direction direction)
+{
+	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops)
+		dma_ops->unmap_single(dev, dma_address, size, direction);
+	else
+		BUG();
+}
+EXPORT_SYMBOL(dma_unmap_page);
+
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		enum dma_data_direction direction)
+{
+	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops)
+		return dma_ops->map_sg(dev, sg, nents, direction);
+	BUG();
+	return 0;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+		enum dma_data_direction direction)
+{
+	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops)
+		dma_ops->unmap_sg(dev, sg, nhwentries, direction);
+	else
+		BUG();
+}
+EXPORT_SYMBOL(dma_unmap_sg);
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
new file mode 100644
index 000000000000..6160c8dbb7c5
--- /dev/null
+++ b/arch/powerpc/kernel/iomap.c
@@ -0,0 +1,146 @@
+/*
+ * arch/ppc64/kernel/iomap.c
+ *
+ * ppc64 "iomap" interface implementation.
+ *
+ * (C) Copyright 2004 Linus Torvalds
+ */
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+
+/*
+ * Here comes the ppc64 implementation of the IOMAP 
+ * interfaces.
+ */
+unsigned int fastcall ioread8(void __iomem *addr)
+{
+	return readb(addr);
+}
+unsigned int fastcall ioread16(void __iomem *addr)
+{
+	return readw(addr);
+}
+unsigned int fastcall ioread16be(void __iomem *addr)
+{
+	return in_be16(addr);
+}
+unsigned int fastcall ioread32(void __iomem *addr)
+{
+	return readl(addr);
+}
+unsigned int fastcall ioread32be(void __iomem *addr)
+{
+	return in_be32(addr);
+}
+EXPORT_SYMBOL(ioread8);
+EXPORT_SYMBOL(ioread16);
+EXPORT_SYMBOL(ioread16be);
+EXPORT_SYMBOL(ioread32);
+EXPORT_SYMBOL(ioread32be);
+
+void fastcall iowrite8(u8 val, void __iomem *addr)
+{
+	writeb(val, addr);
+}
+void fastcall iowrite16(u16 val, void __iomem *addr)
+{
+	writew(val, addr);
+}
+void fastcall iowrite16be(u16 val, void __iomem *addr)
+{
+	out_be16(addr, val);
+}
+void fastcall iowrite32(u32 val, void __iomem *addr)
+{
+	writel(val, addr);
+}
+void fastcall iowrite32be(u32 val, void __iomem *addr)
+{
+	out_be32(addr, val);
+}
+EXPORT_SYMBOL(iowrite8);
+EXPORT_SYMBOL(iowrite16);
+EXPORT_SYMBOL(iowrite16be);
+EXPORT_SYMBOL(iowrite32);
+EXPORT_SYMBOL(iowrite32be);
+
+/*
+ * These are the "repeat read/write" functions. Note the
+ * non-CPU byte order. We do things in "IO byteorder"
+ * here.
+ *
+ * FIXME! We could make these do EEH handling if we really
+ * wanted. Not clear if we do.
+ */
+void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+	_insb((u8 __iomem *) addr, dst, count);
+}
+void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+	_insw_ns((u16 __iomem *) addr, dst, count);
+}
+void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+	_insl_ns((u32 __iomem *) addr, dst, count);
+}
+EXPORT_SYMBOL(ioread8_rep);
+EXPORT_SYMBOL(ioread16_rep);
+EXPORT_SYMBOL(ioread32_rep);
+
+void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+	_outsb((u8 __iomem *) addr, src, count);
+}
+void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+	_outsw_ns((u16 __iomem *) addr, src, count);
+}
+void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+	_outsl_ns((u32 __iomem *) addr, src, count);
+}
+EXPORT_SYMBOL(iowrite8_rep);
+EXPORT_SYMBOL(iowrite16_rep);
+EXPORT_SYMBOL(iowrite32_rep);
+
+void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+	if (!_IO_IS_VALID(port))
+		return NULL;
+	return (void __iomem *) (port+pci_io_base);
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+	/* Nothing to do */
+}
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
+{
+	unsigned long start = pci_resource_start(dev, bar);
+	unsigned long len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+	if (!len)
+		return NULL;
+	if (max && len > max)
+		len = max;
+	if (flags & IORESOURCE_IO)
+		return ioport_map(start, len);
+	if (flags & IORESOURCE_MEM)
+		return ioremap(start, len);
+	/* What? */
+	return NULL;
+}
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+	/* Nothing to do */
+}
+EXPORT_SYMBOL(pci_iomap);
+EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
new file mode 100644
index 000000000000..4d9b4388918b
--- /dev/null
+++ b/arch/powerpc/kernel/iommu.c
@@ -0,0 +1,572 @@
+/*
+ * arch/ppc64/kernel/iommu.c
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ * 
+ * Rewrite, cleanup, new allocation schemes, virtual merging: 
+ * Copyright (C) 2004 Olof Johansson, IBM Corporation
+ *               and  Ben. Herrenschmidt, IBM Corporation
+ *
+ * Dynamic DMA mapping support, bus-independent parts.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+
+#define DBG(...)
+
+#ifdef CONFIG_IOMMU_VMERGE
+static int novmerge = 0;
+#else
+static int novmerge = 1;
+#endif
+
+static int __init setup_iommu(char *str)
+{
+	if (!strcmp(str, "novmerge"))
+		novmerge = 1;
+	else if (!strcmp(str, "vmerge"))
+		novmerge = 0;
+	return 1;
+}
+
+__setup("iommu=", setup_iommu);
+
+static unsigned long iommu_range_alloc(struct iommu_table *tbl,
+                                       unsigned long npages,
+                                       unsigned long *handle,
+                                       unsigned int align_order)
+{ 
+	unsigned long n, end, i, start;
+	unsigned long limit;
+	int largealloc = npages > 15;
+	int pass = 0;
+	unsigned long align_mask;
+
+	align_mask = 0xffffffffffffffffl >> (64 - align_order);
+
+	/* This allocator was derived from x86_64's bit string search */
+
+	/* Sanity check */
+	if (unlikely(npages) == 0) {
+		if (printk_ratelimit())
+			WARN_ON(1);
+		return DMA_ERROR_CODE;
+	}
+
+	if (handle && *handle)
+		start = *handle;
+	else
+		start = largealloc ? tbl->it_largehint : tbl->it_hint;
+
+	/* Use only half of the table for small allocs (15 pages or less) */
+	limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
+
+	if (largealloc && start < tbl->it_halfpoint)
+		start = tbl->it_halfpoint;
+
+	/* The case below can happen if we have a small segment appended
+	 * to a large, or when the previous alloc was at the very end of
+	 * the available space. If so, go back to the initial start.
+	 */
+	if (start >= limit)
+		start = largealloc ? tbl->it_largehint : tbl->it_hint;
+	
+ again:
+
+	n = find_next_zero_bit(tbl->it_map, limit, start);
+
+	/* Align allocation */
+	n = (n + align_mask) & ~align_mask;
+
+	end = n + npages;
+
+	if (unlikely(end >= limit)) {
+		if (likely(pass < 2)) {
+			/* First failure, just rescan the half of the table.
+			 * Second failure, rescan the other half of the table.
+			 */
+			start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
+			limit = pass ? tbl->it_size : limit;
+			pass++;
+			goto again;
+		} else {
+			/* Third failure, give up */
+			return DMA_ERROR_CODE;
+		}
+	}
+
+	for (i = n; i < end; i++)
+		if (test_bit(i, tbl->it_map)) {
+			start = i+1;
+			goto again;
+		}
+
+	for (i = n; i < end; i++)
+		__set_bit(i, tbl->it_map);
+
+	/* Bump the hint to a new block for small allocs. */
+	if (largealloc) {
+		/* Don't bump to new block to avoid fragmentation */
+		tbl->it_largehint = end;
+	} else {
+		/* Overflow will be taken care of at the next allocation */
+		tbl->it_hint = (end + tbl->it_blocksize - 1) &
+		                ~(tbl->it_blocksize - 1);
+	}
+
+	/* Update handle for SG allocations */
+	if (handle)
+		*handle = end;
+
+	return n;
+}
+
+static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
+		       unsigned int npages, enum dma_data_direction direction,
+		       unsigned int align_order)
+{
+	unsigned long entry, flags;
+	dma_addr_t ret = DMA_ERROR_CODE;
+	
+	spin_lock_irqsave(&(tbl->it_lock), flags);
+
+	entry = iommu_range_alloc(tbl, npages, NULL, align_order);
+
+	if (unlikely(entry == DMA_ERROR_CODE)) {
+		spin_unlock_irqrestore(&(tbl->it_lock), flags);
+		return DMA_ERROR_CODE;
+	}
+
+	entry += tbl->it_offset;	/* Offset into real TCE table */
+	ret = entry << PAGE_SHIFT;	/* Set the return dma address */
+
+	/* Put the TCEs in the HW table */
+	ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK,
+			 direction);
+
+
+	/* Flush/invalidate TLB caches if necessary */
+	if (ppc_md.tce_flush)
+		ppc_md.tce_flush(tbl);
+
+	spin_unlock_irqrestore(&(tbl->it_lock), flags);
+
+	/* Make sure updates are seen by hardware */
+	mb();
+
+	return ret;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, 
+			 unsigned int npages)
+{
+	unsigned long entry, free_entry;
+	unsigned long i;
+
+	entry = dma_addr >> PAGE_SHIFT;
+	free_entry = entry - tbl->it_offset;
+
+	if (((free_entry + npages) > tbl->it_size) ||
+	    (entry < tbl->it_offset)) {
+		if (printk_ratelimit()) {
+			printk(KERN_INFO "iommu_free: invalid entry\n");
+			printk(KERN_INFO "\tentry     = 0x%lx\n", entry); 
+			printk(KERN_INFO "\tdma_addr  = 0x%lx\n", (u64)dma_addr);
+			printk(KERN_INFO "\tTable     = 0x%lx\n", (u64)tbl);
+			printk(KERN_INFO "\tbus#      = 0x%lx\n", (u64)tbl->it_busno);
+			printk(KERN_INFO "\tsize      = 0x%lx\n", (u64)tbl->it_size);
+			printk(KERN_INFO "\tstartOff  = 0x%lx\n", (u64)tbl->it_offset);
+			printk(KERN_INFO "\tindex     = 0x%lx\n", (u64)tbl->it_index);
+			WARN_ON(1);
+		}
+		return;
+	}
+
+	ppc_md.tce_free(tbl, entry, npages);
+	
+	for (i = 0; i < npages; i++)
+		__clear_bit(free_entry+i, tbl->it_map);
+}
+
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+		unsigned int npages)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&(tbl->it_lock), flags);
+
+	__iommu_free(tbl, dma_addr, npages);
+
+	/* Make sure TLB cache is flushed if the HW needs it. We do
+	 * not do an mb() here on purpose, it is not needed on any of
+	 * the current platforms.
+	 */
+	if (ppc_md.tce_flush)
+		ppc_md.tce_flush(tbl);
+
+	spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+		struct scatterlist *sglist, int nelems,
+		enum dma_data_direction direction)
+{
+	dma_addr_t dma_next = 0, dma_addr;
+	unsigned long flags;
+	struct scatterlist *s, *outs, *segstart;
+	int outcount, incount;
+	unsigned long handle;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if ((nelems == 0) || !tbl)
+		return 0;
+
+	outs = s = segstart = &sglist[0];
+	outcount = 1;
+	incount = nelems;
+	handle = 0;
+
+	/* Init first segment length for backout at failure */
+	outs->dma_length = 0;
+
+	DBG("mapping %d elements:\n", nelems);
+
+	spin_lock_irqsave(&(tbl->it_lock), flags);
+
+	for (s = outs; nelems; nelems--, s++) {
+		unsigned long vaddr, npages, entry, slen;
+
+		slen = s->length;
+		/* Sanity check */
+		if (slen == 0) {
+			dma_next = 0;
+			continue;
+		}
+		/* Allocate iommu entries for that segment */
+		vaddr = (unsigned long)page_address(s->page) + s->offset;
+		npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK);
+		npages >>= PAGE_SHIFT;
+		entry = iommu_range_alloc(tbl, npages, &handle, 0);
+
+		DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen);
+
+		/* Handle failure */
+		if (unlikely(entry == DMA_ERROR_CODE)) {
+			if (printk_ratelimit())
+				printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
+				       " npages %lx\n", tbl, vaddr, npages);
+			goto failure;
+		}
+
+		/* Convert entry to a dma_addr_t */
+		entry += tbl->it_offset;
+		dma_addr = entry << PAGE_SHIFT;
+		dma_addr |= s->offset;
+
+		DBG("  - %lx pages, entry: %lx, dma_addr: %lx\n",
+			    npages, entry, dma_addr);
+
+		/* Insert into HW table */
+		ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction);
+
+		/* If we are in an open segment, try merging */
+		if (segstart != s) {
+			DBG("  - trying merge...\n");
+			/* We cannot merge if:
+			 * - allocated dma_addr isn't contiguous to previous allocation
+			 */
+			if (novmerge || (dma_addr != dma_next)) {
+				/* Can't merge: create a new segment */
+				segstart = s;
+				outcount++; outs++;
+				DBG("    can't merge, new segment.\n");
+			} else {
+				outs->dma_length += s->length;
+				DBG("    merged, new len: %lx\n", outs->dma_length);
+			}
+		}
+
+		if (segstart == s) {
+			/* This is a new segment, fill entries */
+			DBG("  - filling new segment.\n");
+			outs->dma_address = dma_addr;
+			outs->dma_length = slen;
+		}
+
+		/* Calculate next page pointer for contiguous check */
+		dma_next = dma_addr + slen;
+
+		DBG("  - dma next is: %lx\n", dma_next);
+	}
+
+	/* Flush/invalidate TLB caches if necessary */
+	if (ppc_md.tce_flush)
+		ppc_md.tce_flush(tbl);
+
+	spin_unlock_irqrestore(&(tbl->it_lock), flags);
+
+	/* Make sure updates are seen by hardware */
+	mb();
+
+	DBG("mapped %d elements:\n", outcount);
+
+	/* For the sake of iommu_unmap_sg, we clear out the length in the
+	 * next entry of the sglist if we didn't fill the list completely
+	 */
+	if (outcount < incount) {
+		outs++;
+		outs->dma_address = DMA_ERROR_CODE;
+		outs->dma_length = 0;
+	}
+	return outcount;
+
+ failure:
+	for (s = &sglist[0]; s <= outs; s++) {
+		if (s->dma_length != 0) {
+			unsigned long vaddr, npages;
+
+			vaddr = s->dma_address & PAGE_MASK;
+			npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
+				>> PAGE_SHIFT;
+			__iommu_free(tbl, vaddr, npages);
+		}
+	}
+	spin_unlock_irqrestore(&(tbl->it_lock), flags);
+	return 0;
+}
+
+
+void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+		int nelems, enum dma_data_direction direction)
+{
+	unsigned long flags;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if (!tbl)
+		return;
+
+	spin_lock_irqsave(&(tbl->it_lock), flags);
+
+	while (nelems--) {
+		unsigned int npages;
+		dma_addr_t dma_handle = sglist->dma_address;
+
+		if (sglist->dma_length == 0)
+			break;
+		npages = (PAGE_ALIGN(dma_handle + sglist->dma_length)
+			  - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT;
+		__iommu_free(tbl, dma_handle, npages);
+		sglist++;
+	}
+
+	/* Flush/invalidate TLBs if necessary. As for iommu_free(), we
+	 * do not do an mb() here, the affected platforms do not need it
+	 * when freeing.
+	 */
+	if (ppc_md.tce_flush)
+		ppc_md.tce_flush(tbl);
+
+	spin_unlock_irqrestore(&(tbl->it_lock), flags);
+}
+
+/*
+ * Build a iommu_table structure.  This contains a bit map which
+ * is used to manage allocation of the tce space.
+ */
+struct iommu_table *iommu_init_table(struct iommu_table *tbl)
+{
+	unsigned long sz;
+	static int welcomed = 0;
+
+	/* Set aside 1/4 of the table for large allocations. */
+	tbl->it_halfpoint = tbl->it_size * 3 / 4;
+
+	/* number of bytes needed for the bitmap */
+	sz = (tbl->it_size + 7) >> 3;
+
+	tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
+	if (!tbl->it_map)
+		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
+
+	memset(tbl->it_map, 0, sz);
+
+	tbl->it_hint = 0;
+	tbl->it_largehint = tbl->it_halfpoint;
+	spin_lock_init(&tbl->it_lock);
+
+	/* Clear the hardware table in case firmware left allocations in it */
+	ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+
+	if (!welcomed) {
+		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
+		       novmerge ? "disabled" : "enabled");
+		welcomed = 1;
+	}
+
+	return tbl;
+}
+
+void iommu_free_table(struct device_node *dn)
+{
+	struct pci_dn *pdn = dn->data;
+	struct iommu_table *tbl = pdn->iommu_table;
+	unsigned long bitmap_sz, i;
+	unsigned int order;
+
+	if (!tbl || !tbl->it_map) {
+		printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__,
+				dn->full_name);
+		return;
+	}
+
+	/* verify that table contains no entries */
+	/* it_size is in entries, and we're examining 64 at a time */
+	for (i = 0; i < (tbl->it_size/64); i++) {
+		if (tbl->it_map[i] != 0) {
+			printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
+				__FUNCTION__, dn->full_name);
+			break;
+		}
+	}
+
+	/* calculate bitmap size in bytes */
+	bitmap_sz = (tbl->it_size + 7) / 8;
+
+	/* free bitmap */
+	order = get_order(bitmap_sz);
+	free_pages((unsigned long) tbl->it_map, order);
+
+	/* free table */
+	kfree(tbl);
+}
+
+/* Creates TCEs for a user provided buffer.  The user buffer must be
+ * contiguous real kernel storage (not vmalloc).  The address of the buffer
+ * passed here is the kernel (virtual) address of the buffer.  The buffer
+ * need not be page aligned, the dma_addr_t returned will point to the same
+ * byte within the page as vaddr.
+ */
+dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
+		size_t size, enum dma_data_direction direction)
+{
+	dma_addr_t dma_handle = DMA_ERROR_CODE;
+	unsigned long uaddr;
+	unsigned int npages;
+
+	BUG_ON(direction == DMA_NONE);
+
+	uaddr = (unsigned long)vaddr;
+	npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK);
+	npages >>= PAGE_SHIFT;
+
+	if (tbl) {
+		dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0);
+		if (dma_handle == DMA_ERROR_CODE) {
+			if (printk_ratelimit())  {
+				printk(KERN_INFO "iommu_alloc failed, "
+						"tbl %p vaddr %p npages %d\n",
+						tbl, vaddr, npages);
+			}
+		} else
+			dma_handle |= (uaddr & ~PAGE_MASK);
+	}
+
+	return dma_handle;
+}
+
+void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
+		size_t size, enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+
+	if (tbl)
+		iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
+					(dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
+}
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
+		dma_addr_t *dma_handle, gfp_t flag)
+{
+	void *ret = NULL;
+	dma_addr_t mapping;
+	unsigned int npages, order;
+
+	size = PAGE_ALIGN(size);
+	npages = size >> PAGE_SHIFT;
+	order = get_order(size);
+
+ 	/*
+	 * Client asked for way too much space.  This is checked later
+	 * anyway.  It is easier to debug here for the drivers than in
+	 * the tce tables.
+	 */
+	if (order >= IOMAP_MAX_ORDER) {
+		printk("iommu_alloc_consistent size too large: 0x%lx\n", size);
+		return NULL;
+	}
+
+	if (!tbl)
+		return NULL;
+
+	/* Alloc enough pages (and possibly more) */
+	ret = (void *)__get_free_pages(flag, order);
+	if (!ret)
+		return NULL;
+	memset(ret, 0, size);
+
+	/* Set up tces to cover the allocated range */
+	mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order);
+	if (mapping == DMA_ERROR_CODE) {
+		free_pages((unsigned long)ret, order);
+		ret = NULL;
+	} else
+		*dma_handle = mapping;
+	return ret;
+}
+
+void iommu_free_coherent(struct iommu_table *tbl, size_t size,
+			 void *vaddr, dma_addr_t dma_handle)
+{
+	unsigned int npages;
+
+	if (tbl) {
+		size = PAGE_ALIGN(size);
+		npages = size >> PAGE_SHIFT;
+		iommu_free(tbl, dma_handle, npages);
+		free_pages((unsigned long)vaddr, get_order(size));
+	}
+}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
new file mode 100644
index 000000000000..511af54e6230
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes.c
@@ -0,0 +1,459 @@
+/*
+ *  Kernel Probes (KProbes)
+ *  arch/ppc64/kernel/kprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation ( includes contributions from
+ *		Rusty Russell).
+ * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
+ *		interface to access function arguments.
+ * 2004-Nov	Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port
+ *		for PPC64
+ */
+
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+#include <asm/sstep.h>
+
+static DECLARE_MUTEX(kprobe_mutex);
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	int ret = 0;
+	kprobe_opcode_t insn = *p->addr;
+
+	if ((unsigned long)p->addr & 0x03) {
+		printk("Attempt to register kprobe at an unaligned address\n");
+		ret = -EINVAL;
+	} else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
+		printk("Cannot register a kprobe on rfid or mtmsrd\n");
+		ret = -EINVAL;
+	}
+
+	/* insn must be on a special executable page on ppc64 */
+	if (!ret) {
+		down(&kprobe_mutex);
+		p->ainsn.insn = get_insn_slot();
+		up(&kprobe_mutex);
+		if (!p->ainsn.insn)
+			ret = -ENOMEM;
+	}
+	return ret;
+}
+
+void __kprobes arch_copy_kprobe(struct kprobe *p)
+{
+	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	p->opcode = *p->addr;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range((unsigned long) p->addr,
+			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	*p->addr = p->opcode;
+	flush_icache_range((unsigned long) p->addr,
+			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	down(&kprobe_mutex);
+	free_insn_slot(p->ainsn.insn);
+	up(&kprobe_mutex);
+}
+
+static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = *p->ainsn.insn;
+
+	regs->msr |= MSR_SE;
+
+	/* single step inline if it is a trap variant */
+	if (is_trap(insn))
+		regs->nip = (unsigned long)p->addr;
+	else
+		regs->nip = (unsigned long)p->ainsn.insn;
+}
+
+static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+	kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
+}
+
+static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+	kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
+}
+
+static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+				struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = p;
+	kcb->kprobe_saved_msr = regs->msr;
+}
+
+/* Called with kretprobe_lock held */
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+				      struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *)regs->link;
+
+		/* Replace the return addr with trampoline addr */
+		regs->link = (unsigned long)kretprobe_trampoline;
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
+}
+
+static inline int kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	int ret = 0;
+	unsigned int *addr = (unsigned int *)regs->nip;
+	struct kprobe_ctlblk *kcb;
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+
+	/* Check we're not actually recursing */
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			kprobe_opcode_t insn = *p->ainsn.insn;
+			if (kcb->kprobe_status == KPROBE_HIT_SS &&
+					is_trap(insn)) {
+				regs->msr &= ~MSR_SE;
+				regs->msr |= kcb->kprobe_saved_msr;
+				goto no_kprobe;
+			}
+			/* We have reentered the kprobe_handler(), since
+			 * another probe was hit while within the handler.
+			 * We here save the original kprobes variables and
+			 * just single step on the instruction of the new probe
+			 * without calling any user handlers.
+			 */
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, regs, kcb);
+			kcb->kprobe_saved_msr = regs->msr;
+			p->nmissed++;
+			prepare_singlestep(p, regs);
+			kcb->kprobe_status = KPROBE_REENTER;
+			return 1;
+		} else {
+			p = __get_cpu_var(current_kprobe);
+			if (p->break_handler && p->break_handler(p, regs)) {
+				goto ss_probe;
+			}
+		}
+		goto no_kprobe;
+	}
+
+	p = get_kprobe(addr);
+	if (!p) {
+		if (*addr != BREAKPOINT_INSTRUCTION) {
+			/*
+			 * PowerPC has multiple variants of the "trap"
+			 * instruction. If the current instruction is a
+			 * trap variant, it could belong to someone else
+			 */
+			kprobe_opcode_t cur_insn = *addr;
+			if (is_trap(cur_insn))
+		       		goto no_kprobe;
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it.  Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address.  In either case, no further
+			 * handling of this interrupt is appropriate.
+			 */
+			ret = 1;
+		}
+		/* Not one of ours: let kernel handle it */
+		goto no_kprobe;
+	}
+
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+	set_current_kprobe(p, regs, kcb);
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/* handler has already set things up, so skip ss setup */
+		return 1;
+
+ss_probe:
+	prepare_singlestep(p, regs);
+	kcb->kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	preempt_enable_no_resched();
+	return ret;
+}
+
+/*
+ * Function return probe trampoline:
+ * 	- init_kprobes() establishes a probepoint here
+ * 	- When the probed function returns, this probe
+ * 		causes the handlers to fire
+ */
+void kretprobe_trampoline_holder(void)
+{
+	asm volatile(".global kretprobe_trampoline\n"
+			"kretprobe_trampoline:\n"
+			"nop\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+        struct kretprobe_instance *ri = NULL;
+        struct hlist_head *head;
+        struct hlist_node *node, *tmp;
+	unsigned long flags, orig_ret_address = 0;
+	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+	spin_lock_irqsave(&kretprobe_lock, flags);
+        head = kretprobe_inst_table_head(current);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+         *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+                if (ri->task != current)
+			/* another task is sharing our hash bucket */
+                        continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->nip = orig_ret_address;
+
+	reset_current_kprobe();
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	preempt_enable_no_resched();
+
+        /*
+         * By returning a non-zero value, we are telling
+         * kprobe_handler() that we don't want the post_handler
+         * to run (and have re-enabled preemption)
+         */
+        return 1;
+}
+
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is p->ainsn.insn.
+ */
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	int ret;
+	unsigned int insn = *p->ainsn.insn;
+
+	regs->nip = (unsigned long)p->addr;
+	ret = emulate_step(regs, insn);
+	if (ret == 0)
+		regs->nip = (unsigned long)p->addr + 4;
+}
+
+static inline int post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur)
+		return 0;
+
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	resume_execution(cur, regs);
+	regs->msr |= kcb->kprobe_saved_msr;
+
+	/*Restore back the original saved kprobes variables and continue. */
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		goto out;
+	}
+	reset_current_kprobe();
+out:
+	preempt_enable_no_resched();
+
+	/*
+	 * if somebody else is singlestepping across a probe point, msr
+	 * will have SE set, in which case, continue the remaining processing
+	 * of do_debug, as if this is not a probe hit.
+	 */
+	if (regs->msr & MSR_SE)
+		return 0;
+
+	return 1;
+}
+
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+		return 1;
+
+	if (kcb->kprobe_status & KPROBE_HIT_SS) {
+		resume_execution(cur, regs);
+		regs->msr &= ~MSR_SE;
+		regs->msr |= kcb->kprobe_saved_msr;
+
+		reset_current_kprobe();
+		preempt_enable_no_resched();
+	}
+	return 0;
+}
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_BPT:
+		if (kprobe_handler(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_SSTEP:
+		if (post_kprobe_handler(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_PAGE_FAULT:
+		/* kprobe_running() needs smp_processor_id() */
+		preempt_disable();
+		if (kprobe_running() &&
+		    kprobe_fault_handler(args->regs, args->trapnr))
+			ret = NOTIFY_STOP;
+		preempt_enable();
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+	/* setup return addr to the jprobe handler routine */
+	regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry);
+	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
+
+	return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+	asm volatile("trap" ::: "memory");
+}
+
+void __kprobes jprobe_return_end(void)
+{
+};
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	/*
+	 * FIXME - we should ideally be validating that we got here 'cos
+	 * of the "trap" in jprobe_return() above, before restoring the
+	 * saved regs...
+	 */
+	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	preempt_enable_no_resched();
+	return 1;
+}
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	return register_kprobe(&trampoline_p);
+}
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
new file mode 100644
index 000000000000..97c51e452be7
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -0,0 +1,358 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+
+#include <linux/cpumask.h>
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h>	/* _end */
+#include <asm/prom.h>
+#include <asm/smp.h>
+
+#define HASH_GROUP_SIZE 0x80	/* size of each hash group, asm/mmu.h */
+
+/* Have this around till we move it into crash specific file */
+note_buf_t crash_notes[NR_CPUS];
+
+/* Dummy for now. Not sure if we need to have a crash shutdown in here
+ * and if what it will achieve. Letting it be now to compile the code
+ * in generic kexec environment
+ */
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* do nothing right now */
+	/* smp_relase_cpus() if we want smp on panic kernel */
+	/* cpu_irq_down to isolate us until we are ready */
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	int i;
+	unsigned long begin, end;	/* limits of segment */
+	unsigned long low, high;	/* limits of blocked memory range */
+	struct device_node *node;
+	unsigned long *basep;
+	unsigned int *sizep;
+
+	if (!ppc_md.hpte_clear_all)
+		return -ENOENT;
+
+	/*
+	 * Since we use the kernel fault handlers and paging code to
+	 * handle the virtual mode, we must make sure no destination
+	 * overlaps kernel static data or bss.
+	 */
+	for (i = 0; i < image->nr_segments; i++)
+		if (image->segment[i].mem < __pa(_end))
+			return -ETXTBSY;
+
+	/*
+	 * For non-LPAR, we absolutely can not overwrite the mmu hash
+	 * table, since we are still using the bolted entries in it to
+	 * do the copy.  Check that here.
+	 *
+	 * It is safe if the end is below the start of the blocked
+	 * region (end <= low), or if the beginning is after the
+	 * end of the blocked region (begin >= high).  Use the
+	 * boolean identity !(a || b)  === (!a && !b).
+	 */
+	if (htab_address) {
+		low = __pa(htab_address);
+		high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
+
+		for (i = 0; i < image->nr_segments; i++) {
+			begin = image->segment[i].mem;
+			end = begin + image->segment[i].memsz;
+
+			if ((begin < high) && (end > low))
+				return -ETXTBSY;
+		}
+	}
+
+	/* We also should not overwrite the tce tables */
+	for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
+			node = of_find_node_by_type(node, "pci")) {
+		basep = (unsigned long *)get_property(node, "linux,tce-base",
+							NULL);
+		sizep = (unsigned int *)get_property(node, "linux,tce-size",
+							NULL);
+		if (basep == NULL || sizep == NULL)
+			continue;
+
+		low = *basep;
+		high = low + (*sizep);
+
+		for (i = 0; i < image->nr_segments; i++) {
+			begin = image->segment[i].mem;
+			end = begin + image->segment[i].memsz;
+
+			if ((begin < high) && (end > low))
+				return -ETXTBSY;
+		}
+	}
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+	/* we do nothing in prepare that needs to be undone */
+}
+
+#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
+
+static void copy_segments(unsigned long ind)
+{
+	unsigned long entry;
+	unsigned long *ptr;
+	void *dest;
+	void *addr;
+
+	/*
+	 * We rely on kexec_load to create a lists that properly
+	 * initializes these pointers before they are used.
+	 * We will still crash if the list is wrong, but at least
+	 * the compiler will be quiet.
+	 */
+	ptr = NULL;
+	dest = NULL;
+
+	for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+		addr = __va(entry & PAGE_MASK);
+
+		switch (entry & IND_FLAGS) {
+		case IND_DESTINATION:
+			dest = addr;
+			break;
+		case IND_INDIRECTION:
+			ptr = addr;
+			break;
+		case IND_SOURCE:
+			copy_page(dest, addr);
+			dest += PAGE_SIZE;
+		}
+	}
+}
+
+void kexec_copy_flush(struct kimage *image)
+{
+	long i, nr_segments = image->nr_segments;
+	struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+	/* save the ranges on the stack to efficiently flush the icache */
+	memcpy(ranges, image->segment, sizeof(ranges));
+
+	/*
+	 * After this call we may not use anything allocated in dynamic
+	 * memory, including *image.
+	 *
+	 * Only globals and the stack are allowed.
+	 */
+	copy_segments(image->head);
+
+	/*
+	 * we need to clear the icache for all dest pages sometime,
+	 * including ones that were in place on the original copy
+	 */
+	for (i = 0; i < nr_segments; i++)
+		flush_icache_range(ranges[i].mem + KERNELBASE,
+				ranges[i].mem + KERNELBASE +
+				ranges[i].memsz);
+}
+
+#ifdef CONFIG_SMP
+
+/* FIXME: we should schedule this function to be called on all cpus based
+ * on calling the interrupts, but we would like to call it off irq level
+ * so that the interrupt controller is clean.
+ */
+void kexec_smp_down(void *arg)
+{
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 1);
+
+	local_irq_disable();
+	kexec_smp_wait();
+	/* NOTREACHED */
+}
+
+static void kexec_prepare_cpus(void)
+{
+	int my_cpu, i, notified=-1;
+
+	smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
+	my_cpu = get_cpu();
+
+	/* check the others cpus are now down (via paca hw cpu id == -1) */
+	for (i=0; i < NR_CPUS; i++) {
+		if (i == my_cpu)
+			continue;
+
+		while (paca[i].hw_cpu_id != -1) {
+			barrier();
+			if (!cpu_possible(i)) {
+				printk("kexec: cpu %d hw_cpu_id %d is not"
+						" possible, ignoring\n",
+						i, paca[i].hw_cpu_id);
+				break;
+			}
+			if (!cpu_online(i)) {
+				/* Fixme: this can be spinning in
+				 * pSeries_secondary_wait with a paca
+				 * waiting for it to go online.
+				 */
+				printk("kexec: cpu %d hw_cpu_id %d is not"
+						" online, ignoring\n",
+						i, paca[i].hw_cpu_id);
+				break;
+			}
+			if (i != notified) {
+				printk( "kexec: waiting for cpu %d (physical"
+						" %d) to go down\n",
+						i, paca[i].hw_cpu_id);
+				notified = i;
+			}
+		}
+	}
+
+	/* after we tell the others to go down */
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 0);
+
+	put_cpu();
+
+	local_irq_disable();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+	/*
+	 * move the secondarys to us so that we can copy
+	 * the new kernel 0-0x100 safely
+	 *
+	 * do this if kexec in setup.c ?
+	 *
+	 * We need to release the cpus if we are ever going from an
+	 * UP to an SMP kernel.
+	 */
+	smp_release_cpus();
+	if (ppc_md.kexec_cpu_down)
+		ppc_md.kexec_cpu_down(0, 0);
+	local_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled.  It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image.  We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+union thread_union kexec_stack
+	__attribute__((__section__(".data.init_task"))) = { };
+
+/* Our assembly helper, in kexec_stub.S */
+extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
+					void *image, void *control,
+					void (*clear_all)(void)) ATTRIB_NORET;
+
+/* too late to fail here */
+void machine_kexec(struct kimage *image)
+{
+
+	/* prepare control code if any */
+
+	/* shutdown other cpus into our wait loop and quiesce interrupts */
+	kexec_prepare_cpus();
+
+	/* switch to a staticly allocated stack.  Based on irq stack code.
+	 * XXX: the task struct will likely be invalid once we do the copy!
+	 */
+	kexec_stack.thread_info.task = current_thread_info()->task;
+	kexec_stack.thread_info.flags = 0;
+
+	/* Some things are best done in assembly.  Finding globals with
+	 * a toc is easier in C, so pass in what we can.
+	 */
+	kexec_sequence(&kexec_stack, image->start, image,
+			page_address(image->control_code_page),
+			ppc_md.hpte_clear_all);
+	/* NOTREACHED */
+}
+
+/* Values we need to export to the second kernel via the device tree. */
+static unsigned long htab_base, htab_size, kernel_end;
+
+static struct property htab_base_prop = {
+	.name = "linux,htab-base",
+	.length = sizeof(unsigned long),
+	.value = (unsigned char *)&htab_base,
+};
+
+static struct property htab_size_prop = {
+	.name = "linux,htab-size",
+	.length = sizeof(unsigned long),
+	.value = (unsigned char *)&htab_size,
+};
+
+static struct property kernel_end_prop = {
+	.name = "linux,kernel-end",
+	.length = sizeof(unsigned long),
+	.value = (unsigned char *)&kernel_end,
+};
+
+static void __init export_htab_values(void)
+{
+	struct device_node *node;
+
+	node = of_find_node_by_path("/chosen");
+	if (!node)
+		return;
+
+	kernel_end = __pa(_end);
+	prom_add_property(node, &kernel_end_prop);
+
+	/* On machines with no htab htab_address is NULL */
+	if (NULL == htab_address)
+		goto out;
+
+	htab_base = __pa(htab_address);
+	prom_add_property(node, &htab_base_prop);
+
+	htab_size = 1UL << ppc64_pft_size;
+	prom_add_property(node, &htab_size_prop);
+
+ out:
+	of_node_put(node);
+}
+
+void __init kexec_setup(void)
+{
+	export_htab_values();
+}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
new file mode 100644
index 000000000000..928b8581fcb0
--- /dev/null
+++ b/arch/powerpc/kernel/module_64.c
@@ -0,0 +1,455 @@
+/*  Kernel module help for PPC64.
+    Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <asm/module.h>
+#include <asm/uaccess.h>
+
+/* FIXME: We don't do .init separately.  To do this, we'd need to have
+   a separate r2 value in the init and core section, and stub between
+   them, too.
+
+   Using a magic allocator which places modules within 32MB solves
+   this, and makes other things simpler.  Anton?
+   --RR.  */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+/* There's actually a third entry here, but it's unused */
+struct ppc64_opd_entry
+{
+	unsigned long funcaddr;
+	unsigned long r2;
+};
+
+/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
+   the kernel itself).  But on PPC64, these need to be used for every
+   jump, actually, to reset r2 (TOC+0x8000). */
+struct ppc64_stub_entry
+{
+	/* 28 byte jump instruction sequence (7 instructions) */
+	unsigned char jump[28];
+	unsigned char unused[4];
+	/* Data for the above code */
+	struct ppc64_opd_entry opd;
+};
+
+/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
+   function which may be more than 24-bits away.  We could simply
+   patch the new r2 value and function pointer into the stub, but it's
+   significantly shorter to put these values at the end of the stub
+   code, and patch the stub address (32-bits relative to the TOC ptr,
+   r2) into the stub. */
+static struct ppc64_stub_entry ppc64_stub =
+{ .jump = {
+	0x3d, 0x82, 0x00, 0x00, /* addis   r12,r2, <high> */
+	0x39, 0x8c, 0x00, 0x00, /* addi    r12,r12, <low> */
+	/* Save current r2 value in magic place on the stack. */
+	0xf8, 0x41, 0x00, 0x28, /* std     r2,40(r1) */
+	0xe9, 0x6c, 0x00, 0x20, /* ld      r11,32(r12) */
+	0xe8, 0x4c, 0x00, 0x28, /* ld      r2,40(r12) */
+	0x7d, 0x69, 0x03, 0xa6, /* mtctr   r11 */
+	0x4e, 0x80, 0x04, 0x20  /* bctr */
+} };
+
+/* Count how many different 24-bit relocations (different symbol,
+   different addend) */
+static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i, j, ret = 0;
+
+	/* FIXME: Only count external ones --RR */
+	/* Sure, this is order(n^2), but it's usually short, and not
+           time critical */
+	for (i = 0; i < num; i++) {
+		/* Only count 24-bit relocs, others don't need stubs */
+		if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24)
+			continue;
+		for (j = 0; j < i; j++) {
+			/* If this addend appeared before, it's
+                           already been counted */
+			if (rela[i].r_info == rela[j].r_info
+			    && rela[i].r_addend == rela[j].r_addend)
+				break;
+		}
+		if (j == i) ret++;
+	}
+	return ret;
+}
+
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+
+	return vmalloc_exec(size);
+}
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
+}
+
+/* Get size of potential trampolines required. */
+static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
+				    const Elf64_Shdr *sechdrs)
+{
+	/* One extra reloc so it's always 0-funcaddr terminated */
+	unsigned long relocs = 1;
+	unsigned i;
+
+	/* Every relocated section... */
+	for (i = 1; i < hdr->e_shnum; i++) {
+		if (sechdrs[i].sh_type == SHT_RELA) {
+			DEBUGP("Found relocations in section %u\n", i);
+			DEBUGP("Ptr: %p.  Number: %lu\n",
+			       (void *)sechdrs[i].sh_addr,
+			       sechdrs[i].sh_size / sizeof(Elf64_Rela));
+			relocs += count_relocs((void *)sechdrs[i].sh_addr,
+					       sechdrs[i].sh_size
+					       / sizeof(Elf64_Rela));
+		}
+	}
+
+	DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
+	return relocs * sizeof(struct ppc64_stub_entry);
+}
+
+static void dedotify_versions(struct modversion_info *vers,
+			      unsigned long size)
+{
+	struct modversion_info *end;
+
+	for (end = (void *)vers + size; vers < end; vers++)
+		if (vers->name[0] == '.')
+			memmove(vers->name, vers->name+1, strlen(vers->name));
+}
+
+/* Undefined symbols which refer to .funcname, hack to funcname */
+static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
+{
+	unsigned int i;
+
+	for (i = 1; i < numsyms; i++) {
+		if (syms[i].st_shndx == SHN_UNDEF) {
+			char *name = strtab + syms[i].st_name;
+			if (name[0] == '.')
+				memmove(name, name+1, strlen(name));
+		}
+	}
+}
+
+int module_frob_arch_sections(Elf64_Ehdr *hdr,
+			      Elf64_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *me)
+{
+	unsigned int i;
+
+	/* Find .toc and .stubs sections, symtab and strtab */
+	for (i = 1; i < hdr->e_shnum; i++) {
+		char *p;
+		if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
+			me->arch.stubs_section = i;
+		else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+			me->arch.toc_section = i;
+		else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+			dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
+					  sechdrs[i].sh_size);
+
+		/* We don't handle .init for the moment: rename to _init */
+		while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
+			p[0] = '_';
+
+		if (sechdrs[i].sh_type == SHT_SYMTAB)
+			dedotify((void *)hdr + sechdrs[i].sh_offset,
+				 sechdrs[i].sh_size / sizeof(Elf64_Sym),
+				 (void *)hdr
+				 + sechdrs[sechdrs[i].sh_link].sh_offset);
+	}
+	if (!me->arch.stubs_section || !me->arch.toc_section) {
+		printk("%s: doesn't contain .toc or .stubs.\n", me->name);
+		return -ENOEXEC;
+	}
+
+	/* Override the stubs size */
+	sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+	return 0;
+}
+
+int apply_relocate(Elf64_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name);
+	return -ENOEXEC;
+}
+
+/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
+   gives the value maximum span in an instruction which uses a signed
+   offset) */
+static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
+{
+	return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+}
+
+/* Both low and high 16 bits are added as SIGNED additions, so if low
+   16 bits has high bit set, high 16 bits must be adjusted.  These
+   macros do that (stolen from binutils). */
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+
+/* Patch stub to reference function and correct r2 value. */
+static inline int create_stub(Elf64_Shdr *sechdrs,
+			      struct ppc64_stub_entry *entry,
+			      struct ppc64_opd_entry *opd,
+			      struct module *me)
+{
+	Elf64_Half *loc1, *loc2;
+	long reladdr;
+
+	*entry = ppc64_stub;
+
+	loc1 = (Elf64_Half *)&entry->jump[2];
+	loc2 = (Elf64_Half *)&entry->jump[6];
+
+	/* Stub uses address relative to r2. */
+	reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+	if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+		printk("%s: Address %p of stub out of range of %p.\n",
+		       me->name, (void *)reladdr, (void *)my_r2);
+		return 0;
+	}
+	DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+	*loc1 = PPC_HA(reladdr);
+	*loc2 = PPC_LO(reladdr);
+	entry->opd.funcaddr = opd->funcaddr;
+	entry->opd.r2 = opd->r2;
+	return 1;
+}
+
+/* Create stub to jump to function described in this OPD: we need the
+   stub to set up the TOC ptr (r2) for the function. */
+static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
+				   unsigned long opdaddr,
+				   struct module *me)
+{
+	struct ppc64_stub_entry *stubs;
+	struct ppc64_opd_entry *opd = (void *)opdaddr;
+	unsigned int i, num_stubs;
+
+	num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
+
+	/* Find this stub, or if that fails, the next avail. entry */
+	stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
+	for (i = 0; stubs[i].opd.funcaddr; i++) {
+		BUG_ON(i >= num_stubs);
+
+		if (stubs[i].opd.funcaddr == opd->funcaddr)
+			return (unsigned long)&stubs[i];
+	}
+
+	if (!create_stub(sechdrs, &stubs[i], opd, me))
+		return 0;
+
+	return (unsigned long)&stubs[i];
+}
+
+/* We expect a noop next: if it is, replace it with instruction to
+   restore r2. */
+static int restore_r2(u32 *instruction, struct module *me)
+{
+	if (*instruction != 0x60000000) {
+		printk("%s: Expect noop after relocate, got %08x\n",
+		       me->name, *instruction);
+		return 0;
+	}
+	*instruction = 0xe8410028;	/* ld r2,40(r1) */
+	return 1;
+}
+
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	unsigned int i;
+	Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+	Elf64_Sym *sym;
+	unsigned long *location;
+	unsigned long value;
+
+	DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rela[i].r_offset;
+		/* This is the symbol it is referring to */
+		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+			+ ELF64_R_SYM(rela[i].r_info);
+
+		DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
+		       location, (long)ELF64_R_TYPE(rela[i].r_info),
+		       strtab + sym->st_name, (unsigned long)sym->st_value,
+		       (long)rela[i].r_addend);
+
+		/* `Everything is relative'. */
+		value = sym->st_value + rela[i].r_addend;
+
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		case R_PPC64_ADDR32:
+			/* Simply set it */
+			*(u32 *)location = value;
+			break;
+			
+		case R_PPC64_ADDR64:
+			/* Simply set it */
+			*(unsigned long *)location = value;
+			break;
+
+		case R_PPC64_TOC:
+			*(unsigned long *)location = my_r2(sechdrs, me);
+			break;
+
+		case R_PPC64_TOC16:
+			/* Subtact TOC pointer */
+			value -= my_r2(sechdrs, me);
+			if (value + 0x8000 > 0xffff) {
+				printk("%s: bad TOC16 relocation (%lu)\n",
+				       me->name, value);
+				return -ENOEXEC;
+			}
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xffff)
+				| (value & 0xffff);
+			break;
+
+		case R_PPC64_TOC16_DS:
+			/* Subtact TOC pointer */
+			value -= my_r2(sechdrs, me);
+			if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
+				printk("%s: bad TOC16_DS relocation (%lu)\n",
+				       me->name, value);
+				return -ENOEXEC;
+			}
+			*((uint16_t *) location)
+				= (*((uint16_t *) location) & ~0xfffc)
+				| (value & 0xfffc);
+			break;
+
+		case R_PPC_REL24:
+			/* FIXME: Handle weak symbols here --RR */
+			if (sym->st_shndx == SHN_UNDEF) {
+				/* External: go via stub */
+				value = stub_for_addr(sechdrs, value, me);
+				if (!value)
+					return -ENOENT;
+				if (!restore_r2((u32 *)location + 1, me))
+					return -ENOEXEC;
+			}
+
+			/* Convert value to relative */
+			value -= (unsigned long)location;
+			if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
+				printk("%s: REL24 %li out of range!\n",
+				       me->name, (long int)value);
+				return -ENOEXEC;
+			}
+
+			/* Only replace bits 2 through 26 */
+			*(uint32_t *)location 
+				= (*(uint32_t *)location & ~0x03fffffc)
+				| (value & 0x03fffffc);
+			break;
+
+		default:
+			printk("%s: Unknown ADD relocation: %lu\n",
+			       me->name,
+			       (unsigned long)ELF64_R_TYPE(rela[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+
+	return 0;
+}
+
+LIST_HEAD(module_bug_list);
+
+int module_finalize(const Elf_Ehdr *hdr,
+		const Elf_Shdr *sechdrs, struct module *me)
+{
+	char *secstrings;
+	unsigned int i;
+
+	me->arch.bug_table = NULL;
+	me->arch.num_bugs = 0;
+
+	/* Find the __bug_table section, if present */
+	secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+	for (i = 1; i < hdr->e_shnum; i++) {
+		if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
+			continue;
+		me->arch.bug_table = (void *) sechdrs[i].sh_addr;
+		me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
+		break;
+	}
+
+	/*
+	 * Strictly speaking this should have a spinlock to protect against
+	 * traversals, but since we only traverse on BUG()s, a spinlock
+	 * could potentially lead to deadlock and thus be counter-productive.
+	 */
+	list_add(&me->arch.bug_list, &module_bug_list);
+
+	return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+	list_del(&mod->arch.bug_list);
+}
+
+struct bug_entry *module_find_bug(unsigned long bugaddr)
+{
+	struct mod_arch_specific *mod;
+	unsigned int i;
+	struct bug_entry *bug;
+
+	list_for_each_entry(mod, &module_bug_list, bug_list) {
+		bug = mod->bug_table;
+		for (i = 0; i < mod->num_bugs; ++i, ++bug)
+			if (bugaddr == bug->bug_addr)
+				return bug;
+	}
+	return NULL;
+}
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
new file mode 100644
index 000000000000..3cef1b8f57f0
--- /dev/null
+++ b/arch/powerpc/kernel/pci_64.c
@@ -0,0 +1,1319 @@
+/*
+ * Port for PPC64 David Engebretsen, IBM Corp.
+ * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
+ * 
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *   Rework, based on alpha PCI code.
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/ppc-pci.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+unsigned long pci_probe_only = 1;
+unsigned long pci_assign_all_buses = 0;
+
+/*
+ * legal IO pages under MAX_ISA_PORT.  This is to ensure we don't touch
+ * devices we don't have access to.
+ */
+unsigned long io_page_mask;
+
+EXPORT_SYMBOL(io_page_mask);
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static void fixup_resource(struct resource *res, struct pci_dev *dev);
+static void do_bus_setup(struct pci_bus *bus);
+#endif
+
+unsigned int pcibios_assign_all_busses(void)
+{
+	return pci_assign_all_buses;
+}
+
+/* pci_io_base -- the base address from which io bars are offsets.
+ * This is the lowest I/O base address (so bar values are always positive),
+ * and it *must* be the start of ISA space if an ISA bus exists because
+ * ISA drivers use hard coded offsets.  If no ISA bus exists a dummy
+ * page is mapped and isa_io_limit prevents access to it.
+ */
+unsigned long isa_io_base;	/* NULL if no ISA bus */
+EXPORT_SYMBOL(isa_io_base);
+unsigned long pci_io_base;
+EXPORT_SYMBOL(pci_io_base);
+
+void iSeries_pcibios_init(void);
+
+LIST_HEAD(hose_list);
+
+struct dma_mapping_ops pci_dma_ops;
+EXPORT_SYMBOL(pci_dma_ops);
+
+int global_phb_number;		/* Global phb counter */
+
+/* Cached ISA bridge dev. */
+struct pci_dev *ppc64_isabridge_dev = NULL;
+
+static void fixup_broken_pcnet32(struct pci_dev* dev)
+{
+	if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
+		dev->vendor = PCI_VENDOR_ID_AMD;
+		pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
+
+void  pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			      struct resource *res)
+{
+	unsigned long offset = 0;
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+	if (!hose)
+		return;
+
+	if (res->flags & IORESOURCE_IO)
+	        offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+	if (res->flags & IORESOURCE_MEM)
+		offset = hose->pci_mem_offset;
+
+	region->start = res->start - offset;
+	region->end = res->end - offset;
+}
+
+void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			      struct pci_bus_region *region)
+{
+	unsigned long offset = 0;
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+	if (!hose)
+		return;
+
+	if (res->flags & IORESOURCE_IO)
+	        offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+	if (res->flags & IORESOURCE_MEM)
+		offset = hose->pci_mem_offset;
+
+	res->start = region->start + offset;
+	res->end = region->end + offset;
+}
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
+#endif
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might have be mirrored at 0x0100-0x03ff..
+ */
+void pcibios_align_resource(void *data, struct resource *res,
+			    unsigned long size, unsigned long align)
+{
+	struct pci_dev *dev = data;
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	unsigned long start = res->start;
+	unsigned long alignto;
+
+	if (res->flags & IORESOURCE_IO) {
+	        unsigned long offset = (unsigned long)hose->io_base_virt -
+					pci_io_base;
+		/* Make sure we start at our min on all hoses */
+		if (start - offset < PCIBIOS_MIN_IO)
+			start = PCIBIOS_MIN_IO + offset;
+
+		/*
+		 * Put everything into 0x00-0xff region modulo 0x400
+		 */
+		if (start & 0x300)
+			start = (start + 0x3ff) & ~0x3ff;
+
+	} else if (res->flags & IORESOURCE_MEM) {
+		/* Make sure we start at our min on all hoses */
+		if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM)
+			start = PCIBIOS_MIN_MEM + hose->pci_mem_offset;
+
+		/* Align to multiple of size of minimum base.  */
+		alignto = max(0x1000UL, align);
+		start = ALIGN(start, alignto);
+	}
+
+	res->start = start;
+}
+
+static DEFINE_SPINLOCK(hose_spinlock);
+
+/*
+ * pci_controller(phb) initialized common variables.
+ */
+void __devinit pci_setup_pci_controller(struct pci_controller *hose)
+{
+	memset(hose, 0, sizeof(struct pci_controller));
+
+	spin_lock(&hose_spinlock);
+	hose->global_number = global_phb_number++;
+	list_add_tail(&hose->list_node, &hose_list);
+	spin_unlock(&hose_spinlock);
+}
+
+static void __init pcibios_claim_one_bus(struct pci_bus *b)
+{
+	struct pci_dev *dev;
+	struct pci_bus *child_bus;
+
+	list_for_each_entry(dev, &b->devices, bus_list) {
+		int i;
+
+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+			struct resource *r = &dev->resource[i];
+
+			if (r->parent || !r->start || !r->flags)
+				continue;
+			pci_claim_resource(dev, i);
+		}
+	}
+
+	list_for_each_entry(child_bus, &b->children, node)
+		pcibios_claim_one_bus(child_bus);
+}
+
+#ifndef CONFIG_PPC_ISERIES
+static void __init pcibios_claim_of_setup(void)
+{
+	struct pci_bus *b;
+
+	list_for_each_entry(b, &pci_root_buses, node)
+		pcibios_claim_one_bus(b);
+}
+#endif
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
+{
+	u32 *prop;
+	int len;
+
+	prop = (u32 *) get_property(np, name, &len);
+	if (prop && len >= 4)
+		return *prop;
+	return def;
+}
+
+static unsigned int pci_parse_of_flags(u32 addr0)
+{
+	unsigned int flags = 0;
+
+	if (addr0 & 0x02000000) {
+		flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+		flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
+		flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+		if (addr0 & 0x40000000)
+			flags |= IORESOURCE_PREFETCH
+				 | PCI_BASE_ADDRESS_MEM_PREFETCH;
+	} else if (addr0 & 0x01000000)
+		flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+	return flags;
+}
+
+#define GET_64BIT(prop, i)	((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
+
+static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev)
+{
+	u64 base, size;
+	unsigned int flags;
+	struct resource *res;
+	u32 *addrs, i;
+	int proplen;
+
+	addrs = (u32 *) get_property(node, "assigned-addresses", &proplen);
+	if (!addrs)
+		return;
+	for (; proplen >= 20; proplen -= 20, addrs += 5) {
+		flags = pci_parse_of_flags(addrs[0]);
+		if (!flags)
+			continue;
+		base = GET_64BIT(addrs, 1);
+		size = GET_64BIT(addrs, 3);
+		if (!size)
+			continue;
+		i = addrs[0] & 0xff;
+		if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
+			res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
+		} else if (i == dev->rom_base_reg) {
+			res = &dev->resource[PCI_ROM_RESOURCE];
+			flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+		} else {
+			printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
+			continue;
+		}
+		res->start = base;
+		res->end = base + size - 1;
+		res->flags = flags;
+		res->name = pci_name(dev);
+		fixup_resource(res, dev);
+	}
+}
+
+struct pci_dev *of_create_pci_dev(struct device_node *node,
+				 struct pci_bus *bus, int devfn)
+{
+	struct pci_dev *dev;
+	const char *type;
+
+	dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL);
+	if (!dev)
+		return NULL;
+	type = get_property(node, "device_type", NULL);
+	if (type == NULL)
+		type = "";
+
+	memset(dev, 0, sizeof(struct pci_dev));
+	dev->bus = bus;
+	dev->sysdata = node;
+	dev->dev.parent = bus->bridge;
+	dev->dev.bus = &pci_bus_type;
+	dev->devfn = devfn;
+	dev->multifunction = 0;		/* maybe a lie? */
+
+	dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
+	dev->device = get_int_prop(node, "device-id", 0xffff);
+	dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
+	dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
+
+	dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/
+
+	sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+		dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+	dev->class = get_int_prop(node, "class-code", 0);
+
+	dev->current_state = 4;		/* unknown power state */
+
+	if (!strcmp(type, "pci")) {
+		/* a PCI-PCI bridge */
+		dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+		dev->rom_base_reg = PCI_ROM_ADDRESS1;
+	} else if (!strcmp(type, "cardbus")) {
+		dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+	} else {
+		dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+		dev->rom_base_reg = PCI_ROM_ADDRESS;
+		dev->irq = NO_IRQ;
+		if (node->n_intrs > 0) {
+			dev->irq = node->intrs[0].line;
+			pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
+					      dev->irq);
+		}
+	}
+
+	pci_parse_of_addrs(node, dev);
+
+	pci_device_add(dev, bus);
+
+	/* XXX pci_scan_msi_device(dev); */
+
+	return dev;
+}
+EXPORT_SYMBOL(of_create_pci_dev);
+
+void __devinit of_scan_bus(struct device_node *node,
+				  struct pci_bus *bus)
+{
+	struct device_node *child = NULL;
+	u32 *reg;
+	int reglen, devfn;
+	struct pci_dev *dev;
+
+	while ((child = of_get_next_child(node, child)) != NULL) {
+		reg = (u32 *) get_property(child, "reg", &reglen);
+		if (reg == NULL || reglen < 20)
+			continue;
+		devfn = (reg[0] >> 8) & 0xff;
+		/* create a new pci_dev for this device */
+		dev = of_create_pci_dev(child, bus, devfn);
+		if (!dev)
+			continue;
+		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+		    dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+			of_scan_pci_bridge(child, dev);
+	}
+
+	do_bus_setup(bus);
+}
+EXPORT_SYMBOL(of_scan_bus);
+
+void __devinit of_scan_pci_bridge(struct device_node *node,
+			 	struct pci_dev *dev)
+{
+	struct pci_bus *bus;
+	u32 *busrange, *ranges;
+	int len, i, mode;
+	struct resource *res;
+	unsigned int flags;
+	u64 size;
+
+	/* parse bus-range property */
+	busrange = (u32 *) get_property(node, "bus-range", &len);
+	if (busrange == NULL || len != 8) {
+		printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n",
+		       node->full_name);
+		return;
+	}
+	ranges = (u32 *) get_property(node, "ranges", &len);
+	if (ranges == NULL) {
+		printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n",
+		       node->full_name);
+		return;
+	}
+
+	bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+	if (!bus) {
+		printk(KERN_ERR "Failed to create pci bus for %s\n",
+		       node->full_name);
+		return;
+	}
+
+	bus->primary = dev->bus->number;
+	bus->subordinate = busrange[1];
+	bus->bridge_ctl = 0;
+	bus->sysdata = node;
+
+	/* parse ranges property */
+	/* PCI #address-cells == 3 and #size-cells == 2 always */
+	res = &dev->resource[PCI_BRIDGE_RESOURCES];
+	for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+		res->flags = 0;
+		bus->resource[i] = res;
+		++res;
+	}
+	i = 1;
+	for (; len >= 32; len -= 32, ranges += 8) {
+		flags = pci_parse_of_flags(ranges[0]);
+		size = GET_64BIT(ranges, 6);
+		if (flags == 0 || size == 0)
+			continue;
+		if (flags & IORESOURCE_IO) {
+			res = bus->resource[0];
+			if (res->flags) {
+				printk(KERN_ERR "PCI: ignoring extra I/O range"
+				       " for bridge %s\n", node->full_name);
+				continue;
+			}
+		} else {
+			if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+				printk(KERN_ERR "PCI: too many memory ranges"
+				       " for bridge %s\n", node->full_name);
+				continue;
+			}
+			res = bus->resource[i];
+			++i;
+		}
+		res->start = GET_64BIT(ranges, 1);
+		res->end = res->start + size - 1;
+		res->flags = flags;
+		fixup_resource(res, dev);
+	}
+	sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+		bus->number);
+
+	mode = PCI_PROBE_NORMAL;
+	if (ppc_md.pci_probe_mode)
+		mode = ppc_md.pci_probe_mode(bus);
+	if (mode == PCI_PROBE_DEVTREE)
+		of_scan_bus(node, bus);
+	else if (mode == PCI_PROBE_NORMAL)
+		pci_scan_child_bus(bus);
+}
+EXPORT_SYMBOL(of_scan_pci_bridge);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+void __devinit scan_phb(struct pci_controller *hose)
+{
+	struct pci_bus *bus;
+	struct device_node *node = hose->arch_data;
+	int i, mode;
+	struct resource *res;
+
+	bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node);
+	if (bus == NULL) {
+		printk(KERN_ERR "Failed to create bus for PCI domain %04x\n",
+		       hose->global_number);
+		return;
+	}
+	bus->secondary = hose->first_busno;
+	hose->bus = bus;
+
+	bus->resource[0] = res = &hose->io_resource;
+	if (res->flags && request_resource(&ioport_resource, res))
+		printk(KERN_ERR "Failed to request PCI IO region "
+		       "on PCI domain %04x\n", hose->global_number);
+
+	for (i = 0; i < 3; ++i) {
+		res = &hose->mem_resources[i];
+		bus->resource[i+1] = res;
+		if (res->flags && request_resource(&iomem_resource, res))
+			printk(KERN_ERR "Failed to request PCI memory region "
+			       "on PCI domain %04x\n", hose->global_number);
+	}
+
+	mode = PCI_PROBE_NORMAL;
+#ifdef CONFIG_PPC_MULTIPLATFORM
+	if (ppc_md.pci_probe_mode)
+		mode = ppc_md.pci_probe_mode(bus);
+	if (mode == PCI_PROBE_DEVTREE) {
+		bus->subordinate = hose->last_busno;
+		of_scan_bus(node, bus);
+	}
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+	if (mode == PCI_PROBE_NORMAL)
+		hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
+	pci_bus_add_devices(bus);
+}
+
+static int __init pcibios_init(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	/* For now, override phys_mem_access_prot. If we need it,
+	 * later, we may move that initialization to each ppc_md
+	 */
+	ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
+
+#ifdef CONFIG_PPC_ISERIES
+	iSeries_pcibios_init(); 
+#endif
+
+	printk("PCI: Probing PCI hardware\n");
+
+	/* Scan all of the recorded PCI controllers.  */
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		scan_phb(hose);
+
+#ifndef CONFIG_PPC_ISERIES
+	if (pci_probe_only)
+		pcibios_claim_of_setup();
+	else
+		/* FIXME: `else' will be removed when
+		   pci_assign_unassigned_resources() is able to work
+		   correctly with [partially] allocated PCI tree. */
+		pci_assign_unassigned_resources();
+#endif /* !CONFIG_PPC_ISERIES */
+
+	/* Call machine dependent final fixup */
+	if (ppc_md.pcibios_fixup)
+		ppc_md.pcibios_fixup();
+
+	/* Cache the location of the ISA bridge (if we have one) */
+	ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
+	if (ppc64_isabridge_dev != NULL)
+		printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+	/* map in PCI I/O space */
+	phbs_remap_io();
+#endif
+
+	printk("PCI: Probing PCI hardware done\n");
+
+	return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+char __init *pcibios_setup(char *str)
+{
+	return str;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 cmd, oldcmd;
+	int i;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	oldcmd = cmd;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		struct resource *res = &dev->resource[i];
+
+		/* Only set up the requested stuff */
+		if (!(mask & (1<<i)))
+			continue;
+
+		if (res->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (res->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+
+	if (cmd != oldcmd) {
+		printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
+		       pci_name(dev), cmd);
+                /* Enable the appropriate bits in the PCI command register.  */
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
+
+/*
+ * Return the domain number for this bus.
+ */
+int pci_domain_nr(struct pci_bus *bus)
+{
+#ifdef CONFIG_PPC_ISERIES
+	return 0;
+#else
+	struct pci_controller *hose = pci_bus_to_host(bus);
+
+	return hose->global_number;
+#endif
+}
+
+EXPORT_SYMBOL(pci_domain_nr);
+
+/* Decide whether to display the domain number in /proc */
+int pci_proc_domain(struct pci_bus *bus)
+{
+#ifdef CONFIG_PPC_ISERIES
+	return 0;
+#else
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	return hose->buid;
+#endif
+}
+
+/*
+ * Platform support for /proc/bus/pci/X/Y mmap()s,
+ * modelled on the sparc64 implementation by Dave Miller.
+ *  -- paulus.
+ */
+
+/*
+ * Adjust vm_pgoff of VMA such that it is the physical page offset
+ * corresponding to the 32-bit pci bus offset for DEV requested by the user.
+ *
+ * Basically, the user finds the base address for his device which he wishes
+ * to mmap.  They read the 32-bit value from the config space base register,
+ * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
+ * offset parameter of mmap on /proc/bus/pci/XXX for that device.
+ *
+ * Returns negative error code on failure, zero on success.
+ */
+static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
+					       unsigned long *offset,
+					       enum pci_mmap_state mmap_state)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	unsigned long io_offset = 0;
+	int i, res_bit;
+
+	if (hose == 0)
+		return NULL;		/* should never happen */
+
+	/* If memory, add on the PCI bridge address offset */
+	if (mmap_state == pci_mmap_mem) {
+		*offset += hose->pci_mem_offset;
+		res_bit = IORESOURCE_MEM;
+	} else {
+		io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+		*offset += io_offset;
+		res_bit = IORESOURCE_IO;
+	}
+
+	/*
+	 * Check that the offset requested corresponds to one of the
+	 * resources of the device.
+	 */
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+		struct resource *rp = &dev->resource[i];
+		int flags = rp->flags;
+
+		/* treat ROM as memory (should be already) */
+		if (i == PCI_ROM_RESOURCE)
+			flags |= IORESOURCE_MEM;
+
+		/* Active and same type? */
+		if ((flags & res_bit) == 0)
+			continue;
+
+		/* In the range of this resource? */
+		if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
+			continue;
+
+		/* found it! construct the final physical address */
+		if (mmap_state == pci_mmap_io)
+		       	*offset += hose->io_base_phys - io_offset;
+		return rp;
+	}
+
+	return NULL;
+}
+
+/*
+ * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
+ * device mapping.
+ */
+static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
+				      pgprot_t protection,
+				      enum pci_mmap_state mmap_state,
+				      int write_combine)
+{
+	unsigned long prot = pgprot_val(protection);
+
+	/* Write combine is always 0 on non-memory space mappings. On
+	 * memory space, if the user didn't pass 1, we check for a
+	 * "prefetchable" resource. This is a bit hackish, but we use
+	 * this to workaround the inability of /sysfs to provide a write
+	 * combine bit
+	 */
+	if (mmap_state != pci_mmap_mem)
+		write_combine = 0;
+	else if (write_combine == 0) {
+		if (rp->flags & IORESOURCE_PREFETCH)
+			write_combine = 1;
+	}
+
+	/* XXX would be nice to have a way to ask for write-through */
+	prot |= _PAGE_NO_CACHE;
+	if (write_combine)
+		prot &= ~_PAGE_GUARDED;
+	else
+		prot |= _PAGE_GUARDED;
+
+	printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start,
+	       prot);
+
+	return __pgprot(prot);
+}
+
+/*
+ * This one is used by /dev/mem and fbdev who have no clue about the
+ * PCI device, it tries to find the PCI device first and calls the
+ * above routine
+ */
+pgprot_t pci_phys_mem_access_prot(struct file *file,
+				  unsigned long pfn,
+				  unsigned long size,
+				  pgprot_t protection)
+{
+	struct pci_dev *pdev = NULL;
+	struct resource *found = NULL;
+	unsigned long prot = pgprot_val(protection);
+	unsigned long offset = pfn << PAGE_SHIFT;
+	int i;
+
+	if (page_is_ram(pfn))
+		return __pgprot(prot);
+
+	prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+
+	for_each_pci_dev(pdev) {
+		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+			struct resource *rp = &pdev->resource[i];
+			int flags = rp->flags;
+
+			/* Active and same type? */
+			if ((flags & IORESOURCE_MEM) == 0)
+				continue;
+			/* In the range of this resource? */
+			if (offset < (rp->start & PAGE_MASK) ||
+			    offset > rp->end)
+				continue;
+			found = rp;
+			break;
+		}
+		if (found)
+			break;
+	}
+	if (found) {
+		if (found->flags & IORESOURCE_PREFETCH)
+			prot &= ~_PAGE_GUARDED;
+		pci_dev_put(pdev);
+	}
+
+	DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
+
+	return __pgprot(prot);
+}
+
+
+/*
+ * Perform the actual remap of the pages for a PCI device mapping, as
+ * appropriate for this architecture.  The region in the process to map
+ * is described by vm_start and vm_end members of VMA, the base physical
+ * address is found in vm_pgoff.
+ * The pci device structure is provided so that architectures may make mapping
+ * decisions on a per-device or per-bus basis.
+ *
+ * Returns a negative error code on failure, zero on success.
+ */
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state,
+			int write_combine)
+{
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	struct resource *rp;
+	int ret;
+
+	rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
+	if (rp == NULL)
+		return -EINVAL;
+
+	vma->vm_pgoff = offset >> PAGE_SHIFT;
+	vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO;
+	vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
+						  vma->vm_page_prot,
+						  mmap_state, write_combine);
+
+	ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
+
+	return ret;
+}
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev;
+	struct device_node *np;
+
+	pdev = to_pci_dev (dev);
+	np = pci_device_to_OF_node(pdev);
+	if (np == NULL || np->full_name == NULL)
+		return 0;
+	return sprintf(buf, "%s", np->full_name);
+}
+static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+void pcibios_add_platform_entries(struct pci_dev *pdev)
+{
+#ifdef CONFIG_PPC_MULTIPLATFORM
+	device_create_file(&pdev->dev, &dev_attr_devspec);
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+}
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+
+#define ISA_SPACE_MASK 0x1
+#define ISA_SPACE_IO 0x1
+
+static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
+				      unsigned long phb_io_base_phys,
+				      void __iomem * phb_io_base_virt)
+{
+	struct isa_range *range;
+	unsigned long pci_addr;
+	unsigned int isa_addr;
+	unsigned int size;
+	int rlen = 0;
+
+	range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
+	if (range == NULL || (rlen < sizeof(struct isa_range))) {
+		printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
+		       "mapping 64k\n");
+		__ioremap_explicit(phb_io_base_phys,
+				   (unsigned long)phb_io_base_virt,
+				   0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED);
+		return;	
+	}
+	
+	/* From "ISA Binding to 1275"
+	 * The ranges property is laid out as an array of elements,
+	 * each of which comprises:
+	 *   cells 0 - 1:	an ISA address
+	 *   cells 2 - 4:	a PCI address 
+	 *			(size depending on dev->n_addr_cells)
+	 *   cell 5:		the size of the range
+	 */
+	if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) {
+		isa_addr = range->isa_addr.a_lo;
+		pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | 
+			range->pci_addr.a_lo;
+
+		/* Assume these are both zero */
+		if ((pci_addr != 0) || (isa_addr != 0)) {
+			printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
+					__FUNCTION__);
+			return;
+		}
+		
+		size = PAGE_ALIGN(range->size);
+
+		__ioremap_explicit(phb_io_base_phys, 
+				   (unsigned long) phb_io_base_virt, 
+				   size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+	}
+}
+
+void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
+					    struct device_node *dev, int prim)
+{
+	unsigned int *ranges, pci_space;
+	unsigned long size;
+	int rlen = 0;
+	int memno = 0;
+	struct resource *res;
+	int np, na = prom_n_addr_cells(dev);
+	unsigned long pci_addr, cpu_phys_addr;
+
+	np = na + 5;
+
+	/* From "PCI Binding to 1275"
+	 * The ranges property is laid out as an array of elements,
+	 * each of which comprises:
+	 *   cells 0 - 2:	a PCI address
+	 *   cells 3 or 3+4:	a CPU physical address
+	 *			(size depending on dev->n_addr_cells)
+	 *   cells 4+5 or 5+6:	the size of the range
+	 */
+	rlen = 0;
+	hose->io_base_phys = 0;
+	ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
+	while ((rlen -= np * sizeof(unsigned int)) >= 0) {
+		res = NULL;
+		pci_space = ranges[0];
+		pci_addr = ((unsigned long)ranges[1] << 32) | ranges[2];
+
+		cpu_phys_addr = ranges[3];
+		if (na >= 2)
+			cpu_phys_addr = (cpu_phys_addr << 32) | ranges[4];
+
+		size = ((unsigned long)ranges[na+3] << 32) | ranges[na+4];
+		ranges += np;
+		if (size == 0)
+			continue;
+
+		/* Now consume following elements while they are contiguous */
+		while (rlen >= np * sizeof(unsigned int)) {
+			unsigned long addr, phys;
+
+			if (ranges[0] != pci_space)
+				break;
+			addr = ((unsigned long)ranges[1] << 32) | ranges[2];
+			phys = ranges[3];
+			if (na >= 2)
+				phys = (phys << 32) | ranges[4];
+			if (addr != pci_addr + size ||
+			    phys != cpu_phys_addr + size)
+				break;
+
+			size += ((unsigned long)ranges[na+3] << 32)
+				| ranges[na+4];
+			ranges += np;
+			rlen -= np * sizeof(unsigned int);
+		}
+
+		switch ((pci_space >> 24) & 0x3) {
+		case 1:		/* I/O space */
+			hose->io_base_phys = cpu_phys_addr;
+			hose->pci_io_size = size;
+
+			res = &hose->io_resource;
+			res->flags = IORESOURCE_IO;
+			res->start = pci_addr;
+			DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number,
+				    res->start, res->start + size - 1);
+			break;
+		case 2:		/* memory space */
+			memno = 0;
+			while (memno < 3 && hose->mem_resources[memno].flags)
+				++memno;
+
+			if (memno == 0)
+				hose->pci_mem_offset = cpu_phys_addr - pci_addr;
+			if (memno < 3) {
+				res = &hose->mem_resources[memno];
+				res->flags = IORESOURCE_MEM;
+				res->start = cpu_phys_addr;
+				DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number,
+					    res->start, res->start + size - 1);
+			}
+			break;
+		}
+		if (res != NULL) {
+			res->name = dev->full_name;
+			res->end = res->start + size - 1;
+			res->parent = NULL;
+			res->sibling = NULL;
+			res->child = NULL;
+		}
+	}
+}
+
+void __init pci_setup_phb_io(struct pci_controller *hose, int primary)
+{
+	unsigned long size = hose->pci_io_size;
+	unsigned long io_virt_offset;
+	struct resource *res;
+	struct device_node *isa_dn;
+
+	hose->io_base_virt = reserve_phb_iospace(size);
+	DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+		hose->global_number, hose->io_base_phys,
+		(unsigned long) hose->io_base_virt);
+
+	if (primary) {
+		pci_io_base = (unsigned long)hose->io_base_virt;
+		isa_dn = of_find_node_by_type(NULL, "isa");
+		if (isa_dn) {
+			isa_io_base = pci_io_base;
+			pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
+						hose->io_base_virt);
+			of_node_put(isa_dn);
+			/* Allow all IO */
+			io_page_mask = -1;
+		}
+	}
+
+	io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+	res = &hose->io_resource;
+	res->start += io_virt_offset;
+	res->end += io_virt_offset;
+}
+
+void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
+					int primary)
+{
+	unsigned long size = hose->pci_io_size;
+	unsigned long io_virt_offset;
+	struct resource *res;
+
+	hose->io_base_virt = __ioremap(hose->io_base_phys, size,
+					_PAGE_NO_CACHE | _PAGE_GUARDED);
+	DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+		hose->global_number, hose->io_base_phys,
+		(unsigned long) hose->io_base_virt);
+
+	if (primary)
+		pci_io_base = (unsigned long)hose->io_base_virt;
+
+	io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+	res = &hose->io_resource;
+	res->start += io_virt_offset;
+	res->end += io_virt_offset;
+}
+
+
+static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
+				unsigned long *start_virt, unsigned long *size)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct pci_bus_region region;
+	struct resource *res;
+
+	if (bus->self) {
+		res = bus->resource[0];
+		pcibios_resource_to_bus(bus->self, &region, res);
+		*start_phys = hose->io_base_phys + region.start;
+		*start_virt = (unsigned long) hose->io_base_virt + 
+				region.start;
+		if (region.end > region.start) 
+			*size = region.end - region.start + 1;
+		else {
+			printk("%s(): unexpected region 0x%lx->0x%lx\n", 
+					__FUNCTION__, region.start, region.end);
+			return 1;
+		}
+		
+	} else {
+		/* Root Bus */
+		res = &hose->io_resource;
+		*start_phys = hose->io_base_phys;
+		*start_virt = (unsigned long) hose->io_base_virt;
+		if (res->end > res->start)
+			*size = res->end - res->start + 1;
+		else {
+			printk("%s(): unexpected region 0x%lx->0x%lx\n", 
+					__FUNCTION__, res->start, res->end);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+int unmap_bus_range(struct pci_bus *bus)
+{
+	unsigned long start_phys;
+	unsigned long start_virt;
+	unsigned long size;
+
+	if (!bus) {
+		printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+		return 1;
+	}
+	
+	if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+		return 1;
+	if (iounmap_explicit((void __iomem *) start_virt, size))
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(unmap_bus_range);
+
+int remap_bus_range(struct pci_bus *bus)
+{
+	unsigned long start_phys;
+	unsigned long start_virt;
+	unsigned long size;
+
+	if (!bus) {
+		printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+		return 1;
+	}
+	
+	
+	if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+		return 1;
+	printk("mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
+	if (__ioremap_explicit(start_phys, start_virt, size,
+			       _PAGE_NO_CACHE | _PAGE_GUARDED))
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(remap_bus_range);
+
+void phbs_remap_io(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		remap_bus_range(hose->bus);
+}
+
+/*
+ * ppc64 can have multifunction devices that do not respond to function 0.
+ * In this case we must scan all functions.
+ * XXX this can go now, we use the OF device tree in all the
+ * cases that caused problems. -- paulus
+ */
+int pcibios_scan_all_fns(struct pci_bus *bus, int devfn)
+{
+       return 0;
+}
+
+static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	unsigned long start, end, mask, offset;
+
+	if (res->flags & IORESOURCE_IO) {
+		offset = (unsigned long)hose->io_base_virt - pci_io_base;
+
+		start = res->start += offset;
+		end = res->end += offset;
+
+		/* Need to allow IO access to pages that are in the
+		   ISA range */
+		if (start < MAX_ISA_PORT) {
+			if (end > MAX_ISA_PORT)
+				end = MAX_ISA_PORT;
+
+			start >>= PAGE_SHIFT;
+			end >>= PAGE_SHIFT;
+
+			/* get the range of pages for the map */
+			mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1);
+			io_page_mask |= mask;
+		}
+	} else if (res->flags & IORESOURCE_MEM) {
+		res->start += hose->pci_mem_offset;
+		res->end += hose->pci_mem_offset;
+	}
+}
+
+void __devinit pcibios_fixup_device_resources(struct pci_dev *dev,
+					      struct pci_bus *bus)
+{
+	/* Update device resources.  */
+	int i;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++)
+		if (dev->resource[i].flags)
+			fixup_resource(&dev->resource[i], dev);
+}
+EXPORT_SYMBOL(pcibios_fixup_device_resources);
+
+static void __devinit do_bus_setup(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	ppc_md.iommu_bus_setup(bus);
+
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		ppc_md.iommu_dev_setup(dev);
+
+	if (ppc_md.irq_bus_setup)
+		ppc_md.irq_bus_setup(bus);
+}
+
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev = bus->self;
+
+	if (dev && pci_probe_only &&
+	    (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		/* This is a subordinate bridge */
+
+		pci_read_bridge_bases(bus);
+		pcibios_fixup_device_resources(dev, bus);
+	}
+
+	do_bus_setup(bus);
+
+	if (!pci_probe_only)
+		return;
+
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+			pcibios_fixup_device_resources(dev, bus);
+}
+EXPORT_SYMBOL(pcibios_fixup_bus);
+
+/*
+ * Reads the interrupt pin to determine if interrupt is use by card.
+ * If the interrupt is used, then gets the interrupt line from the 
+ * openfirmware and sets it in the pci_dev and pci_config line.
+ */
+int pci_read_irq_line(struct pci_dev *pci_dev)
+{
+	u8 intpin;
+	struct device_node *node;
+
+    	pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin);
+	if (intpin == 0)
+		return 0;
+
+	node = pci_device_to_OF_node(pci_dev);
+	if (node == NULL)
+		return -1;
+
+	if (node->n_intrs == 0)
+		return -1;
+
+	pci_dev->irq = node->intrs[0].line;
+
+	pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq);
+
+	return 0;
+}
+EXPORT_SYMBOL(pci_read_irq_line);
+
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+			  const struct resource *rsrc,
+			  u64 *start, u64 *end)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	unsigned long offset = 0;
+
+	if (hose == NULL)
+		return;
+
+	if (rsrc->flags & IORESOURCE_IO)
+		offset = pci_io_base - (unsigned long)hose->io_base_virt +
+			hose->io_base_phys;
+
+	*start = rsrc->start + offset;
+	*end = rsrc->end + offset;
+}
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+
+#define IOBASE_BRIDGE_NUMBER	0
+#define IOBASE_MEMORY		1
+#define IOBASE_IO		2
+#define IOBASE_ISA_IO		3
+#define IOBASE_ISA_MEM		4
+
+long sys_pciconfig_iobase(long which, unsigned long in_bus,
+			  unsigned long in_devfn)
+{
+	struct pci_controller* hose;
+	struct list_head *ln;
+	struct pci_bus *bus = NULL;
+	struct device_node *hose_node;
+
+	/* Argh ! Please forgive me for that hack, but that's the
+	 * simplest way to get existing XFree to not lockup on some
+	 * G5 machines... So when something asks for bus 0 io base
+	 * (bus 0 is HT root), we return the AGP one instead.
+	 */
+	if (machine_is_compatible("MacRISC4"))
+		if (in_bus == 0)
+			in_bus = 0xf0;
+
+	/* That syscall isn't quite compatible with PCI domains, but it's
+	 * used on pre-domains setup. We return the first match
+	 */
+
+	for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
+		bus = pci_bus_b(ln);
+		if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate))
+			break;
+		bus = NULL;
+	}
+	if (bus == NULL || bus->sysdata == NULL)
+		return -ENODEV;
+
+	hose_node = (struct device_node *)bus->sysdata;
+	hose = PCI_DN(hose_node)->phb;
+
+	switch (which) {
+	case IOBASE_BRIDGE_NUMBER:
+		return (long)hose->first_busno;
+	case IOBASE_MEMORY:
+		return (long)hose->pci_mem_offset;
+	case IOBASE_IO:
+		return (long)hose->io_base_phys;
+	case IOBASE_ISA_IO:
+		return (long)isa_io_base;
+	case IOBASE_ISA_MEM:
+		return -EINVAL;
+	}
+
+	return -EOPNOTSUPP;
+}
diff --git a/arch/powerpc/kernel/pci_direct_iommu.c b/arch/powerpc/kernel/pci_direct_iommu.c
new file mode 100644
index 000000000000..e1a32f802c0b
--- /dev/null
+++ b/arch/powerpc/kernel/pci_direct_iommu.c
@@ -0,0 +1,94 @@
+/*
+ * Support for DMA from PCI devices to main memory on
+ * machines without an iommu or with directly addressable
+ * RAM (typically a pmac with 2Gb of RAM or less)
+ *
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/abs_addr.h>
+#include <asm/ppc-pci.h>
+
+static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size,
+				   dma_addr_t *dma_handle, gfp_t flag)
+{
+	void *ret;
+
+	ret = (void *)__get_free_pages(flag, get_order(size));
+	if (ret != NULL) {
+		memset(ret, 0, size);
+		*dma_handle = virt_to_abs(ret);
+	}
+	return ret;
+}
+
+static void pci_direct_free_coherent(struct device *hwdev, size_t size,
+				 void *vaddr, dma_addr_t dma_handle)
+{
+	free_pages((unsigned long)vaddr, get_order(size));
+}
+
+static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr,
+		size_t size, enum dma_data_direction direction)
+{
+	return virt_to_abs(ptr);
+}
+
+static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
+		size_t size, enum dma_data_direction direction)
+{
+}
+
+static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg,
+		int nents, enum dma_data_direction direction)
+{
+	int i;
+
+	for (i = 0; i < nents; i++, sg++) {
+		sg->dma_address = page_to_phys(sg->page) + sg->offset;
+		sg->dma_length = sg->length;
+	}
+
+	return nents;
+}
+
+static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+		int nents, enum dma_data_direction direction)
+{
+}
+
+static int pci_direct_dma_supported(struct device *dev, u64 mask)
+{
+	return mask < 0x100000000ull;
+}
+
+void __init pci_direct_iommu_init(void)
+{
+	pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent;
+	pci_dma_ops.free_coherent = pci_direct_free_coherent;
+	pci_dma_ops.map_single = pci_direct_map_single;
+	pci_dma_ops.unmap_single = pci_direct_unmap_single;
+	pci_dma_ops.map_sg = pci_direct_map_sg;
+	pci_dma_ops.unmap_sg = pci_direct_unmap_sg;
+	pci_dma_ops.dma_supported = pci_direct_dma_supported;
+}
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
new file mode 100644
index 000000000000..12c4c9e9bbc7
--- /dev/null
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -0,0 +1,230 @@
+/*
+ * pci_dn.c
+ *
+ * Copyright (C) 2001 Todd Inglett, IBM Corporation
+ *
+ * PCI manipulation via device_nodes.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *    
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/pSeries_reconfig.h>
+#include <asm/ppc-pci.h>
+
+/*
+ * Traverse_func that inits the PCI fields of the device node.
+ * NOTE: this *must* be done before read/write config to the device.
+ */
+static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
+{
+	struct pci_controller *phb = data;
+	int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL);
+	u32 *regs;
+	struct pci_dn *pdn;
+
+	if (mem_init_done)
+		pdn = kmalloc(sizeof(*pdn), GFP_KERNEL);
+	else
+		pdn = alloc_bootmem(sizeof(*pdn));
+	if (pdn == NULL)
+		return NULL;
+	memset(pdn, 0, sizeof(*pdn));
+	dn->data = pdn;
+	pdn->node = dn;
+	pdn->phb = phb;
+	regs = (u32 *)get_property(dn, "reg", NULL);
+	if (regs) {
+		/* First register entry is addr (00BBSS00)  */
+		pdn->busno = (regs[0] >> 16) & 0xff;
+		pdn->devfn = (regs[0] >> 8) & 0xff;
+	}
+
+	pdn->pci_ext_config_space = (type && *type == 1);
+	return NULL;
+}
+
+/*
+ * Traverse a device tree stopping each PCI device in the tree.
+ * This is done depth first.  As each node is processed, a "pre"
+ * function is called and the children are processed recursively.
+ *
+ * The "pre" func returns a value.  If non-zero is returned from
+ * the "pre" func, the traversal stops and this value is returned.
+ * This return value is useful when using traverse as a method of
+ * finding a device.
+ *
+ * NOTE: we do not run the func for devices that do not appear to
+ * be PCI except for the start node which we assume (this is good
+ * because the start node is often a phb which may be missing PCI
+ * properties).
+ * We use the class-code as an indicator. If we run into
+ * one of these nodes we also assume its siblings are non-pci for
+ * performance.
+ */
+void *traverse_pci_devices(struct device_node *start, traverse_func pre,
+		void *data)
+{
+	struct device_node *dn, *nextdn;
+	void *ret;
+
+	/* We started with a phb, iterate all childs */
+	for (dn = start->child; dn; dn = nextdn) {
+		u32 *classp, class;
+
+		nextdn = NULL;
+		classp = (u32 *)get_property(dn, "class-code", NULL);
+		class = classp ? *classp : 0;
+
+		if (pre && ((ret = pre(dn, data)) != NULL))
+			return ret;
+
+		/* If we are a PCI bridge, go down */
+		if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
+				  (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
+			/* Depth first...do children */
+			nextdn = dn->child;
+		else if (dn->sibling)
+			/* ok, try next sibling instead. */
+			nextdn = dn->sibling;
+		if (!nextdn) {
+			/* Walk up to next valid sibling. */
+			do {
+				dn = dn->parent;
+				if (dn == start)
+					return NULL;
+			} while (dn->sibling == NULL);
+			nextdn = dn->sibling;
+		}
+	}
+	return NULL;
+}
+
+/** 
+ * pci_devs_phb_init_dynamic - setup pci devices under this PHB
+ * phb: pci-to-host bridge (top-level bridge connecting to cpu)
+ *
+ * This routine is called both during boot, (before the memory
+ * subsystem is set up, before kmalloc is valid) and during the 
+ * dynamic lpar operation of adding a PHB to a running system.
+ */
+void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
+{
+	struct device_node * dn = (struct device_node *) phb->arch_data;
+	struct pci_dn *pdn;
+
+	/* PHB nodes themselves must not match */
+	update_dn_pci_info(dn, phb);
+	pdn = dn->data;
+	if (pdn) {
+		pdn->devfn = pdn->busno = -1;
+		pdn->phb = phb;
+	}
+
+	/* Update dn->phb ptrs for new phb and children devices */
+	traverse_pci_devices(dn, update_dn_pci_info, phb);
+}
+
+/*
+ * Traversal func that looks for a <busno,devfcn> value.
+ * If found, the pci_dn is returned (thus terminating the traversal).
+ */
+static void *is_devfn_node(struct device_node *dn, void *data)
+{
+	int busno = ((unsigned long)data >> 8) & 0xff;
+	int devfn = ((unsigned long)data) & 0xff;
+	struct pci_dn *pci = dn->data;
+
+	if (pci && (devfn == pci->devfn) && (busno == pci->busno))
+		return dn;
+	return NULL;
+}
+
+/*
+ * This is the "slow" path for looking up a device_node from a
+ * pci_dev.  It will hunt for the device under its parent's
+ * phb and then update sysdata for a future fastpath.
+ *
+ * It may also do fixups on the actual device since this happens
+ * on the first read/write.
+ *
+ * Note that it also must deal with devices that don't exist.
+ * In this case it may probe for real hardware ("just in case")
+ * and add a device_node to the device tree if necessary.
+ *
+ */
+struct device_node *fetch_dev_dn(struct pci_dev *dev)
+{
+	struct device_node *orig_dn = dev->sysdata;
+	struct device_node *dn;
+	unsigned long searchval = (dev->bus->number << 8) | dev->devfn;
+
+	dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval);
+	if (dn)
+		dev->sysdata = dn;
+	return dn;
+}
+EXPORT_SYMBOL(fetch_dev_dn);
+
+static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
+{
+	struct device_node *np = node;
+	struct pci_dn *pci = NULL;
+	int err = NOTIFY_OK;
+
+	switch (action) {
+	case PSERIES_RECONFIG_ADD:
+		pci = np->parent->data;
+		if (pci)
+			update_dn_pci_info(np, pci->phb);
+		break;
+	default:
+		err = NOTIFY_DONE;
+		break;
+	}
+	return err;
+}
+
+static struct notifier_block pci_dn_reconfig_nb = {
+	.notifier_call = pci_dn_reconfig_notifier,
+};
+
+/** 
+ * pci_devs_phb_init - Initialize phbs and pci devs under them.
+ * 
+ * This routine walks over all phb's (pci-host bridges) on the
+ * system, and sets up assorted pci-related structures 
+ * (including pci info in the device node structs) for each
+ * pci device found underneath.  This routine runs once,
+ * early in the boot sequence.
+ */
+void __init pci_devs_phb_init(void)
+{
+	struct pci_controller *phb, *tmp;
+
+	/* This must be done first so the device nodes have valid pci info! */
+	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
+		pci_devs_phb_init_dynamic(phb);
+
+	pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
+}
diff --git a/arch/powerpc/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c
new file mode 100644
index 000000000000..bdf15dbbf4f0
--- /dev/null
+++ b/arch/powerpc/kernel/pci_iommu.c
@@ -0,0 +1,128 @@
+/*
+ * arch/ppc64/kernel/pci_iommu.c
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup, new allocation schemes:
+ * Copyright (C) 2004 Olof Johansson, IBM Corporation
+ *
+ * Dynamic DMA mapping support, platform-independent parts.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+/*
+ * We can use ->sysdata directly and avoid the extra work in
+ * pci_device_to_OF_node since ->sysdata will have been initialised
+ * in the iommu init code for all devices.
+ */
+#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata))
+
+static inline struct iommu_table *devnode_table(struct device *dev)
+{
+	struct pci_dev *pdev;
+
+	if (!dev) {
+		pdev = ppc64_isabridge_dev;
+		if (!pdev)
+			return NULL;
+	} else
+		pdev = to_pci_dev(dev);
+
+	return PCI_DN(PCI_GET_DN(pdev))->iommu_table;
+}
+
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size,
+			   dma_addr_t *dma_handle, gfp_t flag)
+{
+	return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle,
+			flag);
+}
+
+static void pci_iommu_free_coherent(struct device *hwdev, size_t size,
+			 void *vaddr, dma_addr_t dma_handle)
+{
+	iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle);
+}
+
+/* Creates TCEs for a user provided buffer.  The user buffer must be 
+ * contiguous real kernel storage (not vmalloc).  The address of the buffer
+ * passed here is the kernel (virtual) address of the buffer.  The buffer
+ * need not be page aligned, the dma_addr_t returned will point to the same
+ * byte within the page as vaddr.
+ */
+static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr,
+		size_t size, enum dma_data_direction direction)
+{
+	return iommu_map_single(devnode_table(hwdev), vaddr, size, direction);
+}
+
+
+static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle,
+		size_t size, enum dma_data_direction direction)
+{
+	iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction);
+}
+
+
+static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist,
+		int nelems, enum dma_data_direction direction)
+{
+	return iommu_map_sg(pdev, devnode_table(pdev), sglist,
+			nelems, direction);
+}
+
+static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist,
+		int nelems, enum dma_data_direction direction)
+{
+	iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction);
+}
+
+/* We support DMA to/from any memory page via the iommu */
+static int pci_iommu_dma_supported(struct device *dev, u64 mask)
+{
+	return 1;
+}
+
+void pci_iommu_init(void)
+{
+	pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent;
+	pci_dma_ops.free_coherent = pci_iommu_free_coherent;
+	pci_dma_ops.map_single = pci_iommu_map_single;
+	pci_dma_ops.unmap_single = pci_iommu_unmap_single;
+	pci_dma_ops.map_sg = pci_iommu_map_sg;
+	pci_dma_ops.unmap_sg = pci_iommu_unmap_sg;
+	pci_dma_ops.dma_supported = pci_iommu_dma_supported;
+}
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index e7ca5b1f591e..06d5ef501218 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -4,4 +4,7 @@ obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_IBMVIO)	+= vio.o
 obj-$(CONFIG_XICS)	+= xics.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
-obj-$(CONFIG_EEH)    += eeh.o eeh_event.o
+obj-$(CONFIG_EEH)	+= eeh.o eeh_event.o
+
+obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
+obj-$(CONFIG_HVCS)		+= hvcserver.o
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
new file mode 100644
index 000000000000..138e128a3886
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -0,0 +1,74 @@
+/*
+ * hvconsole.c
+ * Copyright (C) 2004 Hollis Blanchard, IBM Corporation
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Additional Author(s):
+ *  Ryan S. Arnold <rsa@us.ibm.com>
+ *
+ * LPAR console support.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/hvcall.h>
+#include <asm/hvconsole.h>
+
+/**
+ * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper
+ * @vtermno: The vtermno or unit_address of the adapter from which to fetch the
+ *	data.
+ * @buf: The character buffer into which to put the character data fetched from
+ *	firmware.
+ * @count: not used?
+ */
+int hvc_get_chars(uint32_t vtermno, char *buf, int count)
+{
+	unsigned long got;
+
+	if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got,
+		(unsigned long *)buf, (unsigned long *)buf+1) == H_Success)
+		return got;
+	return 0;
+}
+
+EXPORT_SYMBOL(hvc_get_chars);
+
+
+/**
+ * hvc_put_chars: send characters to firmware for denoted vterm adapter
+ * @vtermno: The vtermno or unit_address of the adapter from which the data
+ *	originated.
+ * @buf: The character buffer that contains the character data to send to
+ *	firmware.
+ * @count: Send this number of characters.
+ */
+int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
+{
+	unsigned long *lbuf = (unsigned long *) buf;
+	long ret;
+
+	ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, lbuf[0],
+				 lbuf[1]);
+	if (ret == H_Success)
+		return count;
+	if (ret == H_Busy)
+		return 0;
+	return -EIO;
+}
+
+EXPORT_SYMBOL(hvc_put_chars);
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
new file mode 100644
index 000000000000..4d584172055a
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -0,0 +1,251 @@
+/*
+ * hvcserver.c
+ * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
+ *
+ * PPC64 virtual I/O console server support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <asm/hvcall.h>
+#include <asm/hvcserver.h>
+#include <asm/io.h>
+
+#define HVCS_ARCH_VERSION "1.0.0"
+
+MODULE_AUTHOR("Ryan S. Arnold <rsa@us.ibm.com>");
+MODULE_DESCRIPTION("IBM hvcs ppc64 API");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(HVCS_ARCH_VERSION);
+
+/*
+ * Convert arch specific return codes into relevant errnos.  The hvcs
+ * functions aren't performance sensitive, so this conversion isn't an
+ * issue.
+ */
+int hvcs_convert(long to_convert)
+{
+	switch (to_convert) {
+		case H_Success:
+			return 0;
+		case H_Parameter:
+			return -EINVAL;
+		case H_Hardware:
+			return -EIO;
+		case H_Busy:
+		case H_LongBusyOrder1msec:
+		case H_LongBusyOrder10msec:
+		case H_LongBusyOrder100msec:
+		case H_LongBusyOrder1sec:
+		case H_LongBusyOrder10sec:
+		case H_LongBusyOrder100sec:
+			return -EBUSY;
+		case H_Function: /* fall through */
+		default:
+			return -EPERM;
+	}
+}
+
+/**
+ * hvcs_free_partner_info - free pi allocated by hvcs_get_partner_info
+ * @head: list_head pointer for an allocated list of partner info structs to
+ *	free.
+ *
+ * This function is used to free the partner info list that was returned by
+ * calling hvcs_get_partner_info().
+ */
+int hvcs_free_partner_info(struct list_head *head)
+{
+	struct hvcs_partner_info *pi;
+	struct list_head *element;
+
+	if (!head)
+		return -EINVAL;
+
+	while (!list_empty(head)) {
+		element = head->next;
+		pi = list_entry(element, struct hvcs_partner_info, node);
+		list_del(element);
+		kfree(pi);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(hvcs_free_partner_info);
+
+/* Helper function for hvcs_get_partner_info */
+int hvcs_next_partner(uint32_t unit_address,
+		unsigned long last_p_partition_ID,
+		unsigned long last_p_unit_address, unsigned long *pi_buff)
+
+{
+	long retval;
+	retval = plpar_hcall_norets(H_VTERM_PARTNER_INFO, unit_address,
+			last_p_partition_ID,
+				last_p_unit_address, virt_to_phys(pi_buff));
+	return hvcs_convert(retval);
+}
+
+/**
+ * hvcs_get_partner_info - Get all of the partner info for a vty-server adapter
+ * @unit_address: The unit_address of the vty-server adapter for which this
+ *	function is fetching partner info.
+ * @head: An initialized list_head pointer to an empty list to use to return the
+ *	list of partner info fetched from the hypervisor to the caller.
+ * @pi_buff: A page sized buffer pre-allocated prior to calling this function
+ *	that is to be used to be used by firmware as an iterator to keep track
+ *	of the partner info retrieval.
+ *
+ * This function returns non-zero on success, or if there is no partner info.
+ *
+ * The pi_buff is pre-allocated prior to calling this function because this
+ * function may be called with a spin_lock held and kmalloc of a page is not
+ * recommended as GFP_ATOMIC.
+ *
+ * The first long of this buffer is used to store a partner unit address.  The
+ * second long is used to store a partner partition ID and starting at
+ * pi_buff[2] is the 79 character Converged Location Code (diff size than the
+ * unsigned longs, hence the casting mumbo jumbo you see later).
+ *
+ * Invocation of this function should always be followed by an invocation of
+ * hvcs_free_partner_info() using a pointer to the SAME list head instance
+ * that was passed as a parameter to this function.
+ */
+int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
+		unsigned long *pi_buff)
+{
+	/*
+	 * Dealt with as longs because of the hcall interface even though the
+	 * values are uint32_t.
+	 */
+	unsigned long	last_p_partition_ID;
+	unsigned long	last_p_unit_address;
+	struct hvcs_partner_info *next_partner_info = NULL;
+	int more = 1;
+	int retval;
+
+	memset(pi_buff, 0x00, PAGE_SIZE);
+	/* invalid parameters */
+	if (!head || !pi_buff)
+		return -EINVAL;
+
+	last_p_partition_ID = last_p_unit_address = ~0UL;
+	INIT_LIST_HEAD(head);
+
+	do {
+		retval = hvcs_next_partner(unit_address, last_p_partition_ID,
+				last_p_unit_address, pi_buff);
+		if (retval) {
+			/*
+			 * Don't indicate that we've failed if we have
+			 * any list elements.
+			 */
+			if (!list_empty(head))
+				return 0;
+			return retval;
+		}
+
+		last_p_partition_ID = pi_buff[0];
+		last_p_unit_address = pi_buff[1];
+
+		/* This indicates that there are no further partners */
+		if (last_p_partition_ID == ~0UL
+				&& last_p_unit_address == ~0UL)
+			break;
+
+		/* This is a very small struct and will be freed soon in
+		 * hvcs_free_partner_info(). */
+		next_partner_info = kmalloc(sizeof(struct hvcs_partner_info),
+				GFP_ATOMIC);
+
+		if (!next_partner_info) {
+			printk(KERN_WARNING "HVCONSOLE: kmalloc() failed to"
+				" allocate partner info struct.\n");
+			hvcs_free_partner_info(head);
+			return -ENOMEM;
+		}
+
+		next_partner_info->unit_address
+			= (unsigned int)last_p_unit_address;
+		next_partner_info->partition_ID
+			= (unsigned int)last_p_partition_ID;
+
+		/* copy the Null-term char too */
+		strncpy(&next_partner_info->location_code[0],
+			(char *)&pi_buff[2],
+			strlen((char *)&pi_buff[2]) + 1);
+
+		list_add_tail(&(next_partner_info->node), head);
+		next_partner_info = NULL;
+
+	} while (more);
+
+	return 0;
+}
+EXPORT_SYMBOL(hvcs_get_partner_info);
+
+/**
+ * hvcs_register_connection - establish a connection between this vty-server and
+ *	a vty.
+ * @unit_address: The unit address of the vty-server adapter that is to be
+ *	establish a connection.
+ * @p_partition_ID: The partition ID of the vty adapter that is to be connected.
+ * @p_unit_address: The unit address of the vty adapter to which the vty-server
+ *	is to be connected.
+ *
+ * If this function is called once and -EINVAL is returned it may
+ * indicate that the partner info needs to be refreshed for the
+ * target unit address at which point the caller must invoke
+ * hvcs_get_partner_info() and then call this function again.  If,
+ * for a second time, -EINVAL is returned then it indicates that
+ * there is probably already a partner connection registered to a
+ * different vty-server adapter.  It is also possible that a second
+ * -EINVAL may indicate that one of the parms is not valid, for
+ * instance if the link was removed between the vty-server adapter
+ * and the vty adapter that you are trying to open.  Don't shoot the
+ * messenger.  Firmware implemented it this way.
+ */
+int hvcs_register_connection( uint32_t unit_address,
+		uint32_t p_partition_ID, uint32_t p_unit_address)
+{
+	long retval;
+	retval = plpar_hcall_norets(H_REGISTER_VTERM, unit_address,
+				p_partition_ID, p_unit_address);
+	return hvcs_convert(retval);
+}
+EXPORT_SYMBOL(hvcs_register_connection);
+
+/**
+ * hvcs_free_connection - free the connection between a vty-server and vty
+ * @unit_address: The unit address of the vty-server that is to have its
+ *	connection severed.
+ *
+ * This function is used to free the partner connection between a vty-server
+ * adapter and a vty adapter.
+ *
+ * If -EBUSY is returned continue to call this function until 0 is returned.
+ */
+int hvcs_free_connection(uint32_t unit_address)
+{
+	long retval;
+	retval = plpar_hcall_norets(H_FREE_VTERM, unit_address);
+	return hvcs_convert(retval);
+}
+EXPORT_SYMBOL(hvcs_free_connection);
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig
deleted file mode 100644
index 9d10c12e87fe..000000000000
--- a/arch/ppc64/Kconfig
+++ /dev/null
@@ -1,520 +0,0 @@
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
-#
-
-config 64BIT
-	def_bool y
-
-config MMU
-	bool
-	default y
-
-config PPC_STD_MMU
-	def_bool y
-
-config UID16
-	bool
-
-config RWSEM_GENERIC_SPINLOCK
-	bool
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y
-
-config GENERIC_CALIBRATE_DELAY
-	bool
-	default y
-
-config GENERIC_ISA_DMA
-	bool
-	default y
-
-config EARLY_PRINTK
-	bool
-	default y
-
-config COMPAT
-	bool
-	default y
-
-config SCHED_NO_NO_OMIT_FRAME_POINTER
-	bool
-	default y
-
-config ARCH_MAY_HAVE_PC_FDC
-	bool
-	default y
-
-config PPC_STD_MMU
-	bool
-	default y
-
-# We optimistically allocate largepages from the VM, so make the limit
-# large enough (16MB). This badly named config option is actually
-# max order + 1
-config FORCE_MAX_ZONEORDER
-	int
-	default "9" if PPC_64K_PAGES
-	default "13"
-
-source "init/Kconfig"
-
-config SYSVIPC_COMPAT
-	bool
-	depends on COMPAT && SYSVIPC
-	default y
-
-menu "Platform support"
-
-choice
-	prompt "Platform Type"
-	default PPC_MULTIPLATFORM
-
-config PPC_ISERIES
-	bool "IBM Legacy iSeries"
-
-config PPC_MULTIPLATFORM
-	bool "Generic"
-
-endchoice
-
-config PPC_PSERIES
-	depends on PPC_MULTIPLATFORM
-	bool "  IBM pSeries & new iSeries"
-	default y
-
-config PPC_BPA
-	bool "  Broadband Processor Architecture"
-	depends on PPC_MULTIPLATFORM
-
-config PPC_PMAC
-	depends on PPC_MULTIPLATFORM
-	bool "  Apple G5 based machines"
-	default y
-	select U3_DART
-	select GENERIC_TBSYNC
-
-config PPC_MAPLE
-	depends on PPC_MULTIPLATFORM
-	bool "  Maple 970FX Evaluation Board"
-	select U3_DART
-	select MPIC_BROKEN_U3
-	select GENERIC_TBSYNC
-	default n
-	help
-          This option enables support for the Maple 970FX Evaluation Board.
-	  For more informations, refer to <http://www.970eval.com>
-
-config PPC
-	bool
-	default y
-
-config PPC64
-	bool
-	default y
-
-config PPC_OF
-	depends on PPC_MULTIPLATFORM
-	bool
-	default y
-
-config XICS
-	depends on PPC_PSERIES
-	bool
-	default y
-
-config MPIC
-	depends on PPC_PSERIES || PPC_PMAC || PPC_MAPLE
-	bool
-	default y
-
-config PPC_I8259
-	depends on PPC_PSERIES
-	bool
-	default y
-
-config BPA_IIC
-	depends on PPC_BPA
-	bool
-	default y
-
-# VMX is pSeries only for now until somebody writes the iSeries
-# exception vectors for it
-config ALTIVEC
-	bool "Support for VMX (Altivec) vector unit"
-	depends on PPC_MULTIPLATFORM
-	default y
-
-config PPC_SPLPAR
-	depends on PPC_PSERIES
-	bool "Support for shared-processor logical partitions"
-	default n
-	help
-	  Enabling this option will make the kernel run more efficiently
-	  on logically-partitioned pSeries systems which use shared
-	  processors, that is, which share physical processors between
-	  two or more partitions.
-
-config KEXEC
-	bool "kexec system call (EXPERIMENTAL)"
-	depends on PPC_MULTIPLATFORM && EXPERIMENTAL
-	help
-	  kexec is a system call that implements the ability to shutdown your
-	  current kernel, and to start another kernel.  It is like a reboot
-	  but it is indepedent of the system firmware.  And like a reboot
-	  you can start any kernel with it, not just Linux.
-
-	  The name comes from the similiarity to the exec system call.
-
-	  It is an ongoing process to be certain the hardware in a machine
-	  is properly shutdown, so do not be surprised if this code does not
-	  initially work for you.  It may help to enable device hotplugging
-	  support.  As of this writing the exact hardware interface is
-	  strongly in flux, so no good recommendation can be made.
-
-source "drivers/cpufreq/Kconfig"
-
-config CPU_FREQ_PMAC64
-	bool "Support for some Apple G5s"
-	depends on CPU_FREQ && PMAC_SMU && PPC64
-	select CPU_FREQ_TABLE
-	help
-	  This adds support for frequency switching on Apple iMac G5,
-	  and some of the more recent desktop G5 machines as well.
-
-config IBMVIO
-	depends on PPC_PSERIES || PPC_ISERIES
-	bool
-	default y
-
-config U3_DART
-	bool 
-	depends on PPC_MULTIPLATFORM
-	default n
-
-config MPIC_BROKEN_U3
-	bool
-	depends on PPC_MAPLE
-	default y
-
-config GENERIC_TBSYNC
-	def_bool n
-
-config PPC_PMAC64
-	bool
-	depends on PPC_PMAC
-	default y
-
-config BOOTX_TEXT
-	bool "Support for early boot text console"
-	depends PPC_OF
-	help
-	  Say Y here to see progress messages from the boot firmware in text
-	  mode. Requires an Open Firmware compatible video card.
-
-config POWER4
-	def_bool y
-
-config PPC_FPU
-	def_bool y
-
-config POWER4_ONLY
-	bool "Optimize for POWER4"
-	default n
-	---help---
-	  Cause the compiler to optimize for POWER4 processors. The resulting
-	  binary will not work on POWER3 or RS64 processors when compiled with
-	  binutils 2.15 or later.
-
-config IOMMU_VMERGE
-	bool "Enable IOMMU virtual merging (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	default n
-	help
-	  Cause IO segments sent to a device for DMA to be merged virtually
-	  by the IOMMU when they happen to have been allocated contiguously.
-	  This doesn't add pressure to the IOMMU allocator. However, some
-	  drivers don't support getting large merged segments coming back
-	  from *_map_sg(). Say Y if you know the drivers you are using are
-	  properly handling this case.
-
-config SMP
-	bool "Symmetric multi-processing support"
-	---help---
-	  This enables support for systems with more than one CPU. If you have
-	  a system with only one CPU, say N. If you have a system with more
-	  than one CPU, say Y.
-
-	  If you say N here, the kernel will run on single and multiprocessor
-	  machines, but will use only one CPU of a multiprocessor machine. If
-	  you say Y here, the kernel will run on single-processor machines.
-	  On a single-processor machine, the kernel will run faster if you say
-	  N here.
-
-	  If you don't know what to do here, say Y.
-
-config NR_CPUS
-	int "Maximum number of CPUs (2-128)"
-	range 2 128
-	depends on SMP
-	default "32"
-
-config HMT
-	bool "Hardware multithreading"
-	depends on SMP && PPC_PSERIES && BROKEN
-	help
-	  This option enables hardware multithreading on RS64 cpus.
-	  pSeries systems p620 and p660 have such a cpu type.
-
-config NUMA
-	bool "NUMA support"
-	default y if SMP && PPC_PSERIES
-
-config ARCH_SELECT_MEMORY_MODEL
-	def_bool y
-
-config ARCH_FLATMEM_ENABLE
-       def_bool y
-       depends on !NUMA
-
-config ARCH_SPARSEMEM_ENABLE
-	def_bool y
-
-config ARCH_SPARSEMEM_DEFAULT
-	def_bool y
-	depends on NUMA
-
-source "mm/Kconfig"
-
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	def_bool y
-	depends on NEED_MULTIPLE_NODES
-
-config ARCH_MEMORY_PROBE
-	def_bool y
-	depends on MEMORY_HOTPLUG
-
-# Some NUMA nodes have memory ranges that span
-# other nodes.  Even though a pfn is valid and
-# between a node's start and end pfns, it may not
-# reside on that node.
-#
-# This is a relatively temporary hack that should
-# be able to go away when sparsemem is fully in
-# place
-config NODES_SPAN_OTHER_NODES
-	def_bool y
-	depends on NEED_MULTIPLE_NODES
-
-config PPC_64K_PAGES
-	bool "64k page size"
-	help
-	  This option changes the kernel logical page size to 64k. On machines
-          without processor support for 64k pages, the kernel will simulate
-          them by loading each individual 4k page on demand transparently,
-          while on hardware with such support, it will be used to map
-          normal application pages.
-
-config SCHED_SMT
-	bool "SMT (Hyperthreading) scheduler support"
-	depends on SMP
-	default off
-	help
-	  SMT scheduler support improves the CPU scheduler's decision making
-	  when dealing with POWER5 cpus at a cost of slightly increased
-	  overhead in some places. If unsure say N here.
-
-source "kernel/Kconfig.preempt"
-source kernel/Kconfig.hz
-
-config EEH
-	bool "PCI Extended Error Handling (EEH)" if EMBEDDED
-	depends on PPC_PSERIES
-	default y if !EMBEDDED
-
-#
-# Use the generic interrupt handling code in kernel/irq/:
-#
-config GENERIC_HARDIRQS
-	bool
-	default y
-
-config PPC_RTAS
-	bool
-	depends on PPC_PSERIES || PPC_BPA
-	default y
-
-config RTAS_ERROR_LOGGING
-	bool
-	depends on PPC_RTAS
-	default y
-
-config RTAS_PROC
-	bool "Proc interface to RTAS"
-	depends on PPC_RTAS
-	default y
-
-config RTAS_FLASH
-	tristate "Firmware flash interface"
-	depends on RTAS_PROC
-
-config SCANLOG
-	tristate "Scanlog dump interface"
-	depends on RTAS_PROC && PPC_PSERIES
-
-config LPARCFG
-	tristate "LPAR Configuration Data"
-	depends on PPC_PSERIES || PPC_ISERIES
-	help
-	Provide system capacity information via human readable
-	<key word>=<value> pairs through a /proc/ppc64/lparcfg interface.
-
-config SECCOMP
-	bool "Enable seccomp to safely compute untrusted bytecode"
-	depends on PROC_FS
-	default y
-	help
-	  This kernel feature is useful for number crunching applications
-	  that may need to compute untrusted bytecode during their
-	  execution. By using pipes or other transports made available to
-	  the process as file descriptors supporting the read/write
-	  syscalls, it's possible to isolate those applications in
-	  their own address space using seccomp. Once seccomp is
-	  enabled via /proc/<pid>/seccomp, it cannot be disabled
-	  and the task is only allowed to execute a few safe syscalls
-	  defined by each seccomp mode.
-
-	  If unsure, say Y. Only embedded should say N here.
-
-source "fs/Kconfig.binfmt"
-
-config HOTPLUG_CPU
-	bool "Support for hot-pluggable CPUs"
-	depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
-	select HOTPLUG
-	---help---
-	  Say Y here to be able to turn CPUs off and on.
-
-	  Say N if you are unsure.
-
-config PROC_DEVICETREE
-	bool "Support for Open Firmware device tree in /proc"
-	help
-	  This option adds a device-tree directory under /proc which contains
-	  an image of the device tree that the kernel copies from Open
-	  Firmware. If unsure, say Y here.
-
-config CMDLINE_BOOL
-	bool "Default bootloader kernel arguments"
-	depends on !PPC_ISERIES
-
-config CMDLINE
-	string "Initial kernel command string"
-	depends on CMDLINE_BOOL
-	default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
-	help
-	  On some platforms, there is currently no way for the boot loader to
-	  pass arguments to the kernel. For these platforms, you can supply
-	  some command-line options at build time by entering them here.  In
-	  most cases you will need to specify the root device here.
-
-endmenu
-
-config ISA_DMA_API
-	bool
-	default y
-
-menu "Bus Options"
-
-config ISA
-	bool
-	help
-	  Find out whether you have ISA slots on your motherboard.  ISA is the
-	  name of a bus system, i.e. the way the CPU talks to the other stuff
-	  inside your box.  If you have an Apple machine, say N here; if you
-	  have an IBM RS/6000 or pSeries machine or a PReP machine, say Y.  If
-	  you have an embedded board, consult your board documentation.
-
-config SBUS
-	bool
-
-config MCA
-	bool
-
-config EISA
-	bool
-
-config PCI
-	bool "support for PCI devices" if (EMBEDDED && PPC_ISERIES)
-	default y
-	help
-	  Find out whether your system includes a PCI bus. PCI is the name of
-	  a bus system, i.e. the way the CPU talks to the other stuff inside
-	  your box.  If you say Y here, the kernel will include drivers and
-	  infrastructure code to support PCI bus devices.
-
-config PCI_DOMAINS
-	bool
-	default PCI
-
-source "drivers/pci/Kconfig"
-
-source "drivers/pcmcia/Kconfig"
-
-source "drivers/pci/hotplug/Kconfig"
-
-endmenu
-
-source "net/Kconfig"
-
-source "drivers/Kconfig"
-
-source "fs/Kconfig"
-
-menu "iSeries device drivers"
-	depends on PPC_ISERIES
-
-config VIOCONS
-	tristate "iSeries Virtual Console Support"
-
-config VIODASD
-	tristate "iSeries Virtual I/O disk support"
-	help
-	  If you are running on an iSeries system and you want to use
- 	  virtual disks created and managed by OS/400, say Y.
-
-config VIOCD
-	tristate "iSeries Virtual I/O CD support"
-	help
-	  If you are running Linux on an IBM iSeries system and you want to
-	  read a CD drive owned by OS/400, say Y here.
-
-config VIOTAPE
-	tristate "iSeries Virtual Tape Support"
-	help
-	  If you are running Linux on an iSeries system and you want Linux
-	  to read and/or write a tape drive owned by OS/400, say Y here.
-
-endmenu
-
-config VIOPATH
-	bool
-	depends on VIOCONS || VIODASD || VIOCD || VIOTAPE || VETH
-	default y
-
-source "arch/powerpc/oprofile/Kconfig"
-
-source "arch/ppc64/Kconfig.debug"
-
-source "security/Kconfig"
-
-config KEYS_COMPAT
-	bool
-	depends on COMPAT && KEYS
-	default y
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile
index d0edea503c49..e876c213f5ce 100644
--- a/arch/ppc64/kernel/Makefile
+++ b/arch/ppc64/kernel/Makefile
@@ -2,44 +2,6 @@
 # Makefile for the linux ppc64 kernel.
 #
 
-ifneq ($(CONFIG_PPC_MERGE),y)
-
-EXTRA_CFLAGS	+= -mno-minimal-toc
-extra-y		:= head.o vmlinux.lds
-
-obj-y               :=	misc.o prom.o
-
-endif
-
-obj-y               +=	idle.o dma.o \
-			align.o \
-			iommu.o
-
-pci-obj-$(CONFIG_PPC_MULTIPLATFORM)	+= pci_dn.o pci_direct_iommu.o
-
-obj-$(CONFIG_PCI)	+= pci.o pci_iommu.o iomap.o $(pci-obj-y)
+obj-y               +=	idle.o align.o
 
 obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o
-ifneq ($(CONFIG_PPC_MERGE),y)
-obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o
-endif
-
-obj-$(CONFIG_KEXEC)		+= machine_kexec.o
-obj-$(CONFIG_MODULES)		+= module.o
-ifneq ($(CONFIG_PPC_MERGE),y)
-obj-$(CONFIG_MODULES)		+= ppc_ksyms.o
-endif
-obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
-ifneq ($(CONFIG_PPC_MERGE),y)
-obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
-endif
-obj-$(CONFIG_HVCS)		+= hvcserver.o
-
-obj-$(CONFIG_KPROBES)		+= kprobes.o
-
-ifneq ($(CONFIG_PPC_MERGE),y)
-ifeq ($(CONFIG_PPC_ISERIES),y)
-arch/ppc64/kernel/head.o: arch/powerpc/kernel/lparmap.s
-AFLAGS_head.o += -Iarch/powerpc/kernel
-endif
-endif
diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c
deleted file mode 100644
index 84ab5c18ef52..000000000000
--- a/arch/ppc64/kernel/asm-offsets.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * This program is used to generate definitions needed by
- * assembly language modules.
- *
- * We use the technique used in the OSF Mach kernel code:
- * generate asm statements containing #defines,
- * compile this file to assembler, and then extract the
- * #defines from the assembly-language output.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/config.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/time.h>
-#include <linux/hardirq.h>
-#include <asm/io.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-
-#include <asm/paca.h>
-#include <asm/lppaca.h>
-#include <asm/iseries/hv_lp_event.h>
-#include <asm/rtas.h>
-#include <asm/cputable.h>
-#include <asm/cache.h>
-#include <asm/systemcfg.h>
-#include <asm/compat.h>
-
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-int main(void)
-{
-	/* thread struct on stack */
-	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
-	DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
-	DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror));
-
-	/* task_struct->thread */
-	DEFINE(THREAD, offsetof(struct task_struct, thread));
-	DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
-	DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
-	DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
-	DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
-	DEFINE(KSP, offsetof(struct thread_struct, ksp));
-	DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
-
-#ifdef CONFIG_ALTIVEC
-	DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
-	DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
-	DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
-	DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
-#endif /* CONFIG_ALTIVEC */
-	DEFINE(MM, offsetof(struct task_struct, mm));
-	DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
-
-	DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
-	DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size));
-	DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page));
-	DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
-	DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
-	DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
-	DEFINE(PLATFORM_LPAR, PLATFORM_LPAR);
-
-	/* paca */
-        DEFINE(PACA_SIZE, sizeof(struct paca_struct));
-        DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
-        DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start));
-        DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack));
-	DEFINE(PACACURRENT, offsetof(struct paca_struct, __current));
-        DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
-        DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
-        DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
-	DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
-        DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
-	DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
-	DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled));
-	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
-	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
-	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
-#ifdef CONFIG_PPC_64K_PAGES
-	DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
-#endif
-#ifdef CONFIG_HUGETLB_PAGE
-	DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
-	DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
-#endif /* CONFIG_HUGETLB_PAGE */
-	DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr));
-        DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
-        DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
-        DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
-        DEFINE(PACA_EXDSI, offsetof(struct paca_struct, exdsi));
-        DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
-	DEFINE(PACALPPACA, offsetof(struct paca_struct, lppaca));
-	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
-	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
-	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
-	DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
-	DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
-
-	/* RTAS */
-	DEFINE(RTASBASE, offsetof(struct rtas_t, base));
-	DEFINE(RTASENTRY, offsetof(struct rtas_t, entry));
-
-	/* Interrupt register frame */
-	DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD);
-
-	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
-
-	/* 288 = # of volatile regs, int & fp, for leaf routines */
-	/* which do not stack a frame.  See the PPC64 ABI.       */
-	DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288);
-	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
-	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
-	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
-	DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
-	DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
-	DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2]));
-	DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3]));
-	DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4]));
-	DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5]));
-	DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6]));
-	DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7]));
-	DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8]));
-	DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9]));
-	DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10]));
-	DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11]));
-	DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12]));
-	DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13]));
-	/*
-	 * Note: these symbols include _ because they overlap with special
-	 * register names
-	 */
-	DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip));
-	DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr));
-	DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr));
-	DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link));
-	DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr));
-	DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer));
-	DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
-	DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
-	DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3));
-	DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result));
-	DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap));
-	DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe));
-
-	/* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
-	DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
-	DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
-
-	DEFINE(CLONE_VM, CLONE_VM);
-	DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
-
-	/* About the CPU features table */
-	DEFINE(CPU_SPEC_ENTRY_SIZE, sizeof(struct cpu_spec));
-	DEFINE(CPU_SPEC_PVR_MASK, offsetof(struct cpu_spec, pvr_mask));
-	DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value));
-	DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features));
-	DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup));
-
-	/* systemcfg offsets for use by vdso */
-	DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp));
-	DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec));
-	DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs));
-	DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec));
-	DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count));
-	DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest));
-	DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime));
-	DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32));
-	DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64));
-
-	/* timeval/timezone offsets for use by vdso */
-	DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
-	DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
-	DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
-	DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
-	DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
-	DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
-
-	return 0;
-}
diff --git a/arch/ppc64/kernel/btext.c b/arch/ppc64/kernel/btext.c
deleted file mode 100644
index 506a37885c5c..000000000000
--- a/arch/ppc64/kernel/btext.c
+++ /dev/null
@@ -1,792 +0,0 @@
-/*
- * Procedures for drawing on the screen early on in the boot process.
- *
- * Benjamin Herrenschmidt <benh@kernel.crashing.org>
- */
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/init.h>
-
-#include <asm/sections.h>
-#include <asm/prom.h>
-#include <asm/btext.h>
-#include <asm/prom.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/io.h>
-#include <asm/lmb.h>
-#include <asm/processor.h>
-#include <asm/udbg.h>
-
-#undef NO_SCROLL
-
-#ifndef NO_SCROLL
-static void scrollscreen(void);
-#endif
-
-static void draw_byte(unsigned char c, long locX, long locY);
-static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb);
-static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb);
-static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb);
-
-static int g_loc_X;
-static int g_loc_Y;
-static int g_max_loc_X;
-static int g_max_loc_Y;
-
-static int dispDeviceRowBytes;
-static int dispDeviceDepth;
-static int dispDeviceRect[4];
-static unsigned char *dispDeviceBase, *logicalDisplayBase;
-
-unsigned long disp_BAT[2] __initdata = {0, 0};
-
-#define cmapsz	(16*256)
-
-static unsigned char vga_font[cmapsz];
-
-int boot_text_mapped;
-int force_printk_to_btext = 0;
-
-
-/* Here's a small text engine to use during early boot
- * or for debugging purposes
- *
- * todo:
- *
- *  - build some kind of vgacon with it to enable early printk
- *  - move to a separate file
- *  - add a few video driver hooks to keep in sync with display
- *    changes.
- */
-
-void map_boot_text(void)
-{
-	unsigned long base, offset, size;
-	unsigned char *vbase;
-
-	/* By default, we are no longer mapped */
-	boot_text_mapped = 0;
-	if (dispDeviceBase == 0)
-		return;
-	base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL;
-	offset = ((unsigned long) dispDeviceBase) - base;
-	size = dispDeviceRowBytes * dispDeviceRect[3] + offset
-		+ dispDeviceRect[0];
-	vbase = __ioremap(base, size, _PAGE_NO_CACHE);
-	if (vbase == 0)
-		return;
-	logicalDisplayBase = vbase + offset;
-	boot_text_mapped = 1;
-}
-
-int btext_initialize(struct device_node *np)
-{
-	unsigned int width, height, depth, pitch;
-	unsigned long address = 0;
-	u32 *prop;
-
-	prop = (u32 *)get_property(np, "width", NULL);
-	if (prop == NULL)
-		return -EINVAL;
-	width = *prop;
-	prop = (u32 *)get_property(np, "height", NULL);
-	if (prop == NULL)
-		return -EINVAL;
-	height = *prop;
-	prop = (u32 *)get_property(np, "depth", NULL);
-	if (prop == NULL)
-		return -EINVAL;
-	depth = *prop;
-	pitch = width * ((depth + 7) / 8);
-	prop = (u32 *)get_property(np, "linebytes", NULL);
-	if (prop)
-		pitch = *prop;
-	if (pitch == 1)
-		pitch = 0x1000;
-	prop = (u32 *)get_property(np, "address", NULL);
-	if (prop)
-		address = *prop;
-
-	/* FIXME: Add support for PCI reg properties */
-
-	if (address == 0)
-		return -EINVAL;
-
-	g_loc_X = 0;
-	g_loc_Y = 0;
-	g_max_loc_X = width / 8;
-	g_max_loc_Y = height / 16;
-	logicalDisplayBase = (unsigned char *)address;
-	dispDeviceBase = (unsigned char *)address;
-	dispDeviceRowBytes = pitch;
-	dispDeviceDepth = depth;
-	dispDeviceRect[0] = dispDeviceRect[1] = 0;
-	dispDeviceRect[2] = width;
-	dispDeviceRect[3] = height;
-
-	map_boot_text();
-
-	return 0;
-}
-
-static void btext_putc(unsigned char c)
-{
-	btext_drawchar(c);
-}
-
-void __init init_boot_display(void)
-{
-	char *name;
-	struct device_node *np = NULL; 
-	int rc = -ENODEV;
-
-	printk("trying to initialize btext ...\n");
-
-	name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
-	if (name != NULL) {
-		np = of_find_node_by_path(name);
-		if (np != NULL) {
-			if (strcmp(np->type, "display") != 0) {
-				printk("boot stdout isn't a display !\n");
-				of_node_put(np);
-				np = NULL;
-			}
-		}
-	}
-	if (np)
-		rc = btext_initialize(np);
-	if (rc) {
-		for (np = NULL; (np = of_find_node_by_type(np, "display"));) {
-			if (get_property(np, "linux,opened", NULL)) {
-				printk("trying %s ...\n", np->full_name);
-				rc = btext_initialize(np);
-				printk("result: %d\n", rc);
-			}
-			if (rc == 0)
-				break;
-		}
-	}
-	if (rc == 0 && udbg_putc == NULL)
-		udbg_putc = btext_putc;
-}
-
-
-/* Calc the base address of a given point (x,y) */
-static unsigned char * calc_base(int x, int y)
-{
-	unsigned char *base;
-
-	base = logicalDisplayBase;
-	if (base == 0)
-		base = dispDeviceBase;
-	base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3);
-	base += (y + dispDeviceRect[1]) * dispDeviceRowBytes;
-	return base;
-}
-
-/* Adjust the display to a new resolution */
-void btext_update_display(unsigned long phys, int width, int height,
-			  int depth, int pitch)
-{
-	if (dispDeviceBase == 0)
-		return;
-
-	/* check it's the same frame buffer (within 256MB) */
-	if ((phys ^ (unsigned long)dispDeviceBase) & 0xf0000000)
-		return;
-
-	dispDeviceBase = (__u8 *) phys;
-	dispDeviceRect[0] = 0;
-	dispDeviceRect[1] = 0;
-	dispDeviceRect[2] = width;
-	dispDeviceRect[3] = height;
-	dispDeviceDepth = depth;
-	dispDeviceRowBytes = pitch;
-	if (boot_text_mapped) {
-		iounmap(logicalDisplayBase);
-		boot_text_mapped = 0;
-	}
-	map_boot_text();
-	g_loc_X = 0;
-	g_loc_Y = 0;
-	g_max_loc_X = width / 8;
-	g_max_loc_Y = height / 16;
-}
-
-void btext_clearscreen(void)
-{
-	unsigned long *base	= (unsigned long *)calc_base(0, 0);
-	unsigned long width 	= ((dispDeviceRect[2] - dispDeviceRect[0]) *
-					(dispDeviceDepth >> 3)) >> 3;
-	int i,j;
-
-	for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++)
-	{
-		unsigned long *ptr = base;
-		for(j=width; j; --j)
-			*(ptr++) = 0;
-		base += (dispDeviceRowBytes >> 3);
-	}
-}
-
-#ifndef NO_SCROLL
-static void scrollscreen(void)
-{
-	unsigned long *src     	= (unsigned long *)calc_base(0,16);
-	unsigned long *dst     	= (unsigned long *)calc_base(0,0);
-	unsigned long width    	= ((dispDeviceRect[2] - dispDeviceRect[0]) *
-				   (dispDeviceDepth >> 3)) >> 3;
-	int i,j;
-
-	for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++)
-	{
-		unsigned long *src_ptr = src;
-		unsigned long *dst_ptr = dst;
-		for(j=width; j; --j)
-			*(dst_ptr++) = *(src_ptr++);
-		src += (dispDeviceRowBytes >> 3);
-		dst += (dispDeviceRowBytes >> 3);
-	}
-	for (i=0; i<16; i++)
-	{
-		unsigned long *dst_ptr = dst;
-		for(j=width; j; --j)
-			*(dst_ptr++) = 0;
-		dst += (dispDeviceRowBytes >> 3);
-	}
-}
-#endif /* ndef NO_SCROLL */
-
-void btext_drawchar(char c)
-{
-	int cline = 0;
-#ifdef NO_SCROLL
-	int x;
-#endif
-	if (!boot_text_mapped)
-		return;
-
-	switch (c) {
-	case '\b':
-		if (g_loc_X > 0)
-			--g_loc_X;
-		break;
-	case '\t':
-		g_loc_X = (g_loc_X & -8) + 8;
-		break;
-	case '\r':
-		g_loc_X = 0;
-		break;
-	case '\n':
-		g_loc_X = 0;
-		g_loc_Y++;
-		cline = 1;
-		break;
-	default:
-		draw_byte(c, g_loc_X++, g_loc_Y);
-	}
-	if (g_loc_X >= g_max_loc_X) {
-		g_loc_X = 0;
-		g_loc_Y++;
-		cline = 1;
-	}
-#ifndef NO_SCROLL
-	while (g_loc_Y >= g_max_loc_Y) {
-		scrollscreen();
-		g_loc_Y--;
-	}
-#else
-	/* wrap around from bottom to top of screen so we don't
-	   waste time scrolling each line.  -- paulus. */
-	if (g_loc_Y >= g_max_loc_Y)
-		g_loc_Y = 0;
-	if (cline) {
-		for (x = 0; x < g_max_loc_X; ++x)
-			draw_byte(' ', x, g_loc_Y);
-	}
-#endif
-}
-
-void btext_drawstring(const char *c)
-{
-	if (!boot_text_mapped)
-		return;
-	while (*c)
-		btext_drawchar(*c++);
-}
-
-void btext_drawhex(unsigned long v)
-{
-	char *hex_table = "0123456789abcdef";
-
-	if (!boot_text_mapped)
-		return;
-	btext_drawchar(hex_table[(v >> 60) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 56) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 52) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 48) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 44) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 40) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 36) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 32) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 28) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 24) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 20) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 16) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >> 12) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >>  8) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >>  4) & 0x0000000FUL]);
-	btext_drawchar(hex_table[(v >>  0) & 0x0000000FUL]);
-	btext_drawchar(' ');
-}
-
-static void draw_byte(unsigned char c, long locX, long locY)
-{
-	unsigned char *base	= calc_base(locX << 3, locY << 4);
-	unsigned char *font	= &vga_font[((unsigned int)c) * 16];
-	int rb			= dispDeviceRowBytes;
-
-	switch(dispDeviceDepth) {
-	case 24:
-	case 32:
-		draw_byte_32(font, (unsigned int *)base, rb);
-		break;
-	case 15:
-	case 16:
-		draw_byte_16(font, (unsigned int *)base, rb);
-		break;
-	case 8:
-		draw_byte_8(font, (unsigned int *)base, rb);
-		break;
-	}
-}
-
-static unsigned int expand_bits_8[16] = {
-	0x00000000,
-	0x000000ff,
-	0x0000ff00,
-	0x0000ffff,
-	0x00ff0000,
-	0x00ff00ff,
-	0x00ffff00,
-	0x00ffffff,
-	0xff000000,
-	0xff0000ff,
-	0xff00ff00,
-	0xff00ffff,
-	0xffff0000,
-	0xffff00ff,
-	0xffffff00,
-	0xffffffff
-};
-
-static unsigned int expand_bits_16[4] = {
-	0x00000000,
-	0x0000ffff,
-	0xffff0000,
-	0xffffffff
-};
-
-
-static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
-{
-	int l, bits;
-	int fg = 0xFFFFFFFFUL;
-	int bg = 0x00000000UL;
-
-	for (l = 0; l < 16; ++l)
-	{
-		bits = *font++;
-		base[0] = (-(bits >> 7) & fg) ^ bg;
-		base[1] = (-((bits >> 6) & 1) & fg) ^ bg;
-		base[2] = (-((bits >> 5) & 1) & fg) ^ bg;
-		base[3] = (-((bits >> 4) & 1) & fg) ^ bg;
-		base[4] = (-((bits >> 3) & 1) & fg) ^ bg;
-		base[5] = (-((bits >> 2) & 1) & fg) ^ bg;
-		base[6] = (-((bits >> 1) & 1) & fg) ^ bg;
-		base[7] = (-(bits & 1) & fg) ^ bg;
-		base = (unsigned int *) ((char *)base + rb);
-	}
-}
-
-static void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
-{
-	int l, bits;
-	int fg = 0xFFFFFFFFUL;
-	int bg = 0x00000000UL;
-	unsigned int *eb = (int *)expand_bits_16;
-
-	for (l = 0; l < 16; ++l)
-	{
-		bits = *font++;
-		base[0] = (eb[bits >> 6] & fg) ^ bg;
-		base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg;
-		base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg;
-		base[3] = (eb[bits & 3] & fg) ^ bg;
-		base = (unsigned int *) ((char *)base + rb);
-	}
-}
-
-static void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
-{
-	int l, bits;
-	int fg = 0x0F0F0F0FUL;
-	int bg = 0x00000000UL;
-	unsigned int *eb = (int *)expand_bits_8;
-
-	for (l = 0; l < 16; ++l)
-	{
-		bits = *font++;
-		base[0] = (eb[bits >> 4] & fg) ^ bg;
-		base[1] = (eb[bits & 0xf] & fg) ^ bg;
-		base = (unsigned int *) ((char *)base + rb);
-	}
-}
-
-static unsigned char vga_font[cmapsz] = {
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd,
-0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff,
-0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe,
-0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18,
-0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
-0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
-0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
-0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd,
-0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e,
-0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30,
-0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63,
-0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8,
-0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e,
-0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
-0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb,
-0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6,
-0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
-0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0,
-0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c,
-0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c,
-0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18,
-0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c,
-0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30,
-0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18,
-0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe,
-0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
-0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18,
-0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00,
-0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00,
-0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60,
-0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde,
-0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38,
-0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0,
-0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c,
-0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68,
-0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
-0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c,
-0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60,
-0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7,
-0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66,
-0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c,
-0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c,
-0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
-0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18,
-0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
-0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30,
-0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
-0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06,
-0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb,
-0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66,
-0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60,
-0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30,
-0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3,
-0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18,
-0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6,
-0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
-0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00,
-0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe,
-0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c,
-0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38,
-0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06,
-0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe,
-0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
-0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66,
-0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6,
-0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00,
-0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b,
-0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c,
-0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18,
-0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
-0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
-0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
-0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18,
-0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66,
-0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18,
-0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30,
-0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc,
-0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
-0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
-0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06,
-0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30,
-0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18,
-0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36,
-0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44,
-0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
-0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
-0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
-0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
-0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0,
-0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0,
-0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8,
-0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66,
-0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38,
-0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66,
-0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60,
-0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c,
-0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18,
-0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30,
-0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00,
-0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
-0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c,
-0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00,
-};
diff --git a/arch/ppc64/kernel/dma.c b/arch/ppc64/kernel/dma.c
deleted file mode 100644
index 7c3419656ccc..000000000000
--- a/arch/ppc64/kernel/dma.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Implements the generic device dma API for ppc64. Handles
- * the pci and vio busses
- */
-
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-/* Include the busses we support */
-#include <linux/pci.h>
-#include <asm/vio.h>
-#include <asm/scatterlist.h>
-#include <asm/bug.h>
-
-static struct dma_mapping_ops *get_dma_ops(struct device *dev)
-{
-#ifdef CONFIG_PCI
-	if (dev->bus == &pci_bus_type)
-		return &pci_dma_ops;
-#endif
-#ifdef CONFIG_IBMVIO
-	if (dev->bus == &vio_bus_type)
-		return &vio_dma_ops;
-#endif
-	return NULL;
-}
-
-int dma_supported(struct device *dev, u64 mask)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (dma_ops)
-		return dma_ops->dma_supported(dev, mask);
-	BUG();
-	return 0;
-}
-EXPORT_SYMBOL(dma_supported);
-
-int dma_set_mask(struct device *dev, u64 dma_mask)
-{
-#ifdef CONFIG_PCI
-	if (dev->bus == &pci_bus_type)
-		return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
-#endif
-#ifdef CONFIG_IBMVIO
-	if (dev->bus == &vio_bus_type)
-		return -EIO;
-#endif /* CONFIG_IBMVIO */
-	BUG();
-	return 0;
-}
-EXPORT_SYMBOL(dma_set_mask);
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flag)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (dma_ops)
-		return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
-	BUG();
-	return NULL;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
-		dma_addr_t dma_handle)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (dma_ops)
-		dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
-	else
-		BUG();
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
-		enum dma_data_direction direction)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (dma_ops)
-		return dma_ops->map_single(dev, cpu_addr, size, direction);
-	BUG();
-	return (dma_addr_t)0;
-}
-EXPORT_SYMBOL(dma_map_single);
-
-void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-		enum dma_data_direction direction)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (dma_ops)
-		dma_ops->unmap_single(dev, dma_addr, size, direction);
-	else
-		BUG();
-}
-EXPORT_SYMBOL(dma_unmap_single);
-
-dma_addr_t dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size,
-		enum dma_data_direction direction)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (dma_ops)
-		return dma_ops->map_single(dev,
-				(page_address(page) + offset), size, direction);
-	BUG();
-	return (dma_addr_t)0;
-}
-EXPORT_SYMBOL(dma_map_page);
-
-void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
-		enum dma_data_direction direction)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (dma_ops)
-		dma_ops->unmap_single(dev, dma_address, size, direction);
-	else
-		BUG();
-}
-EXPORT_SYMBOL(dma_unmap_page);
-
-int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-		enum dma_data_direction direction)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (dma_ops)
-		return dma_ops->map_sg(dev, sg, nents, direction);
-	BUG();
-	return 0;
-}
-EXPORT_SYMBOL(dma_map_sg);
-
-void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
-		enum dma_data_direction direction)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (dma_ops)
-		dma_ops->unmap_sg(dev, sg, nhwentries, direction);
-	else
-		BUG();
-}
-EXPORT_SYMBOL(dma_unmap_sg);
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
deleted file mode 100644
index 1c869ea72d28..000000000000
--- a/arch/ppc64/kernel/head.S
+++ /dev/null
@@ -1,2007 +0,0 @@
-/*
- *  arch/ppc64/kernel/head.S
- *
- *  PowerPC version
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
- *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
- *  Adapted for Power Macintosh by Paul Mackerras.
- *  Low-level exception handlers and MMU support
- *  rewritten by Paul Mackerras.
- *    Copyright (C) 1996 Paul Mackerras.
- *
- *  Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and
- *    Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com
- *
- *  This file contains the low-level support and setup for the
- *  PowerPC-64 platform, including trap and interrupt dispatch.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/config.h>
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/bug.h>
-#include <asm/cputable.h>
-#include <asm/setup.h>
-#include <asm/hvcall.h>
-#include <asm/iseries/lpar_map.h>
-#include <asm/thread_info.h>
-
-#ifdef CONFIG_PPC_ISERIES
-#define DO_SOFT_DISABLE
-#endif
-
-/*
- * We layout physical memory as follows:
- * 0x0000 - 0x00ff : Secondary processor spin code
- * 0x0100 - 0x2fff : pSeries Interrupt prologs
- * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
- * 0x6000 - 0x6fff : Initial (CPU0) segment table
- * 0x7000 - 0x7fff : FWNMI data area
- * 0x8000 -        : Early init and support code
- */
-
-/*
- *   SPRG Usage
- *
- *   Register	Definition
- *
- *   SPRG0	reserved for hypervisor
- *   SPRG1	temp - used to save gpr
- *   SPRG2	temp - used to save gpr
- *   SPRG3	virt addr of paca
- */
-
-/*
- * Entering into this code we make the following assumptions:
- *  For pSeries:
- *   1. The MMU is off & open firmware is running in real mode.
- *   2. The kernel is entered at __start
- *
- *  For iSeries:
- *   1. The MMU is on (as it always is for iSeries)
- *   2. The kernel is entered at system_reset_iSeries
- */
-
-	.text
-	.globl  _stext
-_stext:
-#ifdef CONFIG_PPC_MULTIPLATFORM
-_GLOBAL(__start)
-	/* NOP this out unconditionally */
-BEGIN_FTR_SECTION
-	b	.__start_initialization_multiplatform
-END_FTR_SECTION(0, 1)
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-
-	/* Catch branch to 0 in real mode */
-	trap
-
-#ifdef CONFIG_PPC_ISERIES
-	/*
-	 * At offset 0x20, there is a pointer to iSeries LPAR data.
-	 * This is required by the hypervisor
-	 */
-	. = 0x20
-	.llong hvReleaseData-KERNELBASE
-
-	/*
-	 * At offset 0x28 and 0x30 are offsets to the mschunks_map
-	 * array (used by the iSeries LPAR debugger to do translation
-	 * between physical addresses and absolute addresses) and
-	 * to the pidhash table (also used by the debugger)
-	 */
-	.llong mschunks_map-KERNELBASE
-	.llong 0	/* pidhash-KERNELBASE SFRXXX */
-
-	/* Offset 0x38 - Pointer to start of embedded System.map */
-	.globl	embedded_sysmap_start
-embedded_sysmap_start:
-	.llong	0
-	/* Offset 0x40 - Pointer to end of embedded System.map */
-	.globl	embedded_sysmap_end
-embedded_sysmap_end:
-	.llong	0
-
-#endif /* CONFIG_PPC_ISERIES */
-
-	/* Secondary processors spin on this value until it goes to 1. */
-	.globl  __secondary_hold_spinloop
-__secondary_hold_spinloop:
-	.llong	0x0
-
-	/* Secondary processors write this value with their cpu # */
-	/* after they enter the spin loop immediately below.	  */
-	.globl	__secondary_hold_acknowledge
-__secondary_hold_acknowledge:
-	.llong	0x0
-
-	. = 0x60
-/*
- * The following code is used on pSeries to hold secondary processors
- * in a spin loop after they have been freed from OpenFirmware, but
- * before the bulk of the kernel has been relocated.  This code
- * is relocated to physical address 0x60 before prom_init is run.
- * All of it must fit below the first exception vector at 0x100.
- */
-_GLOBAL(__secondary_hold)
-	mfmsr	r24
-	ori	r24,r24,MSR_RI
-	mtmsrd	r24			/* RI on */
-
-	/* Grab our linux cpu number */
-	mr	r24,r3
-
-	/* Tell the master cpu we're here */
-	/* Relocation is off & we are located at an address less */
-	/* than 0x100, so only need to grab low order offset.    */
-	std	r24,__secondary_hold_acknowledge@l(0)
-	sync
-
-	/* All secondary cpus wait here until told to start. */
-100:	ld	r4,__secondary_hold_spinloop@l(0)
-	cmpdi	0,r4,1
-	bne	100b
-
-#ifdef CONFIG_HMT
-	b	.hmt_init
-#else
-#ifdef CONFIG_SMP
-	mr	r3,r24
-	b	.pSeries_secondary_smp_init
-#else
-	BUG_OPCODE
-#endif
-#endif
-
-/* This value is used to mark exception frames on the stack. */
-	.section ".toc","aw"
-exception_marker:
-	.tc	ID_72656773_68657265[TC],0x7265677368657265
-	.text
-
-/*
- * The following macros define the code that appears as
- * the prologue to each of the exception handlers.  They
- * are split into two parts to allow a single kernel binary
- * to be used for pSeries and iSeries.
- * LOL.  One day... - paulus
- */
-
-/*
- * We make as much of the exception code common between native
- * exception handlers (including pSeries LPAR) and iSeries LPAR
- * implementations as possible.
- */
-
-/*
- * This is the start of the interrupt handlers for pSeries
- * This code runs with relocation off.
- */
-#define EX_R9		0
-#define EX_R10		8
-#define EX_R11		16
-#define EX_R12		24
-#define EX_R13		32
-#define EX_SRR0		40
-#define EX_DAR		48
-#define EX_DSISR	56
-#define EX_CCR		60
-#define EX_R3		64
-#define EX_LR		72
-
-#define EXCEPTION_PROLOG_PSERIES(area, label)				\
-	mfspr	r13,SPRN_SPRG3;		/* get paca address into r13 */	\
-	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
-	std	r10,area+EX_R10(r13);					\
-	std	r11,area+EX_R11(r13);					\
-	std	r12,area+EX_R12(r13);					\
-	mfspr	r9,SPRN_SPRG1;						\
-	std	r9,area+EX_R13(r13);					\
-	mfcr	r9;							\
-	clrrdi	r12,r13,32;		/* get high part of &label */	\
-	mfmsr	r10;							\
-	mfspr	r11,SPRN_SRR0;		/* save SRR0 */			\
-	ori	r12,r12,(label)@l;	/* virt addr of handler */	\
-	ori	r10,r10,MSR_IR|MSR_DR|MSR_RI;				\
-	mtspr	SPRN_SRR0,r12;						\
-	mfspr	r12,SPRN_SRR1;		/* and SRR1 */			\
-	mtspr	SPRN_SRR1,r10;						\
-	rfid;								\
-	b	.	/* prevent speculative execution */
-
-/*
- * This is the start of the interrupt handlers for iSeries
- * This code runs with relocation on.
- */
-#define EXCEPTION_PROLOG_ISERIES_1(area)				\
-	mfspr	r13,SPRN_SPRG3;		/* get paca address into r13 */	\
-	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
-	std	r10,area+EX_R10(r13);					\
-	std	r11,area+EX_R11(r13);					\
-	std	r12,area+EX_R12(r13);					\
-	mfspr	r9,SPRN_SPRG1;						\
-	std	r9,area+EX_R13(r13);					\
-	mfcr	r9
-
-#define EXCEPTION_PROLOG_ISERIES_2					\
-	mfmsr	r10;							\
-	ld	r11,PACALPPACA+LPPACASRR0(r13);				\
-	ld	r12,PACALPPACA+LPPACASRR1(r13);				\
-	ori	r10,r10,MSR_RI;						\
-	mtmsrd	r10,1
-
-/*
- * The common exception prolog is used for all except a few exceptions
- * such as a segment miss on a kernel address.  We have to be prepared
- * to take another exception from the point where we first touch the
- * kernel stack onwards.
- *
- * On entry r13 points to the paca, r9-r13 are saved in the paca,
- * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
- * SRR1, and relocation is on.
- */
-#define EXCEPTION_PROLOG_COMMON(n, area)				   \
-	andi.	r10,r12,MSR_PR;		/* See if coming from user	*/ \
-	mr	r10,r1;			/* Save r1			*/ \
-	subi	r1,r1,INT_FRAME_SIZE;	/* alloc frame on kernel stack	*/ \
-	beq-	1f;							   \
-	ld	r1,PACAKSAVE(r13);	/* kernel stack to use		*/ \
-1:	cmpdi	cr1,r1,0;		/* check if r1 is in userspace	*/ \
-	bge-	cr1,bad_stack;		/* abort if it is		*/ \
-	std	r9,_CCR(r1);		/* save CR in stackframe	*/ \
-	std	r11,_NIP(r1);		/* save SRR0 in stackframe	*/ \
-	std	r12,_MSR(r1);		/* save SRR1 in stackframe	*/ \
-	std	r10,0(r1);		/* make stack chain pointer	*/ \
-	std	r0,GPR0(r1);		/* save r0 in stackframe	*/ \
-	std	r10,GPR1(r1);		/* save r1 in stackframe	*/ \
-	std	r2,GPR2(r1);		/* save r2 in stackframe	*/ \
-	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe	*/ \
-	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe	*/ \
-	ld	r9,area+EX_R9(r13);	/* move r9, r10 to stackframe	*/ \
-	ld	r10,area+EX_R10(r13);					   \
-	std	r9,GPR9(r1);						   \
-	std	r10,GPR10(r1);						   \
-	ld	r9,area+EX_R11(r13);	/* move r11 - r13 to stackframe	*/ \
-	ld	r10,area+EX_R12(r13);					   \
-	ld	r11,area+EX_R13(r13);					   \
-	std	r9,GPR11(r1);						   \
-	std	r10,GPR12(r1);						   \
-	std	r11,GPR13(r1);						   \
-	ld	r2,PACATOC(r13);	/* get kernel TOC into r2	*/ \
-	mflr	r9;			/* save LR in stackframe	*/ \
-	std	r9,_LINK(r1);						   \
-	mfctr	r10;			/* save CTR in stackframe	*/ \
-	std	r10,_CTR(r1);						   \
-	mfspr	r11,SPRN_XER;		/* save XER in stackframe	*/ \
-	std	r11,_XER(r1);						   \
-	li	r9,(n)+1;						   \
-	std	r9,_TRAP(r1);		/* set trap number		*/ \
-	li	r10,0;							   \
-	ld	r11,exception_marker@toc(r2);				   \
-	std	r10,RESULT(r1);		/* clear regs->result		*/ \
-	std	r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame	*/
-
-/*
- * Exception vectors.
- */
-#define STD_EXCEPTION_PSERIES(n, label)			\
-	. = n;						\
-	.globl label##_pSeries;				\
-label##_pSeries:					\
-	HMT_MEDIUM;					\
-	mtspr	SPRN_SPRG1,r13;		/* save r13 */	\
-	RUNLATCH_ON(r13);				\
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
-
-#define STD_EXCEPTION_ISERIES(n, label, area)		\
-	.globl label##_iSeries;				\
-label##_iSeries:					\
-	HMT_MEDIUM;					\
-	mtspr	SPRN_SPRG1,r13;		/* save r13 */	\
-	RUNLATCH_ON(r13);				\
-	EXCEPTION_PROLOG_ISERIES_1(area);		\
-	EXCEPTION_PROLOG_ISERIES_2;			\
-	b	label##_common
-
-#define MASKABLE_EXCEPTION_ISERIES(n, label)				\
-	.globl label##_iSeries;						\
-label##_iSeries:							\
-	HMT_MEDIUM;							\
-	mtspr	SPRN_SPRG1,r13;		/* save r13 */			\
-	RUNLATCH_ON(r13);						\
-	EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN);				\
-	lbz	r10,PACAPROCENABLED(r13);				\
-	cmpwi	0,r10,0;						\
-	beq-	label##_iSeries_masked;					\
-	EXCEPTION_PROLOG_ISERIES_2;					\
-	b	label##_common;						\
-
-#ifdef DO_SOFT_DISABLE
-#define DISABLE_INTS				\
-	lbz	r10,PACAPROCENABLED(r13);	\
-	li	r11,0;				\
-	std	r10,SOFTE(r1);			\
-	mfmsr	r10;				\
-	stb	r11,PACAPROCENABLED(r13);	\
-	ori	r10,r10,MSR_EE;			\
-	mtmsrd	r10,1
-
-#define ENABLE_INTS				\
-	lbz	r10,PACAPROCENABLED(r13);	\
-	mfmsr	r11;				\
-	std	r10,SOFTE(r1);			\
-	ori	r11,r11,MSR_EE;			\
-	mtmsrd	r11,1
-
-#else	/* hard enable/disable interrupts */
-#define DISABLE_INTS
-
-#define ENABLE_INTS				\
-	ld	r12,_MSR(r1);			\
-	mfmsr	r11;				\
-	rlwimi	r11,r12,0,MSR_EE;		\
-	mtmsrd	r11,1
-
-#endif
-
-#define STD_EXCEPTION_COMMON(trap, label, hdlr)		\
-	.align	7;					\
-	.globl label##_common;				\
-label##_common:						\
-	EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);	\
-	DISABLE_INTS;					\
-	bl	.save_nvgprs;				\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;		\
-	bl	hdlr;					\
-	b	.ret_from_except
-
-#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr)	\
-	.align	7;					\
-	.globl label##_common;				\
-label##_common:						\
-	EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);	\
-	DISABLE_INTS;					\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;		\
-	bl	hdlr;					\
-	b	.ret_from_except_lite
-
-/*
- * Start of pSeries system interrupt routines
- */
-	. = 0x100
-	.globl __start_interrupts
-__start_interrupts:
-
-	STD_EXCEPTION_PSERIES(0x100, system_reset)
-
-	. = 0x200
-_machine_check_pSeries:
-	HMT_MEDIUM
-	mtspr	SPRN_SPRG1,r13		/* save r13 */
-	RUNLATCH_ON(r13)
-	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
-
-	. = 0x300
-	.globl data_access_pSeries
-data_access_pSeries:
-	HMT_MEDIUM
-	mtspr	SPRN_SPRG1,r13
-BEGIN_FTR_SECTION
-	mtspr	SPRN_SPRG2,r12
-	mfspr	r13,SPRN_DAR
-	mfspr	r12,SPRN_DSISR
-	srdi	r13,r13,60
-	rlwimi	r13,r12,16,0x20
-	mfcr	r12
-	cmpwi	r13,0x2c
-	beq	.do_stab_bolted_pSeries
-	mtcrf	0x80,r12
-	mfspr	r12,SPRN_SPRG2
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
-
-	. = 0x380
-	.globl data_access_slb_pSeries
-data_access_slb_pSeries:
-	HMT_MEDIUM
-	mtspr	SPRN_SPRG1,r13
-	RUNLATCH_ON(r13)
-	mfspr	r13,SPRN_SPRG3		/* get paca address into r13 */
-	std	r3,PACA_EXSLB+EX_R3(r13)
-	mfspr	r3,SPRN_DAR
-	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
-	mfcr	r9
-#ifdef __DISABLED__
-	/* Keep that around for when we re-implement dynamic VSIDs */
-	cmpdi	r3,0
-	bge	slb_miss_user_pseries
-#endif /* __DISABLED__ */
-	std	r10,PACA_EXSLB+EX_R10(r13)
-	std	r11,PACA_EXSLB+EX_R11(r13)
-	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG1
-	std	r10,PACA_EXSLB+EX_R13(r13)
-	mfspr	r12,SPRN_SRR1		/* and SRR1 */
-	b	.slb_miss_realmode	/* Rel. branch works in real mode */
-
-	STD_EXCEPTION_PSERIES(0x400, instruction_access)
-
-	. = 0x480
-	.globl instruction_access_slb_pSeries
-instruction_access_slb_pSeries:
-	HMT_MEDIUM
-	mtspr	SPRN_SPRG1,r13
-	RUNLATCH_ON(r13)
-	mfspr	r13,SPRN_SPRG3		/* get paca address into r13 */
-	std	r3,PACA_EXSLB+EX_R3(r13)
-	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */
-	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
-	mfcr	r9
-#ifdef __DISABLED__
-	/* Keep that around for when we re-implement dynamic VSIDs */
-	cmpdi	r3,0
-	bge	slb_miss_user_pseries
-#endif /* __DISABLED__ */
-	std	r10,PACA_EXSLB+EX_R10(r13)
-	std	r11,PACA_EXSLB+EX_R11(r13)
-	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG1
-	std	r10,PACA_EXSLB+EX_R13(r13)
-	mfspr	r12,SPRN_SRR1		/* and SRR1 */
-	b	.slb_miss_realmode	/* Rel. branch works in real mode */
-
-	STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
-	STD_EXCEPTION_PSERIES(0x600, alignment)
-	STD_EXCEPTION_PSERIES(0x700, program_check)
-	STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
-	STD_EXCEPTION_PSERIES(0x900, decrementer)
-	STD_EXCEPTION_PSERIES(0xa00, trap_0a)
-	STD_EXCEPTION_PSERIES(0xb00, trap_0b)
-
-	. = 0xc00
-	.globl	system_call_pSeries
-system_call_pSeries:
-	HMT_MEDIUM
-	RUNLATCH_ON(r9)
-	mr	r9,r13
-	mfmsr	r10
-	mfspr	r13,SPRN_SPRG3
-	mfspr	r11,SPRN_SRR0
-	clrrdi	r12,r13,32
-	oris	r12,r12,system_call_common@h
-	ori	r12,r12,system_call_common@l
-	mtspr	SPRN_SRR0,r12
-	ori	r10,r10,MSR_IR|MSR_DR|MSR_RI
-	mfspr	r12,SPRN_SRR1
-	mtspr	SPRN_SRR1,r10
-	rfid
-	b	.	/* prevent speculative execution */
-
-	STD_EXCEPTION_PSERIES(0xd00, single_step)
-	STD_EXCEPTION_PSERIES(0xe00, trap_0e)
-
-	/* We need to deal with the Altivec unavailable exception
-	 * here which is at 0xf20, thus in the middle of the
-	 * prolog code of the PerformanceMonitor one. A little
-	 * trickery is thus necessary
-	 */
-	. = 0xf00
-	b	performance_monitor_pSeries
-
-	STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable)
-
-	STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
-	STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
-
-	. = 0x3000
-
-/*** pSeries interrupt support ***/
-
-	/* moved from 0xf00 */
-	STD_EXCEPTION_PSERIES(., performance_monitor)
-
-	.align	7
-_GLOBAL(do_stab_bolted_pSeries)
-	mtcrf	0x80,r12
-	mfspr	r12,SPRN_SPRG2
-	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
-
-/*
- * We have some room here  we use that to put
- * the peries slb miss user trampoline code so it's reasonably
- * away from slb_miss_user_common to avoid problems with rfid
- *
- * This is used for when the SLB miss handler has to go virtual,
- * which doesn't happen for now anymore but will once we re-implement
- * dynamic VSIDs for shared page tables
- */
-#ifdef __DISABLED__
-slb_miss_user_pseries:
-	std	r10,PACA_EXGEN+EX_R10(r13)
-	std	r11,PACA_EXGEN+EX_R11(r13)
-	std	r12,PACA_EXGEN+EX_R12(r13)
-	mfspr	r10,SPRG1
-	ld	r11,PACA_EXSLB+EX_R9(r13)
-	ld	r12,PACA_EXSLB+EX_R3(r13)
-	std	r10,PACA_EXGEN+EX_R13(r13)
-	std	r11,PACA_EXGEN+EX_R9(r13)
-	std	r12,PACA_EXGEN+EX_R3(r13)
-	clrrdi	r12,r13,32
-	mfmsr	r10
-	mfspr	r11,SRR0			/* save SRR0 */
-	ori	r12,r12,slb_miss_user_common@l	/* virt addr of handler */
-	ori	r10,r10,MSR_IR|MSR_DR|MSR_RI
-	mtspr	SRR0,r12
-	mfspr	r12,SRR1			/* and SRR1 */
-	mtspr	SRR1,r10
-	rfid
-	b	.				/* prevent spec. execution */
-#endif /* __DISABLED__ */
-
-/*
- * Vectors for the FWNMI option.  Share common code.
- */
-	.globl system_reset_fwnmi
-system_reset_fwnmi:
-	HMT_MEDIUM
-	mtspr	SPRN_SPRG1,r13		/* save r13 */
-	RUNLATCH_ON(r13)
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
-
-	.globl machine_check_fwnmi
-machine_check_fwnmi:
-	HMT_MEDIUM
-	mtspr	SPRN_SPRG1,r13		/* save r13 */
-	RUNLATCH_ON(r13)
-	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
-
-#ifdef CONFIG_PPC_ISERIES
-/***  ISeries-LPAR interrupt handlers ***/
-
-	STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC)
-
-	.globl data_access_iSeries
-data_access_iSeries:
-	mtspr	SPRN_SPRG1,r13
-BEGIN_FTR_SECTION
-	mtspr	SPRN_SPRG2,r12
-	mfspr	r13,SPRN_DAR
-	mfspr	r12,SPRN_DSISR
-	srdi	r13,r13,60
-	rlwimi	r13,r12,16,0x20
-	mfcr	r12
-	cmpwi	r13,0x2c
-	beq	.do_stab_bolted_iSeries
-	mtcrf	0x80,r12
-	mfspr	r12,SPRN_SPRG2
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-	EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN)
-	EXCEPTION_PROLOG_ISERIES_2
-	b	data_access_common
-
-.do_stab_bolted_iSeries:
-	mtcrf	0x80,r12
-	mfspr	r12,SPRN_SPRG2
-	EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
-	EXCEPTION_PROLOG_ISERIES_2
-	b	.do_stab_bolted
-
-	.globl	data_access_slb_iSeries
-data_access_slb_iSeries:
-	mtspr	SPRN_SPRG1,r13		/* save r13 */
-	mfspr	r13,SPRN_SPRG3		/* get paca address into r13 */
-	std	r3,PACA_EXSLB+EX_R3(r13)
-	mfspr	r3,SPRN_DAR
-	std	r9,PACA_EXSLB+EX_R9(r13)
-	mfcr	r9
-#ifdef __DISABLED__
-	cmpdi	r3,0
-	bge	slb_miss_user_iseries
-#endif
-	std	r10,PACA_EXSLB+EX_R10(r13)
-	std	r11,PACA_EXSLB+EX_R11(r13)
-	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG1
-	std	r10,PACA_EXSLB+EX_R13(r13)
-	ld	r12,PACALPPACA+LPPACASRR1(r13);
-	b	.slb_miss_realmode
-
-	STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
-
-	.globl	instruction_access_slb_iSeries
-instruction_access_slb_iSeries:
-	mtspr	SPRN_SPRG1,r13		/* save r13 */
-	mfspr	r13,SPRN_SPRG3		/* get paca address into r13 */
-	std	r3,PACA_EXSLB+EX_R3(r13)
-	ld	r3,PACALPPACA+LPPACASRR0(r13)	/* get SRR0 value */
-	std	r9,PACA_EXSLB+EX_R9(r13)
-	mfcr	r9
-#ifdef __DISABLED__
-	cmpdi	r3,0
-	bge	.slb_miss_user_iseries
-#endif
-	std	r10,PACA_EXSLB+EX_R10(r13)
-	std	r11,PACA_EXSLB+EX_R11(r13)
-	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG1
-	std	r10,PACA_EXSLB+EX_R13(r13)
-	ld	r12,PACALPPACA+LPPACASRR1(r13);
-	b	.slb_miss_realmode
-
-#ifdef __DISABLED__
-slb_miss_user_iseries:
-	std	r10,PACA_EXGEN+EX_R10(r13)
-	std	r11,PACA_EXGEN+EX_R11(r13)
-	std	r12,PACA_EXGEN+EX_R12(r13)
-	mfspr	r10,SPRG1
-	ld	r11,PACA_EXSLB+EX_R9(r13)
-	ld	r12,PACA_EXSLB+EX_R3(r13)
-	std	r10,PACA_EXGEN+EX_R13(r13)
-	std	r11,PACA_EXGEN+EX_R9(r13)
-	std	r12,PACA_EXGEN+EX_R3(r13)
-	EXCEPTION_PROLOG_ISERIES_2
-	b	slb_miss_user_common
-#endif
-
-	MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
-	STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
-	STD_EXCEPTION_ISERIES(0x700, program_check, PACA_EXGEN)
-	STD_EXCEPTION_ISERIES(0x800, fp_unavailable, PACA_EXGEN)
-	MASKABLE_EXCEPTION_ISERIES(0x900, decrementer)
-	STD_EXCEPTION_ISERIES(0xa00, trap_0a, PACA_EXGEN)
-	STD_EXCEPTION_ISERIES(0xb00, trap_0b, PACA_EXGEN)
-
-	.globl	system_call_iSeries
-system_call_iSeries:
-	mr	r9,r13
-	mfspr	r13,SPRN_SPRG3
-	EXCEPTION_PROLOG_ISERIES_2
-	b	system_call_common
-
-	STD_EXCEPTION_ISERIES( 0xd00, single_step, PACA_EXGEN)
-	STD_EXCEPTION_ISERIES( 0xe00, trap_0e, PACA_EXGEN)
-	STD_EXCEPTION_ISERIES( 0xf00, performance_monitor, PACA_EXGEN)
-
-	.globl system_reset_iSeries
-system_reset_iSeries:
-	mfspr	r13,SPRN_SPRG3		/* Get paca address */
-	mfmsr	r24
-	ori	r24,r24,MSR_RI
-	mtmsrd	r24			/* RI on */
-	lhz	r24,PACAPACAINDEX(r13)	/* Get processor # */
-	cmpwi	0,r24,0			/* Are we processor 0? */
-	beq	.__start_initialization_iSeries	/* Start up the first processor */
-	mfspr	r4,SPRN_CTRLF
-	li	r5,CTRL_RUNLATCH	/* Turn off the run light */
-	andc	r4,r4,r5
-	mtspr	SPRN_CTRLT,r4
-
-1:
-	HMT_LOW
-#ifdef CONFIG_SMP
-	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor
-					 * should start */
-	sync
-	LOADADDR(r3,current_set)
-	sldi	r28,r24,3		/* get current_set[cpu#] */
-	ldx	r3,r3,r28
-	addi	r1,r3,THREAD_SIZE
-	subi	r1,r1,STACK_FRAME_OVERHEAD
-
-	cmpwi	0,r23,0
-	beq	iSeries_secondary_smp_loop	/* Loop until told to go */
-	bne	.__secondary_start		/* Loop until told to go */
-iSeries_secondary_smp_loop:
-	/* Let the Hypervisor know we are alive */
-	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
-	lis	r3,0x8002
-	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
-#else /* CONFIG_SMP */
-	/* Yield the processor.  This is required for non-SMP kernels
-		which are running on multi-threaded machines. */
-	lis	r3,0x8000
-	rldicr	r3,r3,32,15		/* r3 = (r3 << 32) & 0xffff000000000000 */
-	addi	r3,r3,18		/* r3 = 0x8000000000000012 which is "yield" */
-	li	r4,0			/* "yield timed" */
-	li	r5,-1			/* "yield forever" */
-#endif /* CONFIG_SMP */
-	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
-	sc				/* Invoke the hypervisor via a system call */
-	mfspr	r13,SPRN_SPRG3		/* Put r13 back ???? */
-	b	1b			/* If SMP not configured, secondaries
-					 * loop forever */
-
-	.globl decrementer_iSeries_masked
-decrementer_iSeries_masked:
-	li	r11,1
-	stb	r11,PACALPPACA+LPPACADECRINT(r13)
-	lwz	r12,PACADEFAULTDECR(r13)
-	mtspr	SPRN_DEC,r12
-	/* fall through */
-
-	.globl hardware_interrupt_iSeries_masked
-hardware_interrupt_iSeries_masked:
-	mtcrf	0x80,r9		/* Restore regs */
-	ld	r11,PACALPPACA+LPPACASRR0(r13)
-	ld	r12,PACALPPACA+LPPACASRR1(r13)
-	mtspr	SPRN_SRR0,r11
-	mtspr	SPRN_SRR1,r12
-	ld	r9,PACA_EXGEN+EX_R9(r13)
-	ld	r10,PACA_EXGEN+EX_R10(r13)
-	ld	r11,PACA_EXGEN+EX_R11(r13)
-	ld	r12,PACA_EXGEN+EX_R12(r13)
-	ld	r13,PACA_EXGEN+EX_R13(r13)
-	rfid
-	b	.	/* prevent speculative execution */
-#endif /* CONFIG_PPC_ISERIES */
-
-/*** Common interrupt handlers ***/
-
-	STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
-
-	/*
-	 * Machine check is different because we use a different
-	 * save area: PACA_EXMC instead of PACA_EXGEN.
-	 */
-	.align	7
-	.globl machine_check_common
-machine_check_common:
-	EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
-	DISABLE_INTS
-	bl	.save_nvgprs
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.machine_check_exception
-	b	.ret_from_except
-
-	STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
-	STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
-	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
-	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
-	STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
-	STD_EXCEPTION_COMMON(0xf00, performance_monitor, .performance_monitor_exception)
-	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
-#ifdef CONFIG_ALTIVEC
-	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
-#else
-	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
-#endif
-
-/*
- * Here we have detected that the kernel stack pointer is bad.
- * R9 contains the saved CR, r13 points to the paca,
- * r10 contains the (bad) kernel stack pointer,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * We switch to using an emergency stack, save the registers there,
- * and call kernel_bad_stack(), which panics.
- */
-bad_stack:
-	ld	r1,PACAEMERGSP(r13)
-	subi	r1,r1,64+INT_FRAME_SIZE
-	std	r9,_CCR(r1)
-	std	r10,GPR1(r1)
-	std	r11,_NIP(r1)
-	std	r12,_MSR(r1)
-	mfspr	r11,SPRN_DAR
-	mfspr	r12,SPRN_DSISR
-	std	r11,_DAR(r1)
-	std	r12,_DSISR(r1)
-	mflr	r10
-	mfctr	r11
-	mfxer	r12
-	std	r10,_LINK(r1)
-	std	r11,_CTR(r1)
-	std	r12,_XER(r1)
-	SAVE_GPR(0,r1)
-	SAVE_GPR(2,r1)
-	SAVE_4GPRS(3,r1)
-	SAVE_2GPRS(7,r1)
-	SAVE_10GPRS(12,r1)
-	SAVE_10GPRS(22,r1)
-	addi	r11,r1,INT_FRAME_SIZE
-	std	r11,0(r1)
-	li	r12,0
-	std	r12,0(r11)
-	ld	r2,PACATOC(r13)
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.kernel_bad_stack
-	b	1b
-
-/*
- * Return from an exception with minimal checks.
- * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
- * If interrupts have been enabled, or anything has been
- * done that might have changed the scheduling status of
- * any task or sent any task a signal, you should use
- * ret_from_except or ret_from_except_lite instead of this.
- */
-	.globl	fast_exception_return
-fast_exception_return:
-	ld	r12,_MSR(r1)
-	ld	r11,_NIP(r1)
-	andi.	r3,r12,MSR_RI		/* check if RI is set */
-	beq-	unrecov_fer
-	ld	r3,_CCR(r1)
-	ld	r4,_LINK(r1)
-	ld	r5,_CTR(r1)
-	ld	r6,_XER(r1)
-	mtcr	r3
-	mtlr	r4
-	mtctr	r5
-	mtxer	r6
-	REST_GPR(0, r1)
-	REST_8GPRS(2, r1)
-
-	mfmsr	r10
-	clrrdi	r10,r10,2		/* clear RI (LE is 0 already) */
-	mtmsrd	r10,1
-
-	mtspr	SPRN_SRR1,r12
-	mtspr	SPRN_SRR0,r11
-	REST_4GPRS(10, r1)
-	ld	r1,GPR1(r1)
-	rfid
-	b	.	/* prevent speculative execution */
-
-unrecov_fer:
-	bl	.save_nvgprs
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unrecoverable_exception
-	b	1b
-
-/*
- * Here r13 points to the paca, r9 contains the saved CR,
- * SRR0 and SRR1 are saved in r11 and r12,
- * r9 - r13 are saved in paca->exgen.
- */
-	.align	7
-	.globl data_access_common
-data_access_common:
-	RUNLATCH_ON(r10)		/* It wont fit in the 0x300 handler */
-	mfspr	r10,SPRN_DAR
-	std	r10,PACA_EXGEN+EX_DAR(r13)
-	mfspr	r10,SPRN_DSISR
-	stw	r10,PACA_EXGEN+EX_DSISR(r13)
-	EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
-	ld	r3,PACA_EXGEN+EX_DAR(r13)
-	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
-	li	r5,0x300
-	b	.do_hash_page	 	/* Try to handle as hpte fault */
-
-	.align	7
-	.globl instruction_access_common
-instruction_access_common:
-	EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
-	ld	r3,_NIP(r1)
-	andis.	r4,r12,0x5820
-	li	r5,0x400
-	b	.do_hash_page		/* Try to handle as hpte fault */
-
-/*
- * Here is the common SLB miss user that is used when going to virtual
- * mode for SLB misses, that is currently not used
- */
-#ifdef __DISABLED__
-	.align	7
-	.globl	slb_miss_user_common
-slb_miss_user_common:
-	mflr	r10
-	std	r3,PACA_EXGEN+EX_DAR(r13)
-	stw	r9,PACA_EXGEN+EX_CCR(r13)
-	std	r10,PACA_EXGEN+EX_LR(r13)
-	std	r11,PACA_EXGEN+EX_SRR0(r13)
-	bl	.slb_allocate_user
-
-	ld	r10,PACA_EXGEN+EX_LR(r13)
-	ld	r3,PACA_EXGEN+EX_R3(r13)
-	lwz	r9,PACA_EXGEN+EX_CCR(r13)
-	ld	r11,PACA_EXGEN+EX_SRR0(r13)
-	mtlr	r10
-	beq-	slb_miss_fault
-
-	andi.	r10,r12,MSR_RI		/* check for unrecoverable exception */
-	beq-	unrecov_user_slb
-	mfmsr	r10
-
-.machine push
-.machine "power4"
-	mtcrf	0x80,r9
-.machine pop
-
-	clrrdi	r10,r10,2		/* clear RI before setting SRR0/1 */
-	mtmsrd	r10,1
-
-	mtspr	SRR0,r11
-	mtspr	SRR1,r12
-
-	ld	r9,PACA_EXGEN+EX_R9(r13)
-	ld	r10,PACA_EXGEN+EX_R10(r13)
-	ld	r11,PACA_EXGEN+EX_R11(r13)
-	ld	r12,PACA_EXGEN+EX_R12(r13)
-	ld	r13,PACA_EXGEN+EX_R13(r13)
-	rfid
-	b	.
-
-slb_miss_fault:
-	EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
-	ld	r4,PACA_EXGEN+EX_DAR(r13)
-	li	r5,0
-	std	r4,_DAR(r1)
-	std	r5,_DSISR(r1)
-	b	.handle_page_fault
-
-unrecov_user_slb:
-	EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
-	DISABLE_INTS
-	bl	.save_nvgprs
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unrecoverable_exception
-	b	1b
-
-#endif /* __DISABLED__ */
-
-
-/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r12 contain the saved SRR1, SRR0 is still ready for return
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
- * We assume we aren't going to take any exceptions during this procedure.
- */
-_GLOBAL(slb_miss_realmode)
-	mflr	r10
-
-	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
-	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */
-
-	bl	.slb_allocate_realmode
-
-	/* All done -- return from exception. */
-
-	ld	r10,PACA_EXSLB+EX_LR(r13)
-	ld	r3,PACA_EXSLB+EX_R3(r13)
-	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
-#ifdef CONFIG_PPC_ISERIES
-	ld	r11,PACALPPACA+LPPACASRR0(r13)	/* get SRR0 value */
-#endif /* CONFIG_PPC_ISERIES */
-
-	mtlr	r10
-
-	andi.	r10,r12,MSR_RI	/* check for unrecoverable exception */
-	beq-	unrecov_slb
-
-.machine	push
-.machine	"power4"
-	mtcrf	0x80,r9
-	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
-.machine	pop
-
-#ifdef CONFIG_PPC_ISERIES
-	mtspr	SPRN_SRR0,r11
-	mtspr	SPRN_SRR1,r12
-#endif /* CONFIG_PPC_ISERIES */
-	ld	r9,PACA_EXSLB+EX_R9(r13)
-	ld	r10,PACA_EXSLB+EX_R10(r13)
-	ld	r11,PACA_EXSLB+EX_R11(r13)
-	ld	r12,PACA_EXSLB+EX_R12(r13)
-	ld	r13,PACA_EXSLB+EX_R13(r13)
-	rfid
-	b	.	/* prevent speculative execution */
-
-unrecov_slb:
-	EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
-	DISABLE_INTS
-	bl	.save_nvgprs
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.unrecoverable_exception
-	b	1b
-
-	.align	7
-	.globl hardware_interrupt_common
-	.globl hardware_interrupt_entry
-hardware_interrupt_common:
-	EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
-hardware_interrupt_entry:
-	DISABLE_INTS
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.do_IRQ
-	b	.ret_from_except_lite
-
-	.align	7
-	.globl alignment_common
-alignment_common:
-	mfspr	r10,SPRN_DAR
-	std	r10,PACA_EXGEN+EX_DAR(r13)
-	mfspr	r10,SPRN_DSISR
-	stw	r10,PACA_EXGEN+EX_DSISR(r13)
-	EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
-	ld	r3,PACA_EXGEN+EX_DAR(r13)
-	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
-	std	r3,_DAR(r1)
-	std	r4,_DSISR(r1)
-	bl	.save_nvgprs
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	ENABLE_INTS
-	bl	.alignment_exception
-	b	.ret_from_except
-
-	.align	7
-	.globl program_check_common
-program_check_common:
-	EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
-	bl	.save_nvgprs
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	ENABLE_INTS
-	bl	.program_check_exception
-	b	.ret_from_except
-
-	.align	7
-	.globl fp_unavailable_common
-fp_unavailable_common:
-	EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
-	bne	.load_up_fpu		/* if from user, just load it up */
-	bl	.save_nvgprs
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	ENABLE_INTS
-	bl	.kernel_fp_unavailable_exception
-	BUG_OPCODE
-
-	.align	7
-	.globl altivec_unavailable_common
-altivec_unavailable_common:
-	EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
-	bne	.load_up_altivec	/* if from user, just load it up */
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif
-	bl	.save_nvgprs
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	ENABLE_INTS
-	bl	.altivec_unavailable_exception
-	b	.ret_from_except
-
-#ifdef CONFIG_ALTIVEC
-/*
- * load_up_altivec(unused, unused, tsk)
- * Disable VMX for the task which had it previously,
- * and save its vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
- * On SMP we know the VMX is free, since we give it up every
- * switch (ie, no lazy save of the vector registers).
- * On entry: r13 == 'current' && last_task_used_altivec != 'current'
- */
-_STATIC(load_up_altivec)
-	mfmsr	r5			/* grab the current MSR */
-	oris	r5,r5,MSR_VEC@h
-	mtmsrd	r5			/* enable use of VMX now */
-	isync
-
-/*
- * For SMP, we don't do lazy VMX switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another.  Instead we call giveup_altvec in switch_to.
- * VRSAVE isn't dealt with here, that is done in the normal context
- * switch code. Note that we could rely on vrsave value to eventually
- * avoid saving all of the VREGs here...
- */
-#ifndef CONFIG_SMP
-	ld	r3,last_task_used_altivec@got(r2)
-	ld	r4,0(r3)
-	cmpdi	0,r4,0
-	beq	1f
-	/* Save VMX state to last_task_used_altivec's THREAD struct */
-	addi	r4,r4,THREAD
-	SAVE_32VRS(0,r5,r4)
-	mfvscr	vr0
-	li	r10,THREAD_VSCR
-	stvx	vr0,r10,r4
-	/* Disable VMX for last_task_used_altivec */
-	ld	r5,PT_REGS(r4)
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	lis	r6,MSR_VEC@h
-	andc	r4,r4,r6
-	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
-	/* Hack: if we get an altivec unavailable trap with VRSAVE
-	 * set to all zeros, we assume this is a broken application
-	 * that fails to set it properly, and thus we switch it to
-	 * all 1's
-	 */
-	mfspr	r4,SPRN_VRSAVE
-	cmpdi	0,r4,0
-	bne+	1f
-	li	r4,-1
-	mtspr	SPRN_VRSAVE,r4
-1:
-	/* enable use of VMX after return */
-	ld	r4,PACACURRENT(r13)
-	addi	r5,r4,THREAD		/* Get THREAD */
-	oris	r12,r12,MSR_VEC@h
-	std	r12,_MSR(r1)
-	li	r4,1
-	li	r10,THREAD_VSCR
-	stw	r4,THREAD_USED_VR(r5)
-	lvx	vr0,r10,r5
-	mtvscr	vr0
-	REST_32VRS(0,r4,r5)
-#ifndef CONFIG_SMP
-	/* Update last_task_used_math to 'current' */
-	subi	r4,r5,THREAD		/* Back to 'current' */
-	std	r4,0(r3)
-#endif /* CONFIG_SMP */
-	/* restore registers and return */
-	b	fast_exception_return
-#endif /* CONFIG_ALTIVEC */
-
-/*
- * Hash table stuff
- */
-	.align	7
-_GLOBAL(do_hash_page)
-	std	r3,_DAR(r1)
-	std	r4,_DSISR(r1)
-
-	andis.	r0,r4,0xa450		/* weird error? */
-	bne-	.handle_page_fault	/* if not, try to insert a HPTE */
-BEGIN_FTR_SECTION
-	andis.	r0,r4,0x0020		/* Is it a segment table fault? */
-	bne-	.do_ste_alloc		/* If so handle it */
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-
-	/*
-	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
-	 * accessing a userspace segment (even from the kernel). We assume
-	 * kernel addresses always have the high bit set.
-	 */
-	rlwinm	r4,r4,32-25+9,31-9,31-9	/* DSISR_STORE -> _PAGE_RW */
-	rotldi	r0,r3,15		/* Move high bit into MSR_PR posn */
-	orc	r0,r12,r0		/* MSR_PR | ~high_bit */
-	rlwimi	r4,r0,32-13,30,30	/* becomes _PAGE_USER access bit */
-	ori	r4,r4,1			/* add _PAGE_PRESENT */
-	rlwimi	r4,r5,22+2,31-2,31-2	/* Set _PAGE_EXEC if trap is 0x400 */
-
-	/*
-	 * On iSeries, we soft-disable interrupts here, then
-	 * hard-enable interrupts so that the hash_page code can spin on
-	 * the hash_table_lock without problems on a shared processor.
-	 */
-	DISABLE_INTS
-
-	/*
-	 * r3 contains the faulting address
-	 * r4 contains the required access permissions
-	 * r5 contains the trap number
-	 *
-	 * at return r3 = 0 for success
-	 */
-	bl	.hash_page		/* build HPTE if possible */
-	cmpdi	r3,0			/* see if hash_page succeeded */
-
-#ifdef DO_SOFT_DISABLE
-	/*
-	 * If we had interrupts soft-enabled at the point where the
-	 * DSI/ISI occurred, and an interrupt came in during hash_page,
-	 * handle it now.
-	 * We jump to ret_from_except_lite rather than fast_exception_return
-	 * because ret_from_except_lite will check for and handle pending
-	 * interrupts if necessary.
-	 */
-	beq	.ret_from_except_lite
-	/* For a hash failure, we don't bother re-enabling interrupts */
-	ble-	12f
-
-	/*
-	 * hash_page couldn't handle it, set soft interrupt enable back
-	 * to what it was before the trap.  Note that .local_irq_restore
-	 * handles any interrupts pending at this point.
-	 */
-	ld	r3,SOFTE(r1)
-	bl	.local_irq_restore
-	b	11f
-#else
-	beq	fast_exception_return   /* Return from exception on success */
-	ble-	12f			/* Failure return from hash_page */
-
-	/* fall through */
-#endif
-
-/* Here we have a page fault that hash_page can't handle. */
-_GLOBAL(handle_page_fault)
-	ENABLE_INTS
-11:	ld	r4,_DAR(r1)
-	ld	r5,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.do_page_fault
-	cmpdi	r3,0
-	beq+	.ret_from_except_lite
-	bl	.save_nvgprs
-	mr	r5,r3
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	lwz	r4,_DAR(r1)
-	bl	.bad_page_fault
-	b	.ret_from_except
-
-/* We have a page fault that hash_page could handle but HV refused
- * the PTE insertion
- */
-12:	bl	.save_nvgprs
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	lwz	r4,_DAR(r1)
-	bl	.low_hash_fault
-	b	.ret_from_except
-
-	/* here we have a segment miss */
-_GLOBAL(do_ste_alloc)
-	bl	.ste_allocate		/* try to insert stab entry */
-	cmpdi	r3,0
-	beq+	fast_exception_return
-	b	.handle_page_fault
-
-/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * r9 - r13 are saved in paca->exslb.
- * We assume we aren't going to take any exceptions during this procedure.
- * We assume (DAR >> 60) == 0xc.
- */
-	.align	7
-_GLOBAL(do_stab_bolted)
-	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
-	std	r11,PACA_EXSLB+EX_SRR0(r13)	/* save SRR0 in exc. frame */
-
-	/* Hash to the primary group */
-	ld	r10,PACASTABVIRT(r13)
-	mfspr	r11,SPRN_DAR
-	srdi	r11,r11,28
-	rldimi	r10,r11,7,52	/* r10 = first ste of the group */
-
-	/* Calculate VSID */
-	/* This is a kernel address, so protovsid = ESID */
-	ASM_VSID_SCRAMBLE(r11, r9)
-	rldic	r9,r11,12,16	/* r9 = vsid << 12 */
-
-	/* Search the primary group for a free entry */
-1:	ld	r11,0(r10)	/* Test valid bit of the current ste	*/
-	andi.	r11,r11,0x80
-	beq	2f
-	addi	r10,r10,16
-	andi.	r11,r10,0x70
-	bne	1b
-
-	/* Stick for only searching the primary group for now.		*/
-	/* At least for now, we use a very simple random castout scheme */
-	/* Use the TB as a random number ;  OR in 1 to avoid entry 0	*/
-	mftb	r11
-	rldic	r11,r11,4,57	/* r11 = (r11 << 4) & 0x70 */
-	ori	r11,r11,0x10
-
-	/* r10 currently points to an ste one past the group of interest */
-	/* make it point to the randomly selected entry			*/
-	subi	r10,r10,128
-	or 	r10,r10,r11	/* r10 is the entry to invalidate	*/
-
-	isync			/* mark the entry invalid		*/
-	ld	r11,0(r10)
-	rldicl	r11,r11,56,1	/* clear the valid bit */
-	rotldi	r11,r11,8
-	std	r11,0(r10)
-	sync
-
-	clrrdi	r11,r11,28	/* Get the esid part of the ste		*/
-	slbie	r11
-
-2:	std	r9,8(r10)	/* Store the vsid part of the ste	*/
-	eieio
-
-	mfspr	r11,SPRN_DAR		/* Get the new esid			*/
-	clrrdi	r11,r11,28	/* Permits a full 32b of ESID		*/
-	ori	r11,r11,0x90	/* Turn on valid and kp			*/
-	std	r11,0(r10)	/* Put new entry back into the stab	*/
-
-	sync
-
-	/* All done -- return from exception. */
-	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
-	ld	r11,PACA_EXSLB+EX_SRR0(r13)	/* get saved SRR0 */
-
-	andi.	r10,r12,MSR_RI
-	beq-	unrecov_slb
-
-	mtcrf	0x80,r9			/* restore CR */
-
-	mfmsr	r10
-	clrrdi	r10,r10,2
-	mtmsrd	r10,1
-
-	mtspr	SPRN_SRR0,r11
-	mtspr	SPRN_SRR1,r12
-	ld	r9,PACA_EXSLB+EX_R9(r13)
-	ld	r10,PACA_EXSLB+EX_R10(r13)
-	ld	r11,PACA_EXSLB+EX_R11(r13)
-	ld	r12,PACA_EXSLB+EX_R12(r13)
-	ld	r13,PACA_EXSLB+EX_R13(r13)
-	rfid
-	b	.	/* prevent speculative execution */
-
-/*
- * Space for CPU0's segment table.
- *
- * On iSeries, the hypervisor must fill in at least one entry before
- * we get control (with relocate on).  The address is give to the hv
- * as a page number (see xLparMap in lpardata.c), so this must be at a
- * fixed address (the linker can't compute (u64)&initial_stab >>
- * PAGE_SHIFT).
- */
-	. = STAB0_PHYS_ADDR	/* 0x6000 */
-	.globl initial_stab
-initial_stab:
-	.space	4096
-
-/*
- * Data area reserved for FWNMI option.
- * This address (0x7000) is fixed by the RPA.
- */
-	.= 0x7000
-	.globl fwnmi_data_area
-fwnmi_data_area:
-
-	/* iSeries does not use the FWNMI stuff, so it is safe to put
-	 * this here, even if we later allow kernels that will boot on
-	 * both pSeries and iSeries */
-#ifdef CONFIG_PPC_ISERIES
-        . = LPARMAP_PHYS
-#include "lparmap.s"
-/*
- * This ".text" is here for old compilers that generate a trailing
- * .note section when compiling .c files to .s
- */
-	.text
-#endif /* CONFIG_PPC_ISERIES */
-
-        . = 0x8000
-
-/*
- * On pSeries, secondary processors spin in the following code.
- * At entry, r3 = this processor's number (physical cpu id)
- */
-_GLOBAL(pSeries_secondary_smp_init)
-	mr	r24,r3
-	
-	/* turn on 64-bit mode */
-	bl	.enable_64b_mode
-	isync
-
-	/* Copy some CPU settings from CPU 0 */
-	bl	.__restore_cpu_setup
-
-	/* Set up a paca value for this processor. Since we have the
-	 * physical cpu id in r24, we need to search the pacas to find
-	 * which logical id maps to our physical one.
-	 */
-	LOADADDR(r13, paca) 		/* Get base vaddr of paca array	 */
-	li	r5,0			/* logical cpu id                */
-1:	lhz	r6,PACAHWCPUID(r13)	/* Load HW procid from paca      */
-	cmpw	r6,r24			/* Compare to our id             */
-	beq	2f
-	addi	r13,r13,PACA_SIZE	/* Loop to next PACA on miss     */
-	addi	r5,r5,1
-	cmpwi	r5,NR_CPUS
-	blt	1b
-
-	mr	r3,r24			/* not found, copy phys to r3	 */
-	b	.kexec_wait		/* next kernel might do better	 */
-
-2:	mtspr	SPRN_SPRG3,r13		/* Save vaddr of paca in SPRG3	 */
-	/* From now on, r24 is expected to be logical cpuid */
-	mr	r24,r5
-3:	HMT_LOW
-	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
-					/* start.			 */
-	sync
-
-	/* Create a temp kernel stack for use before relocation is on.	*/
-	ld	r1,PACAEMERGSP(r13)
-	subi	r1,r1,STACK_FRAME_OVERHEAD
-
-	cmpwi	0,r23,0
-#ifdef CONFIG_SMP
-	bne	.__secondary_start
-#endif
-	b 	3b			/* Loop until told to go	 */
-
-#ifdef CONFIG_PPC_ISERIES
-_STATIC(__start_initialization_iSeries)
-	/* Clear out the BSS */
-	LOADADDR(r11,__bss_stop)
-	LOADADDR(r8,__bss_start)
-	sub	r11,r11,r8		/* bss size			*/
-	addi	r11,r11,7		/* round up to an even double word */
-	rldicl. r11,r11,61,3		/* shift right by 3		*/
-	beq	4f
-	addi	r8,r8,-8
-	li	r0,0
-	mtctr	r11			/* zero this many doublewords	*/
-3:	stdu	r0,8(r8)
-	bdnz	3b
-4:
-	LOADADDR(r1,init_thread_union)
-	addi	r1,r1,THREAD_SIZE
-	li	r0,0
-	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
-
-	LOADADDR(r3,cpu_specs)
-	LOADADDR(r4,cur_cpu_spec)
-	li	r5,0
-	bl	.identify_cpu
-
-	LOADADDR(r2,__toc_start)
-	addi	r2,r2,0x4000
-	addi	r2,r2,0x4000
-
-	bl	.iSeries_early_setup
-	bl	.early_setup
-
-	/* relocation is on at this point */
-
-	b	.start_here_common
-#endif /* CONFIG_PPC_ISERIES */
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
-
-_STATIC(__mmu_off)
-	mfmsr	r3
-	andi.	r0,r3,MSR_IR|MSR_DR
-	beqlr
-	andc	r3,r3,r0
-	mtspr	SPRN_SRR0,r4
-	mtspr	SPRN_SRR1,r3
-	sync
-	rfid
-	b	.	/* prevent speculative execution */
-
-
-/*
- * Here is our main kernel entry point. We support currently 2 kind of entries
- * depending on the value of r5.
- *
- *   r5 != NULL -> OF entry, we go to prom_init, "legacy" parameter content
- *                 in r3...r7
- *   
- *   r5 == NULL -> kexec style entry. r3 is a physical pointer to the
- *                 DT block, r4 is a physical pointer to the kernel itself
- *
- */
-_GLOBAL(__start_initialization_multiplatform)
-	/*
-	 * Are we booted from a PROM Of-type client-interface ?
-	 */
-	cmpldi	cr0,r5,0
-	bne	.__boot_from_prom		/* yes -> prom */
-
-	/* Save parameters */
-	mr	r31,r3
-	mr	r30,r4
-
-	/* Make sure we are running in 64 bits mode */
-	bl	.enable_64b_mode
-
-	/* Setup some critical 970 SPRs before switching MMU off */
-	bl	.__970_cpu_preinit
-
-	/* cpu # */
-	li	r24,0
-
-	/* Switch off MMU if not already */
-	LOADADDR(r4, .__after_prom_start - KERNELBASE)
-	add	r4,r4,r30
-	bl	.__mmu_off
-	b	.__after_prom_start
-
-_STATIC(__boot_from_prom)
-	/* Save parameters */
-	mr	r31,r3
-	mr	r30,r4
-	mr	r29,r5
-	mr	r28,r6
-	mr	r27,r7
-
-	/* Make sure we are running in 64 bits mode */
-	bl	.enable_64b_mode
-
-	/* put a relocation offset into r3 */
-	bl	.reloc_offset
-
-	LOADADDR(r2,__toc_start)
-	addi	r2,r2,0x4000
-	addi	r2,r2,0x4000
-
-	/* Relocate the TOC from a virt addr to a real addr */
-	sub	r2,r2,r3
-
-	/* Restore parameters */
-	mr	r3,r31
-	mr	r4,r30
-	mr	r5,r29
-	mr	r6,r28
-	mr	r7,r27
-
-	/* Do all of the interaction with OF client interface */
-	bl	.prom_init
-	/* We never return */
-	trap
-
-/*
- * At this point, r3 contains the physical address we are running at,
- * returned by prom_init()
- */
-_STATIC(__after_prom_start)
-
-/*
- * We need to run with __start at physical address 0.
- * This will leave some code in the first 256B of
- * real memory, which are reserved for software use.
- * The remainder of the first page is loaded with the fixed
- * interrupt vectors.  The next two pages are filled with
- * unknown exception placeholders.
- *
- * Note: This process overwrites the OF exception vectors.
- *	r26 == relocation offset
- *	r27 == KERNELBASE
- */
-	bl	.reloc_offset
-	mr	r26,r3
-	SET_REG_TO_CONST(r27,KERNELBASE)
-
-	li	r3,0			/* target addr */
-
-	// XXX FIXME: Use phys returned by OF (r30)
-	sub	r4,r27,r26 		/* source addr			 */
-					/* current address of _start	 */
-					/*   i.e. where we are running	 */
-					/*	the source addr		 */
-
-	LOADADDR(r5,copy_to_here)	/* # bytes of memory to copy	 */
-	sub	r5,r5,r27
-
-	li	r6,0x100		/* Start offset, the first 0x100 */
-					/* bytes were copied earlier.	 */
-
-	bl	.copy_and_flush		/* copy the first n bytes	 */
-					/* this includes the code being	 */
-					/* executed here.		 */
-
-	LOADADDR(r0, 4f)		/* Jump to the copy of this code */
-	mtctr	r0			/* that we just made/relocated	 */
-	bctr
-
-4:	LOADADDR(r5,klimit)
-	sub	r5,r5,r26
-	ld	r5,0(r5)		/* get the value of klimit */
-	sub	r5,r5,r27
-	bl	.copy_and_flush		/* copy the rest */
-	b	.start_here_multiplatform
-
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-
-/*
- * Copy routine used to copy the kernel to start at physical address 0
- * and flush and invalidate the caches as needed.
- * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
- * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
- *
- * Note: this routine *only* clobbers r0, r6 and lr
- */
-_GLOBAL(copy_and_flush)
-	addi	r5,r5,-8
-	addi	r6,r6,-8
-4:	li	r0,16			/* Use the least common		*/
-					/* denominator cache line	*/
-					/* size.  This results in	*/
-					/* extra cache line flushes	*/
-					/* but operation is correct.	*/
-					/* Can't get cache line size	*/
-					/* from NACA as it is being	*/
-					/* moved too.			*/
-
-	mtctr	r0			/* put # words/line in ctr	*/
-3:	addi	r6,r6,8			/* copy a cache line		*/
-	ldx	r0,r6,r4
-	stdx	r0,r6,r3
-	bdnz	3b
-	dcbst	r6,r3			/* write it to memory		*/
-	sync
-	icbi	r6,r3			/* flush the icache line	*/
-	cmpld	0,r6,r5
-	blt	4b
-	sync
-	addi	r5,r5,8
-	addi	r6,r6,8
-	blr
-
-.align 8
-copy_to_here:
-
-#ifdef CONFIG_SMP
-#ifdef CONFIG_PPC_PMAC
-/*
- * On PowerMac, secondary processors starts from the reset vector, which
- * is temporarily turned into a call to one of the functions below.
- */
-	.section ".text";
-	.align 2 ;
-
-	.globl	__secondary_start_pmac_0
-__secondary_start_pmac_0:
-	/* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */
-	li	r24,0
-	b	1f
-	li	r24,1
-	b	1f
-	li	r24,2
-	b	1f
-	li	r24,3
-1:
-	
-_GLOBAL(pmac_secondary_start)
-	/* turn on 64-bit mode */
-	bl	.enable_64b_mode
-	isync
-
-	/* Copy some CPU settings from CPU 0 */
-	bl	.__restore_cpu_setup
-
-	/* pSeries do that early though I don't think we really need it */
-	mfmsr	r3
-	ori	r3,r3,MSR_RI
-	mtmsrd	r3			/* RI on */
-
-	/* Set up a paca value for this processor. */
-	LOADADDR(r4, paca) 		 /* Get base vaddr of paca array	*/
-	mulli	r13,r24,PACA_SIZE	 /* Calculate vaddr of right paca */
-	add	r13,r13,r4		/* for this processor.		*/
-	mtspr	SPRN_SPRG3,r13		 /* Save vaddr of paca in SPRG3	*/
-
-	/* Create a temp kernel stack for use before relocation is on.	*/
-	ld	r1,PACAEMERGSP(r13)
-	subi	r1,r1,STACK_FRAME_OVERHEAD
-
-	b	.__secondary_start
-
-#endif /* CONFIG_PPC_PMAC */
-
-/*
- * This function is called after the master CPU has released the
- * secondary processors.  The execution environment is relocation off.
- * The paca for this processor has the following fields initialized at
- * this point:
- *   1. Processor number
- *   2. Segment table pointer (virtual address)
- * On entry the following are set:
- *   r1	= stack pointer.  vaddr for iSeries, raddr (temp stack) for pSeries
- *   r24   = cpu# (in Linux terms)
- *   r13   = paca virtual address
- *   SPRG3 = paca virtual address
- */
-_GLOBAL(__secondary_start)
-
-	HMT_MEDIUM			/* Set thread priority to MEDIUM */
-
-	ld	r2,PACATOC(r13)
-
-	/* Do early setup for that CPU */
-	bl	.early_setup_secondary
-
-	/* Initialize the kernel stack.  Just a repeat for iSeries.	 */
-	LOADADDR(r3,current_set)
-	sldi	r28,r24,3		/* get current_set[cpu#]	 */
-	ldx	r1,r3,r28
-	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
-	std	r1,PACAKSAVE(r13)
-
-	li	r7,0
-	mtlr	r7
-
-	/* enable MMU and jump to start_secondary */
-	LOADADDR(r3,.start_secondary_prolog)
-	SET_REG_TO_CONST(r4, MSR_KERNEL)
-#ifdef DO_SOFT_DISABLE
-	ori	r4,r4,MSR_EE
-#endif
-	mtspr	SPRN_SRR0,r3
-	mtspr	SPRN_SRR1,r4
-	rfid
-	b	.	/* prevent speculative execution */
-
-/* 
- * Running with relocation on at this point.  All we want to do is
- * zero the stack back-chain pointer before going into C code.
- */
-_GLOBAL(start_secondary_prolog)
-	li	r3,0
-	std	r3,0(r1)		/* Zero the stack frame pointer	*/
-	bl	.start_secondary
-#endif
-
-/*
- * This subroutine clobbers r11 and r12
- */
-_GLOBAL(enable_64b_mode)
-	mfmsr	r11			/* grab the current MSR */
-	li	r12,1
-	rldicr	r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
-	or	r11,r11,r12
-	li	r12,1
-	rldicr	r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
-	or	r11,r11,r12
-	mtmsrd	r11
-	isync
-	blr
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
-/*
- * This is where the main kernel code starts.
- */
-_STATIC(start_here_multiplatform)
-	/* get a new offset, now that the kernel has moved. */
-	bl	.reloc_offset
-	mr	r26,r3
-
-	/* Clear out the BSS. It may have been done in prom_init,
-	 * already but that's irrelevant since prom_init will soon
-	 * be detached from the kernel completely. Besides, we need
-	 * to clear it now for kexec-style entry.
-	 */
-	LOADADDR(r11,__bss_stop)
-	LOADADDR(r8,__bss_start)
-	sub	r11,r11,r8		/* bss size			*/
-	addi	r11,r11,7		/* round up to an even double word */
-	rldicl. r11,r11,61,3		/* shift right by 3		*/
-	beq	4f
-	addi	r8,r8,-8
-	li	r0,0
-	mtctr	r11			/* zero this many doublewords	*/
-3:	stdu	r0,8(r8)
-	bdnz	3b
-4:
-
-	mfmsr	r6
-	ori	r6,r6,MSR_RI
-	mtmsrd	r6			/* RI on */
-
-#ifdef CONFIG_HMT
-	/* Start up the second thread on cpu 0 */
-	mfspr	r3,SPRN_PVR
-	srwi	r3,r3,16
-	cmpwi	r3,0x34			/* Pulsar  */
-	beq	90f
-	cmpwi	r3,0x36			/* Icestar */
-	beq	90f
-	cmpwi	r3,0x37			/* SStar   */
-	beq	90f
-	b	91f			/* HMT not supported */
-90:	li	r3,0
-	bl	.hmt_start_secondary
-91:
-#endif
-
-	/* The following gets the stack and TOC set up with the regs */
-	/* pointing to the real addr of the kernel stack.  This is   */
-	/* all done to support the C function call below which sets  */
-	/* up the htab.  This is done because we have relocated the  */
-	/* kernel but are still running in real mode. */
-
-	LOADADDR(r3,init_thread_union)
-	sub	r3,r3,r26
-
-	/* set up a stack pointer (physical address) */
-	addi	r1,r3,THREAD_SIZE
-	li	r0,0
-	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
-
-	/* set up the TOC (physical address) */
-	LOADADDR(r2,__toc_start)
-	addi	r2,r2,0x4000
-	addi	r2,r2,0x4000
-	sub	r2,r2,r26
-
-	LOADADDR(r3,cpu_specs)
-	sub	r3,r3,r26
-	LOADADDR(r4,cur_cpu_spec)
-	sub	r4,r4,r26
-	mr	r5,r26
-	bl	.identify_cpu
-
-	/* Save some low level config HIDs of CPU0 to be copied to
-	 * other CPUs later on, or used for suspend/resume
-	 */
-	bl	.__save_cpu_setup
-	sync
-
-	/* Setup a valid physical PACA pointer in SPRG3 for early_setup
-	 * note that boot_cpuid can always be 0 nowadays since there is
-	 * nowhere it can be initialized differently before we reach this
-	 * code
-	 */
-	LOADADDR(r27, boot_cpuid)
-	sub	r27,r27,r26
-	lwz	r27,0(r27)
-
-	LOADADDR(r24, paca) 		/* Get base vaddr of paca array	 */
-	mulli	r13,r27,PACA_SIZE	/* Calculate vaddr of right paca */
-	add	r13,r13,r24		/* for this processor.		 */
-	sub	r13,r13,r26		/* convert to physical addr	 */
-	mtspr	SPRN_SPRG3,r13		/* PPPBBB: Temp... -Peter */
-	
-	/* Do very early kernel initializations, including initial hash table,
-	 * stab and slb setup before we turn on relocation.	*/
-
-	/* Restore parameters passed from prom_init/kexec */
-	mr	r3,r31
- 	bl	.early_setup
-
-	LOADADDR(r3,.start_here_common)
-	SET_REG_TO_CONST(r4, MSR_KERNEL)
-	mtspr	SPRN_SRR0,r3
-	mtspr	SPRN_SRR1,r4
-	rfid
-	b	.	/* prevent speculative execution */
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-	
-	/* This is where all platforms converge execution */
-_STATIC(start_here_common)
-	/* relocation is on at this point */
-
-	/* The following code sets up the SP and TOC now that we are */
-	/* running with translation enabled. */
-
-	LOADADDR(r3,init_thread_union)
-
-	/* set up the stack */
-	addi	r1,r3,THREAD_SIZE
-	li	r0,0
-	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
-
-	/* Apply the CPUs-specific fixups (nop out sections not relevant
-	 * to this CPU
-	 */
-	li	r3,0
-	bl	.do_cpu_ftr_fixups
-
-	LOADADDR(r26, boot_cpuid)
-	lwz	r26,0(r26)
-
-	LOADADDR(r24, paca) 		/* Get base vaddr of paca array  */
-	mulli	r13,r26,PACA_SIZE	/* Calculate vaddr of right paca */
-	add	r13,r13,r24		/* for this processor.		 */
-	mtspr	SPRN_SPRG3,r13
-
-	/* ptr to current */
-	LOADADDR(r4,init_task)
-	std	r4,PACACURRENT(r13)
-
-	/* Load the TOC */
-	ld	r2,PACATOC(r13)
-	std	r1,PACAKSAVE(r13)
-
-	bl	.setup_system
-
-	/* Load up the kernel context */
-5:
-#ifdef DO_SOFT_DISABLE
-	li	r5,0
-	stb	r5,PACAPROCENABLED(r13)	/* Soft Disabled */
-	mfmsr	r5
-	ori	r5,r5,MSR_EE		/* Hard Enabled */
-	mtmsrd	r5
-#endif
-
-	bl .start_kernel
-
-_GLOBAL(hmt_init)
-#ifdef CONFIG_HMT
-	LOADADDR(r5, hmt_thread_data)
-	mfspr	r7,SPRN_PVR
-	srwi	r7,r7,16
-	cmpwi	r7,0x34			/* Pulsar  */
-	beq	90f
-	cmpwi	r7,0x36			/* Icestar */
-	beq	91f
-	cmpwi	r7,0x37			/* SStar   */
-	beq	91f
-	b	101f
-90:	mfspr	r6,SPRN_PIR
-	andi.	r6,r6,0x1f
-	b	92f
-91:	mfspr	r6,SPRN_PIR
-	andi.	r6,r6,0x3ff
-92:	sldi	r4,r24,3
-	stwx	r6,r5,r4
-	bl	.hmt_start_secondary
-	b	101f
-
-__hmt_secondary_hold:
-	LOADADDR(r5, hmt_thread_data)
-	clrldi	r5,r5,4
-	li	r7,0
-	mfspr	r6,SPRN_PIR
-	mfspr	r8,SPRN_PVR
-	srwi	r8,r8,16
-	cmpwi	r8,0x34
-	bne	93f
-	andi.	r6,r6,0x1f
-	b	103f
-93:	andi.	r6,r6,0x3f
-
-103:	lwzx	r8,r5,r7
-	cmpw	r8,r6
-	beq	104f
-	addi	r7,r7,8
-	b	103b
-
-104:	addi	r7,r7,4
-	lwzx	r9,r5,r7
-	mr	r24,r9
-101:
-#endif
-	mr	r3,r24
-	b	.pSeries_secondary_smp_init
-
-#ifdef CONFIG_HMT
-_GLOBAL(hmt_start_secondary)
-	LOADADDR(r4,__hmt_secondary_hold)
-	clrldi	r4,r4,4
-	mtspr	SPRN_NIADORM, r4
-	mfspr	r4, SPRN_MSRDORM
-	li	r5, -65
-	and	r4, r4, r5
-	mtspr	SPRN_MSRDORM, r4
-	lis	r4,0xffef
-	ori	r4,r4,0x7403
-	mtspr	SPRN_TSC, r4
-	li	r4,0x1f4
-	mtspr	SPRN_TST, r4
-	mfspr	r4, SPRN_HID0
-	ori	r4, r4, 0x1
-	mtspr	SPRN_HID0, r4
-	mfspr	r4, SPRN_CTRLF
-	oris	r4, r4, 0x40
-	mtspr	SPRN_CTRLT, r4
-	blr
-#endif
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the bss, which is page-aligned.
- */
-	.section ".bss"
-
-	.align	PAGE_SHIFT
-
-	.globl	empty_zero_page
-empty_zero_page:
-	.space	PAGE_SIZE
-
-	.globl	swapper_pg_dir
-swapper_pg_dir:
-	.space	PAGE_SIZE
-
-/*
- * This space gets a copy of optional info passed to us by the bootstrap
- * Used to pass parameters into the kernel like root=/dev/sda1, etc.
- */
-	.globl	cmd_line
-cmd_line:
-	.space	COMMAND_LINE_SIZE
diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/ppc64/kernel/hvconsole.c
deleted file mode 100644
index 138e128a3886..000000000000
--- a/arch/ppc64/kernel/hvconsole.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * hvconsole.c
- * Copyright (C) 2004 Hollis Blanchard, IBM Corporation
- * Copyright (C) 2004 IBM Corporation
- *
- * Additional Author(s):
- *  Ryan S. Arnold <rsa@us.ibm.com>
- *
- * LPAR console support.
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/hvcall.h>
-#include <asm/hvconsole.h>
-
-/**
- * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper
- * @vtermno: The vtermno or unit_address of the adapter from which to fetch the
- *	data.
- * @buf: The character buffer into which to put the character data fetched from
- *	firmware.
- * @count: not used?
- */
-int hvc_get_chars(uint32_t vtermno, char *buf, int count)
-{
-	unsigned long got;
-
-	if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got,
-		(unsigned long *)buf, (unsigned long *)buf+1) == H_Success)
-		return got;
-	return 0;
-}
-
-EXPORT_SYMBOL(hvc_get_chars);
-
-
-/**
- * hvc_put_chars: send characters to firmware for denoted vterm adapter
- * @vtermno: The vtermno or unit_address of the adapter from which the data
- *	originated.
- * @buf: The character buffer that contains the character data to send to
- *	firmware.
- * @count: Send this number of characters.
- */
-int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
-{
-	unsigned long *lbuf = (unsigned long *) buf;
-	long ret;
-
-	ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, lbuf[0],
-				 lbuf[1]);
-	if (ret == H_Success)
-		return count;
-	if (ret == H_Busy)
-		return 0;
-	return -EIO;
-}
-
-EXPORT_SYMBOL(hvc_put_chars);
diff --git a/arch/ppc64/kernel/hvcserver.c b/arch/ppc64/kernel/hvcserver.c
deleted file mode 100644
index 4d584172055a..000000000000
--- a/arch/ppc64/kernel/hvcserver.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * hvcserver.c
- * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
- *
- * PPC64 virtual I/O console server support.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-
-#include <asm/hvcall.h>
-#include <asm/hvcserver.h>
-#include <asm/io.h>
-
-#define HVCS_ARCH_VERSION "1.0.0"
-
-MODULE_AUTHOR("Ryan S. Arnold <rsa@us.ibm.com>");
-MODULE_DESCRIPTION("IBM hvcs ppc64 API");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(HVCS_ARCH_VERSION);
-
-/*
- * Convert arch specific return codes into relevant errnos.  The hvcs
- * functions aren't performance sensitive, so this conversion isn't an
- * issue.
- */
-int hvcs_convert(long to_convert)
-{
-	switch (to_convert) {
-		case H_Success:
-			return 0;
-		case H_Parameter:
-			return -EINVAL;
-		case H_Hardware:
-			return -EIO;
-		case H_Busy:
-		case H_LongBusyOrder1msec:
-		case H_LongBusyOrder10msec:
-		case H_LongBusyOrder100msec:
-		case H_LongBusyOrder1sec:
-		case H_LongBusyOrder10sec:
-		case H_LongBusyOrder100sec:
-			return -EBUSY;
-		case H_Function: /* fall through */
-		default:
-			return -EPERM;
-	}
-}
-
-/**
- * hvcs_free_partner_info - free pi allocated by hvcs_get_partner_info
- * @head: list_head pointer for an allocated list of partner info structs to
- *	free.
- *
- * This function is used to free the partner info list that was returned by
- * calling hvcs_get_partner_info().
- */
-int hvcs_free_partner_info(struct list_head *head)
-{
-	struct hvcs_partner_info *pi;
-	struct list_head *element;
-
-	if (!head)
-		return -EINVAL;
-
-	while (!list_empty(head)) {
-		element = head->next;
-		pi = list_entry(element, struct hvcs_partner_info, node);
-		list_del(element);
-		kfree(pi);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(hvcs_free_partner_info);
-
-/* Helper function for hvcs_get_partner_info */
-int hvcs_next_partner(uint32_t unit_address,
-		unsigned long last_p_partition_ID,
-		unsigned long last_p_unit_address, unsigned long *pi_buff)
-
-{
-	long retval;
-	retval = plpar_hcall_norets(H_VTERM_PARTNER_INFO, unit_address,
-			last_p_partition_ID,
-				last_p_unit_address, virt_to_phys(pi_buff));
-	return hvcs_convert(retval);
-}
-
-/**
- * hvcs_get_partner_info - Get all of the partner info for a vty-server adapter
- * @unit_address: The unit_address of the vty-server adapter for which this
- *	function is fetching partner info.
- * @head: An initialized list_head pointer to an empty list to use to return the
- *	list of partner info fetched from the hypervisor to the caller.
- * @pi_buff: A page sized buffer pre-allocated prior to calling this function
- *	that is to be used to be used by firmware as an iterator to keep track
- *	of the partner info retrieval.
- *
- * This function returns non-zero on success, or if there is no partner info.
- *
- * The pi_buff is pre-allocated prior to calling this function because this
- * function may be called with a spin_lock held and kmalloc of a page is not
- * recommended as GFP_ATOMIC.
- *
- * The first long of this buffer is used to store a partner unit address.  The
- * second long is used to store a partner partition ID and starting at
- * pi_buff[2] is the 79 character Converged Location Code (diff size than the
- * unsigned longs, hence the casting mumbo jumbo you see later).
- *
- * Invocation of this function should always be followed by an invocation of
- * hvcs_free_partner_info() using a pointer to the SAME list head instance
- * that was passed as a parameter to this function.
- */
-int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
-		unsigned long *pi_buff)
-{
-	/*
-	 * Dealt with as longs because of the hcall interface even though the
-	 * values are uint32_t.
-	 */
-	unsigned long	last_p_partition_ID;
-	unsigned long	last_p_unit_address;
-	struct hvcs_partner_info *next_partner_info = NULL;
-	int more = 1;
-	int retval;
-
-	memset(pi_buff, 0x00, PAGE_SIZE);
-	/* invalid parameters */
-	if (!head || !pi_buff)
-		return -EINVAL;
-
-	last_p_partition_ID = last_p_unit_address = ~0UL;
-	INIT_LIST_HEAD(head);
-
-	do {
-		retval = hvcs_next_partner(unit_address, last_p_partition_ID,
-				last_p_unit_address, pi_buff);
-		if (retval) {
-			/*
-			 * Don't indicate that we've failed if we have
-			 * any list elements.
-			 */
-			if (!list_empty(head))
-				return 0;
-			return retval;
-		}
-
-		last_p_partition_ID = pi_buff[0];
-		last_p_unit_address = pi_buff[1];
-
-		/* This indicates that there are no further partners */
-		if (last_p_partition_ID == ~0UL
-				&& last_p_unit_address == ~0UL)
-			break;
-
-		/* This is a very small struct and will be freed soon in
-		 * hvcs_free_partner_info(). */
-		next_partner_info = kmalloc(sizeof(struct hvcs_partner_info),
-				GFP_ATOMIC);
-
-		if (!next_partner_info) {
-			printk(KERN_WARNING "HVCONSOLE: kmalloc() failed to"
-				" allocate partner info struct.\n");
-			hvcs_free_partner_info(head);
-			return -ENOMEM;
-		}
-
-		next_partner_info->unit_address
-			= (unsigned int)last_p_unit_address;
-		next_partner_info->partition_ID
-			= (unsigned int)last_p_partition_ID;
-
-		/* copy the Null-term char too */
-		strncpy(&next_partner_info->location_code[0],
-			(char *)&pi_buff[2],
-			strlen((char *)&pi_buff[2]) + 1);
-
-		list_add_tail(&(next_partner_info->node), head);
-		next_partner_info = NULL;
-
-	} while (more);
-
-	return 0;
-}
-EXPORT_SYMBOL(hvcs_get_partner_info);
-
-/**
- * hvcs_register_connection - establish a connection between this vty-server and
- *	a vty.
- * @unit_address: The unit address of the vty-server adapter that is to be
- *	establish a connection.
- * @p_partition_ID: The partition ID of the vty adapter that is to be connected.
- * @p_unit_address: The unit address of the vty adapter to which the vty-server
- *	is to be connected.
- *
- * If this function is called once and -EINVAL is returned it may
- * indicate that the partner info needs to be refreshed for the
- * target unit address at which point the caller must invoke
- * hvcs_get_partner_info() and then call this function again.  If,
- * for a second time, -EINVAL is returned then it indicates that
- * there is probably already a partner connection registered to a
- * different vty-server adapter.  It is also possible that a second
- * -EINVAL may indicate that one of the parms is not valid, for
- * instance if the link was removed between the vty-server adapter
- * and the vty adapter that you are trying to open.  Don't shoot the
- * messenger.  Firmware implemented it this way.
- */
-int hvcs_register_connection( uint32_t unit_address,
-		uint32_t p_partition_ID, uint32_t p_unit_address)
-{
-	long retval;
-	retval = plpar_hcall_norets(H_REGISTER_VTERM, unit_address,
-				p_partition_ID, p_unit_address);
-	return hvcs_convert(retval);
-}
-EXPORT_SYMBOL(hvcs_register_connection);
-
-/**
- * hvcs_free_connection - free the connection between a vty-server and vty
- * @unit_address: The unit address of the vty-server that is to have its
- *	connection severed.
- *
- * This function is used to free the partner connection between a vty-server
- * adapter and a vty adapter.
- *
- * If -EBUSY is returned continue to call this function until 0 is returned.
- */
-int hvcs_free_connection(uint32_t unit_address)
-{
-	long retval;
-	retval = plpar_hcall_norets(H_FREE_VTERM, unit_address);
-	return hvcs_convert(retval);
-}
-EXPORT_SYMBOL(hvcs_free_connection);
diff --git a/arch/ppc64/kernel/iomap.c b/arch/ppc64/kernel/iomap.c
deleted file mode 100644
index 6160c8dbb7c5..000000000000
--- a/arch/ppc64/kernel/iomap.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * arch/ppc64/kernel/iomap.c
- *
- * ppc64 "iomap" interface implementation.
- *
- * (C) Copyright 2004 Linus Torvalds
- */
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/mm.h>
-#include <asm/io.h>
-
-/*
- * Here comes the ppc64 implementation of the IOMAP 
- * interfaces.
- */
-unsigned int fastcall ioread8(void __iomem *addr)
-{
-	return readb(addr);
-}
-unsigned int fastcall ioread16(void __iomem *addr)
-{
-	return readw(addr);
-}
-unsigned int fastcall ioread16be(void __iomem *addr)
-{
-	return in_be16(addr);
-}
-unsigned int fastcall ioread32(void __iomem *addr)
-{
-	return readl(addr);
-}
-unsigned int fastcall ioread32be(void __iomem *addr)
-{
-	return in_be32(addr);
-}
-EXPORT_SYMBOL(ioread8);
-EXPORT_SYMBOL(ioread16);
-EXPORT_SYMBOL(ioread16be);
-EXPORT_SYMBOL(ioread32);
-EXPORT_SYMBOL(ioread32be);
-
-void fastcall iowrite8(u8 val, void __iomem *addr)
-{
-	writeb(val, addr);
-}
-void fastcall iowrite16(u16 val, void __iomem *addr)
-{
-	writew(val, addr);
-}
-void fastcall iowrite16be(u16 val, void __iomem *addr)
-{
-	out_be16(addr, val);
-}
-void fastcall iowrite32(u32 val, void __iomem *addr)
-{
-	writel(val, addr);
-}
-void fastcall iowrite32be(u32 val, void __iomem *addr)
-{
-	out_be32(addr, val);
-}
-EXPORT_SYMBOL(iowrite8);
-EXPORT_SYMBOL(iowrite16);
-EXPORT_SYMBOL(iowrite16be);
-EXPORT_SYMBOL(iowrite32);
-EXPORT_SYMBOL(iowrite32be);
-
-/*
- * These are the "repeat read/write" functions. Note the
- * non-CPU byte order. We do things in "IO byteorder"
- * here.
- *
- * FIXME! We could make these do EEH handling if we really
- * wanted. Not clear if we do.
- */
-void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
-{
-	_insb((u8 __iomem *) addr, dst, count);
-}
-void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
-{
-	_insw_ns((u16 __iomem *) addr, dst, count);
-}
-void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
-{
-	_insl_ns((u32 __iomem *) addr, dst, count);
-}
-EXPORT_SYMBOL(ioread8_rep);
-EXPORT_SYMBOL(ioread16_rep);
-EXPORT_SYMBOL(ioread32_rep);
-
-void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
-{
-	_outsb((u8 __iomem *) addr, src, count);
-}
-void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
-{
-	_outsw_ns((u16 __iomem *) addr, src, count);
-}
-void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
-{
-	_outsl_ns((u32 __iomem *) addr, src, count);
-}
-EXPORT_SYMBOL(iowrite8_rep);
-EXPORT_SYMBOL(iowrite16_rep);
-EXPORT_SYMBOL(iowrite32_rep);
-
-void __iomem *ioport_map(unsigned long port, unsigned int len)
-{
-	if (!_IO_IS_VALID(port))
-		return NULL;
-	return (void __iomem *) (port+pci_io_base);
-}
-
-void ioport_unmap(void __iomem *addr)
-{
-	/* Nothing to do */
-}
-EXPORT_SYMBOL(ioport_map);
-EXPORT_SYMBOL(ioport_unmap);
-
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
-{
-	unsigned long start = pci_resource_start(dev, bar);
-	unsigned long len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len)
-		return NULL;
-	if (max && len > max)
-		len = max;
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM)
-		return ioremap(start, len);
-	/* What? */
-	return NULL;
-}
-
-void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
-{
-	/* Nothing to do */
-}
-EXPORT_SYMBOL(pci_iomap);
-EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/ppc64/kernel/iommu.c b/arch/ppc64/kernel/iommu.c
deleted file mode 100644
index 4d9b4388918b..000000000000
--- a/arch/ppc64/kernel/iommu.c
+++ /dev/null
@@ -1,572 +0,0 @@
-/*
- * arch/ppc64/kernel/iommu.c
- * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
- * 
- * Rewrite, cleanup, new allocation schemes, virtual merging: 
- * Copyright (C) 2004 Olof Johansson, IBM Corporation
- *               and  Ben. Herrenschmidt, IBM Corporation
- *
- * Dynamic DMA mapping support, bus-independent parts.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/dma-mapping.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/iommu.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-
-#define DBG(...)
-
-#ifdef CONFIG_IOMMU_VMERGE
-static int novmerge = 0;
-#else
-static int novmerge = 1;
-#endif
-
-static int __init setup_iommu(char *str)
-{
-	if (!strcmp(str, "novmerge"))
-		novmerge = 1;
-	else if (!strcmp(str, "vmerge"))
-		novmerge = 0;
-	return 1;
-}
-
-__setup("iommu=", setup_iommu);
-
-static unsigned long iommu_range_alloc(struct iommu_table *tbl,
-                                       unsigned long npages,
-                                       unsigned long *handle,
-                                       unsigned int align_order)
-{ 
-	unsigned long n, end, i, start;
-	unsigned long limit;
-	int largealloc = npages > 15;
-	int pass = 0;
-	unsigned long align_mask;
-
-	align_mask = 0xffffffffffffffffl >> (64 - align_order);
-
-	/* This allocator was derived from x86_64's bit string search */
-
-	/* Sanity check */
-	if (unlikely(npages) == 0) {
-		if (printk_ratelimit())
-			WARN_ON(1);
-		return DMA_ERROR_CODE;
-	}
-
-	if (handle && *handle)
-		start = *handle;
-	else
-		start = largealloc ? tbl->it_largehint : tbl->it_hint;
-
-	/* Use only half of the table for small allocs (15 pages or less) */
-	limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
-
-	if (largealloc && start < tbl->it_halfpoint)
-		start = tbl->it_halfpoint;
-
-	/* The case below can happen if we have a small segment appended
-	 * to a large, or when the previous alloc was at the very end of
-	 * the available space. If so, go back to the initial start.
-	 */
-	if (start >= limit)
-		start = largealloc ? tbl->it_largehint : tbl->it_hint;
-	
- again:
-
-	n = find_next_zero_bit(tbl->it_map, limit, start);
-
-	/* Align allocation */
-	n = (n + align_mask) & ~align_mask;
-
-	end = n + npages;
-
-	if (unlikely(end >= limit)) {
-		if (likely(pass < 2)) {
-			/* First failure, just rescan the half of the table.
-			 * Second failure, rescan the other half of the table.
-			 */
-			start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
-			limit = pass ? tbl->it_size : limit;
-			pass++;
-			goto again;
-		} else {
-			/* Third failure, give up */
-			return DMA_ERROR_CODE;
-		}
-	}
-
-	for (i = n; i < end; i++)
-		if (test_bit(i, tbl->it_map)) {
-			start = i+1;
-			goto again;
-		}
-
-	for (i = n; i < end; i++)
-		__set_bit(i, tbl->it_map);
-
-	/* Bump the hint to a new block for small allocs. */
-	if (largealloc) {
-		/* Don't bump to new block to avoid fragmentation */
-		tbl->it_largehint = end;
-	} else {
-		/* Overflow will be taken care of at the next allocation */
-		tbl->it_hint = (end + tbl->it_blocksize - 1) &
-		                ~(tbl->it_blocksize - 1);
-	}
-
-	/* Update handle for SG allocations */
-	if (handle)
-		*handle = end;
-
-	return n;
-}
-
-static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
-		       unsigned int npages, enum dma_data_direction direction,
-		       unsigned int align_order)
-{
-	unsigned long entry, flags;
-	dma_addr_t ret = DMA_ERROR_CODE;
-	
-	spin_lock_irqsave(&(tbl->it_lock), flags);
-
-	entry = iommu_range_alloc(tbl, npages, NULL, align_order);
-
-	if (unlikely(entry == DMA_ERROR_CODE)) {
-		spin_unlock_irqrestore(&(tbl->it_lock), flags);
-		return DMA_ERROR_CODE;
-	}
-
-	entry += tbl->it_offset;	/* Offset into real TCE table */
-	ret = entry << PAGE_SHIFT;	/* Set the return dma address */
-
-	/* Put the TCEs in the HW table */
-	ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK,
-			 direction);
-
-
-	/* Flush/invalidate TLB caches if necessary */
-	if (ppc_md.tce_flush)
-		ppc_md.tce_flush(tbl);
-
-	spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
-	/* Make sure updates are seen by hardware */
-	mb();
-
-	return ret;
-}
-
-static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, 
-			 unsigned int npages)
-{
-	unsigned long entry, free_entry;
-	unsigned long i;
-
-	entry = dma_addr >> PAGE_SHIFT;
-	free_entry = entry - tbl->it_offset;
-
-	if (((free_entry + npages) > tbl->it_size) ||
-	    (entry < tbl->it_offset)) {
-		if (printk_ratelimit()) {
-			printk(KERN_INFO "iommu_free: invalid entry\n");
-			printk(KERN_INFO "\tentry     = 0x%lx\n", entry); 
-			printk(KERN_INFO "\tdma_addr  = 0x%lx\n", (u64)dma_addr);
-			printk(KERN_INFO "\tTable     = 0x%lx\n", (u64)tbl);
-			printk(KERN_INFO "\tbus#      = 0x%lx\n", (u64)tbl->it_busno);
-			printk(KERN_INFO "\tsize      = 0x%lx\n", (u64)tbl->it_size);
-			printk(KERN_INFO "\tstartOff  = 0x%lx\n", (u64)tbl->it_offset);
-			printk(KERN_INFO "\tindex     = 0x%lx\n", (u64)tbl->it_index);
-			WARN_ON(1);
-		}
-		return;
-	}
-
-	ppc_md.tce_free(tbl, entry, npages);
-	
-	for (i = 0; i < npages; i++)
-		__clear_bit(free_entry+i, tbl->it_map);
-}
-
-static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
-		unsigned int npages)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&(tbl->it_lock), flags);
-
-	__iommu_free(tbl, dma_addr, npages);
-
-	/* Make sure TLB cache is flushed if the HW needs it. We do
-	 * not do an mb() here on purpose, it is not needed on any of
-	 * the current platforms.
-	 */
-	if (ppc_md.tce_flush)
-		ppc_md.tce_flush(tbl);
-
-	spin_unlock_irqrestore(&(tbl->it_lock), flags);
-}
-
-int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
-		struct scatterlist *sglist, int nelems,
-		enum dma_data_direction direction)
-{
-	dma_addr_t dma_next = 0, dma_addr;
-	unsigned long flags;
-	struct scatterlist *s, *outs, *segstart;
-	int outcount, incount;
-	unsigned long handle;
-
-	BUG_ON(direction == DMA_NONE);
-
-	if ((nelems == 0) || !tbl)
-		return 0;
-
-	outs = s = segstart = &sglist[0];
-	outcount = 1;
-	incount = nelems;
-	handle = 0;
-
-	/* Init first segment length for backout at failure */
-	outs->dma_length = 0;
-
-	DBG("mapping %d elements:\n", nelems);
-
-	spin_lock_irqsave(&(tbl->it_lock), flags);
-
-	for (s = outs; nelems; nelems--, s++) {
-		unsigned long vaddr, npages, entry, slen;
-
-		slen = s->length;
-		/* Sanity check */
-		if (slen == 0) {
-			dma_next = 0;
-			continue;
-		}
-		/* Allocate iommu entries for that segment */
-		vaddr = (unsigned long)page_address(s->page) + s->offset;
-		npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK);
-		npages >>= PAGE_SHIFT;
-		entry = iommu_range_alloc(tbl, npages, &handle, 0);
-
-		DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen);
-
-		/* Handle failure */
-		if (unlikely(entry == DMA_ERROR_CODE)) {
-			if (printk_ratelimit())
-				printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
-				       " npages %lx\n", tbl, vaddr, npages);
-			goto failure;
-		}
-
-		/* Convert entry to a dma_addr_t */
-		entry += tbl->it_offset;
-		dma_addr = entry << PAGE_SHIFT;
-		dma_addr |= s->offset;
-
-		DBG("  - %lx pages, entry: %lx, dma_addr: %lx\n",
-			    npages, entry, dma_addr);
-
-		/* Insert into HW table */
-		ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction);
-
-		/* If we are in an open segment, try merging */
-		if (segstart != s) {
-			DBG("  - trying merge...\n");
-			/* We cannot merge if:
-			 * - allocated dma_addr isn't contiguous to previous allocation
-			 */
-			if (novmerge || (dma_addr != dma_next)) {
-				/* Can't merge: create a new segment */
-				segstart = s;
-				outcount++; outs++;
-				DBG("    can't merge, new segment.\n");
-			} else {
-				outs->dma_length += s->length;
-				DBG("    merged, new len: %lx\n", outs->dma_length);
-			}
-		}
-
-		if (segstart == s) {
-			/* This is a new segment, fill entries */
-			DBG("  - filling new segment.\n");
-			outs->dma_address = dma_addr;
-			outs->dma_length = slen;
-		}
-
-		/* Calculate next page pointer for contiguous check */
-		dma_next = dma_addr + slen;
-
-		DBG("  - dma next is: %lx\n", dma_next);
-	}
-
-	/* Flush/invalidate TLB caches if necessary */
-	if (ppc_md.tce_flush)
-		ppc_md.tce_flush(tbl);
-
-	spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
-	/* Make sure updates are seen by hardware */
-	mb();
-
-	DBG("mapped %d elements:\n", outcount);
-
-	/* For the sake of iommu_unmap_sg, we clear out the length in the
-	 * next entry of the sglist if we didn't fill the list completely
-	 */
-	if (outcount < incount) {
-		outs++;
-		outs->dma_address = DMA_ERROR_CODE;
-		outs->dma_length = 0;
-	}
-	return outcount;
-
- failure:
-	for (s = &sglist[0]; s <= outs; s++) {
-		if (s->dma_length != 0) {
-			unsigned long vaddr, npages;
-
-			vaddr = s->dma_address & PAGE_MASK;
-			npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
-				>> PAGE_SHIFT;
-			__iommu_free(tbl, vaddr, npages);
-		}
-	}
-	spin_unlock_irqrestore(&(tbl->it_lock), flags);
-	return 0;
-}
-
-
-void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
-		int nelems, enum dma_data_direction direction)
-{
-	unsigned long flags;
-
-	BUG_ON(direction == DMA_NONE);
-
-	if (!tbl)
-		return;
-
-	spin_lock_irqsave(&(tbl->it_lock), flags);
-
-	while (nelems--) {
-		unsigned int npages;
-		dma_addr_t dma_handle = sglist->dma_address;
-
-		if (sglist->dma_length == 0)
-			break;
-		npages = (PAGE_ALIGN(dma_handle + sglist->dma_length)
-			  - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT;
-		__iommu_free(tbl, dma_handle, npages);
-		sglist++;
-	}
-
-	/* Flush/invalidate TLBs if necessary. As for iommu_free(), we
-	 * do not do an mb() here, the affected platforms do not need it
-	 * when freeing.
-	 */
-	if (ppc_md.tce_flush)
-		ppc_md.tce_flush(tbl);
-
-	spin_unlock_irqrestore(&(tbl->it_lock), flags);
-}
-
-/*
- * Build a iommu_table structure.  This contains a bit map which
- * is used to manage allocation of the tce space.
- */
-struct iommu_table *iommu_init_table(struct iommu_table *tbl)
-{
-	unsigned long sz;
-	static int welcomed = 0;
-
-	/* Set aside 1/4 of the table for large allocations. */
-	tbl->it_halfpoint = tbl->it_size * 3 / 4;
-
-	/* number of bytes needed for the bitmap */
-	sz = (tbl->it_size + 7) >> 3;
-
-	tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
-	if (!tbl->it_map)
-		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
-
-	memset(tbl->it_map, 0, sz);
-
-	tbl->it_hint = 0;
-	tbl->it_largehint = tbl->it_halfpoint;
-	spin_lock_init(&tbl->it_lock);
-
-	/* Clear the hardware table in case firmware left allocations in it */
-	ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
-
-	if (!welcomed) {
-		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
-		       novmerge ? "disabled" : "enabled");
-		welcomed = 1;
-	}
-
-	return tbl;
-}
-
-void iommu_free_table(struct device_node *dn)
-{
-	struct pci_dn *pdn = dn->data;
-	struct iommu_table *tbl = pdn->iommu_table;
-	unsigned long bitmap_sz, i;
-	unsigned int order;
-
-	if (!tbl || !tbl->it_map) {
-		printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__,
-				dn->full_name);
-		return;
-	}
-
-	/* verify that table contains no entries */
-	/* it_size is in entries, and we're examining 64 at a time */
-	for (i = 0; i < (tbl->it_size/64); i++) {
-		if (tbl->it_map[i] != 0) {
-			printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
-				__FUNCTION__, dn->full_name);
-			break;
-		}
-	}
-
-	/* calculate bitmap size in bytes */
-	bitmap_sz = (tbl->it_size + 7) / 8;
-
-	/* free bitmap */
-	order = get_order(bitmap_sz);
-	free_pages((unsigned long) tbl->it_map, order);
-
-	/* free table */
-	kfree(tbl);
-}
-
-/* Creates TCEs for a user provided buffer.  The user buffer must be
- * contiguous real kernel storage (not vmalloc).  The address of the buffer
- * passed here is the kernel (virtual) address of the buffer.  The buffer
- * need not be page aligned, the dma_addr_t returned will point to the same
- * byte within the page as vaddr.
- */
-dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
-		size_t size, enum dma_data_direction direction)
-{
-	dma_addr_t dma_handle = DMA_ERROR_CODE;
-	unsigned long uaddr;
-	unsigned int npages;
-
-	BUG_ON(direction == DMA_NONE);
-
-	uaddr = (unsigned long)vaddr;
-	npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK);
-	npages >>= PAGE_SHIFT;
-
-	if (tbl) {
-		dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0);
-		if (dma_handle == DMA_ERROR_CODE) {
-			if (printk_ratelimit())  {
-				printk(KERN_INFO "iommu_alloc failed, "
-						"tbl %p vaddr %p npages %d\n",
-						tbl, vaddr, npages);
-			}
-		} else
-			dma_handle |= (uaddr & ~PAGE_MASK);
-	}
-
-	return dma_handle;
-}
-
-void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
-		size_t size, enum dma_data_direction direction)
-{
-	BUG_ON(direction == DMA_NONE);
-
-	if (tbl)
-		iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
-					(dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
-}
-
-/* Allocates a contiguous real buffer and creates mappings over it.
- * Returns the virtual address of the buffer and sets dma_handle
- * to the dma address (mapping) of the first page.
- */
-void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
-		dma_addr_t *dma_handle, gfp_t flag)
-{
-	void *ret = NULL;
-	dma_addr_t mapping;
-	unsigned int npages, order;
-
-	size = PAGE_ALIGN(size);
-	npages = size >> PAGE_SHIFT;
-	order = get_order(size);
-
- 	/*
-	 * Client asked for way too much space.  This is checked later
-	 * anyway.  It is easier to debug here for the drivers than in
-	 * the tce tables.
-	 */
-	if (order >= IOMAP_MAX_ORDER) {
-		printk("iommu_alloc_consistent size too large: 0x%lx\n", size);
-		return NULL;
-	}
-
-	if (!tbl)
-		return NULL;
-
-	/* Alloc enough pages (and possibly more) */
-	ret = (void *)__get_free_pages(flag, order);
-	if (!ret)
-		return NULL;
-	memset(ret, 0, size);
-
-	/* Set up tces to cover the allocated range */
-	mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order);
-	if (mapping == DMA_ERROR_CODE) {
-		free_pages((unsigned long)ret, order);
-		ret = NULL;
-	} else
-		*dma_handle = mapping;
-	return ret;
-}
-
-void iommu_free_coherent(struct iommu_table *tbl, size_t size,
-			 void *vaddr, dma_addr_t dma_handle)
-{
-	unsigned int npages;
-
-	if (tbl) {
-		size = PAGE_ALIGN(size);
-		npages = size >> PAGE_SHIFT;
-		iommu_free(tbl, dma_handle, npages);
-		free_pages((unsigned long)vaddr, get_order(size));
-	}
-}
diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c
deleted file mode 100644
index 511af54e6230..000000000000
--- a/arch/ppc64/kernel/kprobes.c
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- *  Kernel Probes (KProbes)
- *  arch/ppc64/kernel/kprobes.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2002, 2004
- *
- * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
- *		Probes initial implementation ( includes contributions from
- *		Rusty Russell).
- * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
- *		interface to access function arguments.
- * 2004-Nov	Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port
- *		for PPC64
- */
-
-#include <linux/config.h>
-#include <linux/kprobes.h>
-#include <linux/ptrace.h>
-#include <linux/preempt.h>
-#include <asm/cacheflush.h>
-#include <asm/kdebug.h>
-#include <asm/sstep.h>
-
-static DECLARE_MUTEX(kprobe_mutex);
-DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
-DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
-{
-	int ret = 0;
-	kprobe_opcode_t insn = *p->addr;
-
-	if ((unsigned long)p->addr & 0x03) {
-		printk("Attempt to register kprobe at an unaligned address\n");
-		ret = -EINVAL;
-	} else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
-		printk("Cannot register a kprobe on rfid or mtmsrd\n");
-		ret = -EINVAL;
-	}
-
-	/* insn must be on a special executable page on ppc64 */
-	if (!ret) {
-		down(&kprobe_mutex);
-		p->ainsn.insn = get_insn_slot();
-		up(&kprobe_mutex);
-		if (!p->ainsn.insn)
-			ret = -ENOMEM;
-	}
-	return ret;
-}
-
-void __kprobes arch_copy_kprobe(struct kprobe *p)
-{
-	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-	p->opcode = *p->addr;
-}
-
-void __kprobes arch_arm_kprobe(struct kprobe *p)
-{
-	*p->addr = BREAKPOINT_INSTRUCTION;
-	flush_icache_range((unsigned long) p->addr,
-			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
-}
-
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
-{
-	*p->addr = p->opcode;
-	flush_icache_range((unsigned long) p->addr,
-			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
-}
-
-void __kprobes arch_remove_kprobe(struct kprobe *p)
-{
-	down(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
-	up(&kprobe_mutex);
-}
-
-static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
-	kprobe_opcode_t insn = *p->ainsn.insn;
-
-	regs->msr |= MSR_SE;
-
-	/* single step inline if it is a trap variant */
-	if (is_trap(insn))
-		regs->nip = (unsigned long)p->addr;
-	else
-		regs->nip = (unsigned long)p->ainsn.insn;
-}
-
-static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
-	kcb->prev_kprobe.kp = kprobe_running();
-	kcb->prev_kprobe.status = kcb->kprobe_status;
-	kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
-}
-
-static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
-{
-	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
-	kcb->kprobe_status = kcb->prev_kprobe.status;
-	kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
-}
-
-static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
-				struct kprobe_ctlblk *kcb)
-{
-	__get_cpu_var(current_kprobe) = p;
-	kcb->kprobe_saved_msr = regs->msr;
-}
-
-/* Called with kretprobe_lock held */
-void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
-				      struct pt_regs *regs)
-{
-	struct kretprobe_instance *ri;
-
-	if ((ri = get_free_rp_inst(rp)) != NULL) {
-		ri->rp = rp;
-		ri->task = current;
-		ri->ret_addr = (kprobe_opcode_t *)regs->link;
-
-		/* Replace the return addr with trampoline addr */
-		regs->link = (unsigned long)kretprobe_trampoline;
-		add_rp_inst(ri);
-	} else {
-		rp->nmissed++;
-	}
-}
-
-static inline int kprobe_handler(struct pt_regs *regs)
-{
-	struct kprobe *p;
-	int ret = 0;
-	unsigned int *addr = (unsigned int *)regs->nip;
-	struct kprobe_ctlblk *kcb;
-
-	/*
-	 * We don't want to be preempted for the entire
-	 * duration of kprobe processing
-	 */
-	preempt_disable();
-	kcb = get_kprobe_ctlblk();
-
-	/* Check we're not actually recursing */
-	if (kprobe_running()) {
-		p = get_kprobe(addr);
-		if (p) {
-			kprobe_opcode_t insn = *p->ainsn.insn;
-			if (kcb->kprobe_status == KPROBE_HIT_SS &&
-					is_trap(insn)) {
-				regs->msr &= ~MSR_SE;
-				regs->msr |= kcb->kprobe_saved_msr;
-				goto no_kprobe;
-			}
-			/* We have reentered the kprobe_handler(), since
-			 * another probe was hit while within the handler.
-			 * We here save the original kprobes variables and
-			 * just single step on the instruction of the new probe
-			 * without calling any user handlers.
-			 */
-			save_previous_kprobe(kcb);
-			set_current_kprobe(p, regs, kcb);
-			kcb->kprobe_saved_msr = regs->msr;
-			p->nmissed++;
-			prepare_singlestep(p, regs);
-			kcb->kprobe_status = KPROBE_REENTER;
-			return 1;
-		} else {
-			p = __get_cpu_var(current_kprobe);
-			if (p->break_handler && p->break_handler(p, regs)) {
-				goto ss_probe;
-			}
-		}
-		goto no_kprobe;
-	}
-
-	p = get_kprobe(addr);
-	if (!p) {
-		if (*addr != BREAKPOINT_INSTRUCTION) {
-			/*
-			 * PowerPC has multiple variants of the "trap"
-			 * instruction. If the current instruction is a
-			 * trap variant, it could belong to someone else
-			 */
-			kprobe_opcode_t cur_insn = *addr;
-			if (is_trap(cur_insn))
-		       		goto no_kprobe;
-			/*
-			 * The breakpoint instruction was removed right
-			 * after we hit it.  Another cpu has removed
-			 * either a probepoint or a debugger breakpoint
-			 * at this address.  In either case, no further
-			 * handling of this interrupt is appropriate.
-			 */
-			ret = 1;
-		}
-		/* Not one of ours: let kernel handle it */
-		goto no_kprobe;
-	}
-
-	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-	set_current_kprobe(p, regs, kcb);
-	if (p->pre_handler && p->pre_handler(p, regs))
-		/* handler has already set things up, so skip ss setup */
-		return 1;
-
-ss_probe:
-	prepare_singlestep(p, regs);
-	kcb->kprobe_status = KPROBE_HIT_SS;
-	return 1;
-
-no_kprobe:
-	preempt_enable_no_resched();
-	return ret;
-}
-
-/*
- * Function return probe trampoline:
- * 	- init_kprobes() establishes a probepoint here
- * 	- When the probed function returns, this probe
- * 		causes the handlers to fire
- */
-void kretprobe_trampoline_holder(void)
-{
-	asm volatile(".global kretprobe_trampoline\n"
-			"kretprobe_trampoline:\n"
-			"nop\n");
-}
-
-/*
- * Called when the probe at kretprobe trampoline is hit
- */
-int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
-{
-        struct kretprobe_instance *ri = NULL;
-        struct hlist_head *head;
-        struct hlist_node *node, *tmp;
-	unsigned long flags, orig_ret_address = 0;
-	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
-
-	spin_lock_irqsave(&kretprobe_lock, flags);
-        head = kretprobe_inst_table_head(current);
-
-	/*
-	 * It is possible to have multiple instances associated with a given
-	 * task either because an multiple functions in the call path
-	 * have a return probe installed on them, and/or more then one return
-	 * return probe was registered for a target function.
-	 *
-	 * We can handle this because:
-	 *     - instances are always inserted at the head of the list
-	 *     - when multiple return probes are registered for the same
-         *       function, the first instance's ret_addr will point to the
-	 *       real return address, and all the rest will point to
-	 *       kretprobe_trampoline
-	 */
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
-                if (ri->task != current)
-			/* another task is sharing our hash bucket */
-                        continue;
-
-		if (ri->rp && ri->rp->handler)
-			ri->rp->handler(ri, regs);
-
-		orig_ret_address = (unsigned long)ri->ret_addr;
-		recycle_rp_inst(ri);
-
-		if (orig_ret_address != trampoline_address)
-			/*
-			 * This is the real return address. Any other
-			 * instances associated with this task are for
-			 * other calls deeper on the call stack
-			 */
-			break;
-	}
-
-	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
-	regs->nip = orig_ret_address;
-
-	reset_current_kprobe();
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
-	preempt_enable_no_resched();
-
-        /*
-         * By returning a non-zero value, we are telling
-         * kprobe_handler() that we don't want the post_handler
-         * to run (and have re-enabled preemption)
-         */
-        return 1;
-}
-
-/*
- * Called after single-stepping.  p->addr is the address of the
- * instruction whose first byte has been replaced by the "breakpoint"
- * instruction.  To avoid the SMP problems that can occur when we
- * temporarily put back the original opcode to single-step, we
- * single-stepped a copy of the instruction.  The address of this
- * copy is p->ainsn.insn.
- */
-static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
-{
-	int ret;
-	unsigned int insn = *p->ainsn.insn;
-
-	regs->nip = (unsigned long)p->addr;
-	ret = emulate_step(regs, insn);
-	if (ret == 0)
-		regs->nip = (unsigned long)p->addr + 4;
-}
-
-static inline int post_kprobe_handler(struct pt_regs *regs)
-{
-	struct kprobe *cur = kprobe_running();
-	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
-	if (!cur)
-		return 0;
-
-	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
-		kcb->kprobe_status = KPROBE_HIT_SSDONE;
-		cur->post_handler(cur, regs, 0);
-	}
-
-	resume_execution(cur, regs);
-	regs->msr |= kcb->kprobe_saved_msr;
-
-	/*Restore back the original saved kprobes variables and continue. */
-	if (kcb->kprobe_status == KPROBE_REENTER) {
-		restore_previous_kprobe(kcb);
-		goto out;
-	}
-	reset_current_kprobe();
-out:
-	preempt_enable_no_resched();
-
-	/*
-	 * if somebody else is singlestepping across a probe point, msr
-	 * will have SE set, in which case, continue the remaining processing
-	 * of do_debug, as if this is not a probe hit.
-	 */
-	if (regs->msr & MSR_SE)
-		return 0;
-
-	return 1;
-}
-
-static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
-{
-	struct kprobe *cur = kprobe_running();
-	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
-	if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-		return 1;
-
-	if (kcb->kprobe_status & KPROBE_HIT_SS) {
-		resume_execution(cur, regs);
-		regs->msr &= ~MSR_SE;
-		regs->msr |= kcb->kprobe_saved_msr;
-
-		reset_current_kprobe();
-		preempt_enable_no_resched();
-	}
-	return 0;
-}
-
-/*
- * Wrapper routine to for handling exceptions.
- */
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-				       unsigned long val, void *data)
-{
-	struct die_args *args = (struct die_args *)data;
-	int ret = NOTIFY_DONE;
-
-	switch (val) {
-	case DIE_BPT:
-		if (kprobe_handler(args->regs))
-			ret = NOTIFY_STOP;
-		break;
-	case DIE_SSTEP:
-		if (post_kprobe_handler(args->regs))
-			ret = NOTIFY_STOP;
-		break;
-	case DIE_PAGE_FAULT:
-		/* kprobe_running() needs smp_processor_id() */
-		preempt_disable();
-		if (kprobe_running() &&
-		    kprobe_fault_handler(args->regs, args->trapnr))
-			ret = NOTIFY_STOP;
-		preempt_enable();
-		break;
-	default:
-		break;
-	}
-	return ret;
-}
-
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
-	struct jprobe *jp = container_of(p, struct jprobe, kp);
-	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
-	memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
-
-	/* setup return addr to the jprobe handler routine */
-	regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry);
-	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
-
-	return 1;
-}
-
-void __kprobes jprobe_return(void)
-{
-	asm volatile("trap" ::: "memory");
-}
-
-void __kprobes jprobe_return_end(void)
-{
-};
-
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
-	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
-	/*
-	 * FIXME - we should ideally be validating that we got here 'cos
-	 * of the "trap" in jprobe_return() above, before restoring the
-	 * saved regs...
-	 */
-	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
-	preempt_enable_no_resched();
-	return 1;
-}
-
-static struct kprobe trampoline_p = {
-	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
-	.pre_handler = trampoline_probe_handler
-};
-
-int __init arch_init_kprobes(void)
-{
-	return register_kprobe(&trampoline_p);
-}
diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c
deleted file mode 100644
index 97c51e452be7..000000000000
--- a/arch/ppc64/kernel/machine_kexec.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * machine_kexec.c - handle transition of Linux booting another kernel
- *
- * Copyright (C) 2004-2005, IBM Corp.
- *
- * Created by: Milton D Miller II
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- */
-
-
-#include <linux/cpumask.h>
-#include <linux/kexec.h>
-#include <linux/smp.h>
-#include <linux/thread_info.h>
-#include <linux/errno.h>
-
-#include <asm/page.h>
-#include <asm/current.h>
-#include <asm/machdep.h>
-#include <asm/cacheflush.h>
-#include <asm/paca.h>
-#include <asm/mmu.h>
-#include <asm/sections.h>	/* _end */
-#include <asm/prom.h>
-#include <asm/smp.h>
-
-#define HASH_GROUP_SIZE 0x80	/* size of each hash group, asm/mmu.h */
-
-/* Have this around till we move it into crash specific file */
-note_buf_t crash_notes[NR_CPUS];
-
-/* Dummy for now. Not sure if we need to have a crash shutdown in here
- * and if what it will achieve. Letting it be now to compile the code
- * in generic kexec environment
- */
-void machine_crash_shutdown(struct pt_regs *regs)
-{
-	/* do nothing right now */
-	/* smp_relase_cpus() if we want smp on panic kernel */
-	/* cpu_irq_down to isolate us until we are ready */
-}
-
-int machine_kexec_prepare(struct kimage *image)
-{
-	int i;
-	unsigned long begin, end;	/* limits of segment */
-	unsigned long low, high;	/* limits of blocked memory range */
-	struct device_node *node;
-	unsigned long *basep;
-	unsigned int *sizep;
-
-	if (!ppc_md.hpte_clear_all)
-		return -ENOENT;
-
-	/*
-	 * Since we use the kernel fault handlers and paging code to
-	 * handle the virtual mode, we must make sure no destination
-	 * overlaps kernel static data or bss.
-	 */
-	for (i = 0; i < image->nr_segments; i++)
-		if (image->segment[i].mem < __pa(_end))
-			return -ETXTBSY;
-
-	/*
-	 * For non-LPAR, we absolutely can not overwrite the mmu hash
-	 * table, since we are still using the bolted entries in it to
-	 * do the copy.  Check that here.
-	 *
-	 * It is safe if the end is below the start of the blocked
-	 * region (end <= low), or if the beginning is after the
-	 * end of the blocked region (begin >= high).  Use the
-	 * boolean identity !(a || b)  === (!a && !b).
-	 */
-	if (htab_address) {
-		low = __pa(htab_address);
-		high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
-
-		for (i = 0; i < image->nr_segments; i++) {
-			begin = image->segment[i].mem;
-			end = begin + image->segment[i].memsz;
-
-			if ((begin < high) && (end > low))
-				return -ETXTBSY;
-		}
-	}
-
-	/* We also should not overwrite the tce tables */
-	for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
-			node = of_find_node_by_type(node, "pci")) {
-		basep = (unsigned long *)get_property(node, "linux,tce-base",
-							NULL);
-		sizep = (unsigned int *)get_property(node, "linux,tce-size",
-							NULL);
-		if (basep == NULL || sizep == NULL)
-			continue;
-
-		low = *basep;
-		high = low + (*sizep);
-
-		for (i = 0; i < image->nr_segments; i++) {
-			begin = image->segment[i].mem;
-			end = begin + image->segment[i].memsz;
-
-			if ((begin < high) && (end > low))
-				return -ETXTBSY;
-		}
-	}
-
-	return 0;
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{
-	/* we do nothing in prepare that needs to be undone */
-}
-
-#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
-
-static void copy_segments(unsigned long ind)
-{
-	unsigned long entry;
-	unsigned long *ptr;
-	void *dest;
-	void *addr;
-
-	/*
-	 * We rely on kexec_load to create a lists that properly
-	 * initializes these pointers before they are used.
-	 * We will still crash if the list is wrong, but at least
-	 * the compiler will be quiet.
-	 */
-	ptr = NULL;
-	dest = NULL;
-
-	for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
-		addr = __va(entry & PAGE_MASK);
-
-		switch (entry & IND_FLAGS) {
-		case IND_DESTINATION:
-			dest = addr;
-			break;
-		case IND_INDIRECTION:
-			ptr = addr;
-			break;
-		case IND_SOURCE:
-			copy_page(dest, addr);
-			dest += PAGE_SIZE;
-		}
-	}
-}
-
-void kexec_copy_flush(struct kimage *image)
-{
-	long i, nr_segments = image->nr_segments;
-	struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
-
-	/* save the ranges on the stack to efficiently flush the icache */
-	memcpy(ranges, image->segment, sizeof(ranges));
-
-	/*
-	 * After this call we may not use anything allocated in dynamic
-	 * memory, including *image.
-	 *
-	 * Only globals and the stack are allowed.
-	 */
-	copy_segments(image->head);
-
-	/*
-	 * we need to clear the icache for all dest pages sometime,
-	 * including ones that were in place on the original copy
-	 */
-	for (i = 0; i < nr_segments; i++)
-		flush_icache_range(ranges[i].mem + KERNELBASE,
-				ranges[i].mem + KERNELBASE +
-				ranges[i].memsz);
-}
-
-#ifdef CONFIG_SMP
-
-/* FIXME: we should schedule this function to be called on all cpus based
- * on calling the interrupts, but we would like to call it off irq level
- * so that the interrupt controller is clean.
- */
-void kexec_smp_down(void *arg)
-{
-	if (ppc_md.kexec_cpu_down)
-		ppc_md.kexec_cpu_down(0, 1);
-
-	local_irq_disable();
-	kexec_smp_wait();
-	/* NOTREACHED */
-}
-
-static void kexec_prepare_cpus(void)
-{
-	int my_cpu, i, notified=-1;
-
-	smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
-	my_cpu = get_cpu();
-
-	/* check the others cpus are now down (via paca hw cpu id == -1) */
-	for (i=0; i < NR_CPUS; i++) {
-		if (i == my_cpu)
-			continue;
-
-		while (paca[i].hw_cpu_id != -1) {
-			barrier();
-			if (!cpu_possible(i)) {
-				printk("kexec: cpu %d hw_cpu_id %d is not"
-						" possible, ignoring\n",
-						i, paca[i].hw_cpu_id);
-				break;
-			}
-			if (!cpu_online(i)) {
-				/* Fixme: this can be spinning in
-				 * pSeries_secondary_wait with a paca
-				 * waiting for it to go online.
-				 */
-				printk("kexec: cpu %d hw_cpu_id %d is not"
-						" online, ignoring\n",
-						i, paca[i].hw_cpu_id);
-				break;
-			}
-			if (i != notified) {
-				printk( "kexec: waiting for cpu %d (physical"
-						" %d) to go down\n",
-						i, paca[i].hw_cpu_id);
-				notified = i;
-			}
-		}
-	}
-
-	/* after we tell the others to go down */
-	if (ppc_md.kexec_cpu_down)
-		ppc_md.kexec_cpu_down(0, 0);
-
-	put_cpu();
-
-	local_irq_disable();
-}
-
-#else /* ! SMP */
-
-static void kexec_prepare_cpus(void)
-{
-	/*
-	 * move the secondarys to us so that we can copy
-	 * the new kernel 0-0x100 safely
-	 *
-	 * do this if kexec in setup.c ?
-	 *
-	 * We need to release the cpus if we are ever going from an
-	 * UP to an SMP kernel.
-	 */
-	smp_release_cpus();
-	if (ppc_md.kexec_cpu_down)
-		ppc_md.kexec_cpu_down(0, 0);
-	local_irq_disable();
-}
-
-#endif /* SMP */
-
-/*
- * kexec thread structure and stack.
- *
- * We need to make sure that this is 16384-byte aligned due to the
- * way process stacks are handled.  It also must be statically allocated
- * or allocated as part of the kimage, because everything else may be
- * overwritten when we copy the kexec image.  We piggyback on the
- * "init_task" linker section here to statically allocate a stack.
- *
- * We could use a smaller stack if we don't care about anything using
- * current, but that audit has not been performed.
- */
-union thread_union kexec_stack
-	__attribute__((__section__(".data.init_task"))) = { };
-
-/* Our assembly helper, in kexec_stub.S */
-extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
-					void *image, void *control,
-					void (*clear_all)(void)) ATTRIB_NORET;
-
-/* too late to fail here */
-void machine_kexec(struct kimage *image)
-{
-
-	/* prepare control code if any */
-
-	/* shutdown other cpus into our wait loop and quiesce interrupts */
-	kexec_prepare_cpus();
-
-	/* switch to a staticly allocated stack.  Based on irq stack code.
-	 * XXX: the task struct will likely be invalid once we do the copy!
-	 */
-	kexec_stack.thread_info.task = current_thread_info()->task;
-	kexec_stack.thread_info.flags = 0;
-
-	/* Some things are best done in assembly.  Finding globals with
-	 * a toc is easier in C, so pass in what we can.
-	 */
-	kexec_sequence(&kexec_stack, image->start, image,
-			page_address(image->control_code_page),
-			ppc_md.hpte_clear_all);
-	/* NOTREACHED */
-}
-
-/* Values we need to export to the second kernel via the device tree. */
-static unsigned long htab_base, htab_size, kernel_end;
-
-static struct property htab_base_prop = {
-	.name = "linux,htab-base",
-	.length = sizeof(unsigned long),
-	.value = (unsigned char *)&htab_base,
-};
-
-static struct property htab_size_prop = {
-	.name = "linux,htab-size",
-	.length = sizeof(unsigned long),
-	.value = (unsigned char *)&htab_size,
-};
-
-static struct property kernel_end_prop = {
-	.name = "linux,kernel-end",
-	.length = sizeof(unsigned long),
-	.value = (unsigned char *)&kernel_end,
-};
-
-static void __init export_htab_values(void)
-{
-	struct device_node *node;
-
-	node = of_find_node_by_path("/chosen");
-	if (!node)
-		return;
-
-	kernel_end = __pa(_end);
-	prom_add_property(node, &kernel_end_prop);
-
-	/* On machines with no htab htab_address is NULL */
-	if (NULL == htab_address)
-		goto out;
-
-	htab_base = __pa(htab_address);
-	prom_add_property(node, &htab_base_prop);
-
-	htab_size = 1UL << ppc64_pft_size;
-	prom_add_property(node, &htab_size_prop);
-
- out:
-	of_node_put(node);
-}
-
-void __init kexec_setup(void)
-{
-	export_htab_values();
-}
diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S
deleted file mode 100644
index 5e089deb0a2b..000000000000
--- a/arch/ppc64/kernel/misc.S
+++ /dev/null
@@ -1,940 +0,0 @@
-/*
- *  arch/ppc/kernel/misc.S
- *
- *  
- *
- * This file contains miscellaneous low-level functions.
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
- * and Paul Mackerras.
- * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
- * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) 
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/config.h>
-#include <linux/sys.h>
-#include <asm/unistd.h>
-#include <asm/errno.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cache.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
-
-	.text
-
-/*
- * Returns (address we were linked at) - (address we are running at)
- * for use before the text and data are mapped to KERNELBASE.
- */
-
-_GLOBAL(reloc_offset)
-	mflr	r0
-	bl	1f
-1:	mflr	r3
-	LOADADDR(r4,1b)
-	sub	r3,r4,r3
-	mtlr	r0
-	blr
-
-_GLOBAL(get_msr)
-	mfmsr	r3
-	blr
-
-_GLOBAL(get_dar)
-	mfdar	r3
-	blr
-
-_GLOBAL(get_srr0)
-	mfsrr0  r3
-	blr
-
-_GLOBAL(get_srr1)
-	mfsrr1  r3
-	blr
-	
-_GLOBAL(get_sp)
-	mr	r3,r1
-	blr
-
-#ifdef CONFIG_IRQSTACKS
-_GLOBAL(call_do_softirq)
-	mflr	r0
-	std	r0,16(r1)
-	stdu	r1,THREAD_SIZE-112(r3)
-	mr	r1,r3
-	bl	.__do_softirq
-	ld	r1,0(r1)
-	ld	r0,16(r1)
-	mtlr	r0
-	blr
-
-_GLOBAL(call___do_IRQ)
-	mflr	r0
-	std	r0,16(r1)
-	stdu	r1,THREAD_SIZE-112(r5)
-	mr	r1,r5
-	bl	.__do_IRQ
-	ld	r1,0(r1)
-	ld	r0,16(r1)
-	mtlr	r0
-	blr
-#endif /* CONFIG_IRQSTACKS */
-
-	/*
- * To be called by C code which needs to do some operations with MMU
- * disabled. Note that interrupts have to be disabled by the caller
- * prior to calling us. The code called _MUST_ be in the RMO of course
- * and part of the linear mapping as we don't attempt to translate the
- * stack pointer at all. The function is called with the stack switched
- * to this CPU emergency stack
- *
- * prototype is void *call_with_mmu_off(void *func, void *data);
- *
- * the called function is expected to be of the form
- *
- * void *called(void *data); 
- */
-_GLOBAL(call_with_mmu_off)
-	mflr	r0			/* get link, save it on stackframe */
-	std	r0,16(r1)
-	mr	r1,r5			/* save old stack ptr */
-	ld	r1,PACAEMERGSP(r13)	/* get emerg. stack */
-	subi	r1,r1,STACK_FRAME_OVERHEAD
-	std	r0,16(r1)		/* save link on emerg. stack */
-	std	r5,0(r1)		/* save old stack ptr in backchain */
-	ld	r3,0(r3)		/* get to real function ptr (assume same TOC) */
-	bl	2f			/* we need LR to return, continue at label 2 */
-
-	ld	r0,16(r1)		/* we return here from the call, get LR and */
-	ld	r1,0(r1)		/* .. old stack ptr */
-	mtspr	SPRN_SRR0,r0		/* and get back to virtual mode with these */
-	mfmsr	r4
-	ori	r4,r4,MSR_IR|MSR_DR
-	mtspr	SPRN_SRR1,r4
-	rfid
-
-2:	mtspr	SPRN_SRR0,r3		/* coming from above, enter real mode */
-	mr	r3,r4			/* get parameter */
-	mfmsr	r0
-	ori	r0,r0,MSR_IR|MSR_DR
-	xori	r0,r0,MSR_IR|MSR_DR
-	mtspr	SPRN_SRR1,r0
-	rfid
-
-
-	.section	".toc","aw"
-PPC64_CACHES:
-	.tc		ppc64_caches[TC],ppc64_caches
-	.section	".text"
-
-/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- *
- *   flush all bytes from start through stop-1 inclusive
- */
-
-_KPROBE(__flush_icache_range)
-
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- * and in some cases i-cache and d-cache line sizes differ from
- * each other.
- */
- 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r7,DCACHEL1LINESIZE(r10)/* Get cache line size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5		/* ensure we get enough */
-	lwz	r9,DCACHEL1LOGLINESIZE(r10)	/* Get log-2 of cache line size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	mtctr	r8
-1:	dcbst	0,r6
-	add	r6,r6,r7
-	bdnz	1b
-	sync
-
-/* Now invalidate the instruction cache */
-	
-	lwz	r7,ICACHEL1LINESIZE(r10)	/* Get Icache line size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5
-	lwz	r9,ICACHEL1LOGLINESIZE(r10)	/* Get log-2 of Icache line size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	mtctr	r8
-2:	icbi	0,r6
-	add	r6,r6,r7
-	bdnz	2b
-	isync
-	blr
-
-	.text
-/*
- * Like above, but only do the D-cache.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- *
- *    flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_range)
-
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- */
- 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r7,DCACHEL1LINESIZE(r10)	/* Get dcache line size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5		/* ensure we get enough */
-	lwz	r9,DCACHEL1LOGLINESIZE(r10)	/* Get log-2 of dcache line size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	mtctr	r8
-0:	dcbst	0,r6
-	add	r6,r6,r7
-	bdnz	0b
-	sync
-	blr
-
-/*
- * Like above, but works on non-mapped physical addresses.
- * Use only for non-LPAR setups ! It also assumes real mode
- * is cacheable. Used for flushing out the DART before using
- * it as uncacheable memory 
- *
- * flush_dcache_phys_range(unsigned long start, unsigned long stop)
- *
- *    flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_phys_range)
- 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r7,DCACHEL1LINESIZE(r10)	/* Get dcache line size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5		/* ensure we get enough */
-	lwz	r9,DCACHEL1LOGLINESIZE(r10)	/* Get log-2 of dcache line size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	mfmsr	r5			/* Disable MMU Data Relocation */
-	ori	r0,r5,MSR_DR
-	xori	r0,r0,MSR_DR
-	sync
-	mtmsr	r0
-	sync
-	isync
-	mtctr	r8
-0:	dcbst	0,r6
-	add	r6,r6,r7
-	bdnz	0b
-	sync
-	isync
-	mtmsr	r5			/* Re-enable MMU Data Relocation */
-	sync
-	isync
-	blr
-
-_GLOBAL(flush_inval_dcache_range)
- 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r7,DCACHEL1LINESIZE(r10)	/* Get dcache line size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5		/* ensure we get enough */
-	lwz	r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	sync
-	isync
-	mtctr	r8
-0:	dcbf	0,r6
-	add	r6,r6,r7
-	bdnz	0b
-	sync
-	isync
-	blr
-
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- *
- *	void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- */
-
-/* Flush the dcache */
- 	ld	r7,PPC64_CACHES@toc(r2)
-	clrrdi	r3,r3,PAGE_SHIFT           	    /* Page align */
-	lwz	r4,DCACHEL1LINESPERPAGE(r7)	/* Get # dcache lines per page */
-	lwz	r5,DCACHEL1LINESIZE(r7)		/* Get dcache line size */
-	mr	r6,r3
-	mtctr	r4
-0:	dcbst	0,r6
-	add	r6,r6,r5
-	bdnz	0b
-	sync
-
-/* Now invalidate the icache */	
-
-	lwz	r4,ICACHEL1LINESPERPAGE(r7)	/* Get # icache lines per page */
-	lwz	r5,ICACHEL1LINESIZE(r7)		/* Get icache line size */
-	mtctr	r4
-1:	icbi	0,r3
-	add	r3,r3,r5
-	bdnz	1b
-	isync
-	blr
-	
-/*
- * I/O string operations
- *
- * insb(port, buf, len)
- * outsb(port, buf, len)
- * insw(port, buf, len)
- * outsw(port, buf, len)
- * insl(port, buf, len)
- * outsl(port, buf, len)
- * insw_ns(port, buf, len)
- * outsw_ns(port, buf, len)
- * insl_ns(port, buf, len)
- * outsl_ns(port, buf, len)
- *
- * The *_ns versions don't do byte-swapping.
- */
-_GLOBAL(_insb)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,1
-	blelr-
-00:	lbz	r5,0(r3)
-	eieio
-	stbu	r5,1(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsb)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,1
-	blelr-
-00:	lbzu	r5,1(r4)
-	stb	r5,0(r3)
-	bdnz	00b
-	sync
-	blr	
-
-_GLOBAL(_insw)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhbrx	r5,0,r3
-	eieio
-	sthu	r5,2(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsw)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhzu	r5,2(r4)
-	sthbrx	r5,0,r3	
-	bdnz	00b
-	sync
-	blr	
-
-_GLOBAL(_insl)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwbrx	r5,0,r3
-	eieio
-	stwu	r5,4(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsl)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwzu	r5,4(r4)
-	stwbrx	r5,0,r3
-	bdnz	00b
-	sync
-	blr	
-
-/* _GLOBAL(ide_insw) now in drivers/ide/ide-iops.c */
-_GLOBAL(_insw_ns)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhz	r5,0(r3)
-	eieio
-	sthu	r5,2(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-/* _GLOBAL(ide_outsw) now in drivers/ide/ide-iops.c */
-_GLOBAL(_outsw_ns)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhzu	r5,2(r4)
-	sth	r5,0(r3)
-	bdnz	00b
-	sync
-	blr	
-
-_GLOBAL(_insl_ns)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwz	r5,0(r3)
-	eieio
-	stwu	r5,4(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsl_ns)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwzu	r5,4(r4)
-	stw	r5,0(r3)
-	bdnz	00b
-	sync
-	blr	
-
-/*
- * identify_cpu and calls setup_cpu
- * In:	r3 = base of the cpu_specs array
- *	r4 = address of cur_cpu_spec
- *	r5 = relocation offset
- */
-_GLOBAL(identify_cpu)
-	mfpvr	r7
-1:
-	lwz	r8,CPU_SPEC_PVR_MASK(r3)
-	and	r8,r8,r7
-	lwz	r9,CPU_SPEC_PVR_VALUE(r3)
-	cmplw	0,r9,r8
-	beq	1f
-	addi	r3,r3,CPU_SPEC_ENTRY_SIZE
-	b	1b
-1:
-	add	r0,r3,r5
-	std	r0,0(r4)
-	ld	r4,CPU_SPEC_SETUP(r3)
-	sub	r4,r4,r5
-	ld	r4,0(r4)
-	sub	r4,r4,r5
-	mtctr	r4
-	/* Calling convention for cpu setup is r3=offset, r4=cur_cpu_spec */
-	mr	r4,r3
-	mr	r3,r5
-	bctr
-
-/*
- * do_cpu_ftr_fixups - goes through the list of CPU feature fixups
- * and writes nop's over sections of code that don't apply for this cpu.
- * r3 = data offset (not changed)
- */
-_GLOBAL(do_cpu_ftr_fixups)
-	/* Get CPU 0 features */
-	LOADADDR(r6,cur_cpu_spec)
-	sub	r6,r6,r3
-	ld	r4,0(r6)
-	sub	r4,r4,r3
-	ld	r4,CPU_SPEC_FEATURES(r4)
-	/* Get the fixup table */
-	LOADADDR(r6,__start___ftr_fixup)
-	sub	r6,r6,r3
-	LOADADDR(r7,__stop___ftr_fixup)
-	sub	r7,r7,r3
-	/* Do the fixup */
-1:	cmpld	r6,r7
-	bgelr
-	addi	r6,r6,32
-	ld	r8,-32(r6)	/* mask */
-	and	r8,r8,r4
-	ld	r9,-24(r6)	/* value */
-	cmpld	r8,r9
-	beq	1b
-	ld	r8,-16(r6)	/* section begin */
-	ld	r9,-8(r6)	/* section end */
-	subf.	r9,r8,r9
-	beq	1b
-	/* write nops over the section of code */
-	/* todo: if large section, add a branch at the start of it */
-	srwi	r9,r9,2
-	mtctr	r9
-	sub	r8,r8,r3
-	lis	r0,0x60000000@h	/* nop */
-3:	stw	r0,0(r8)
-	andi.	r10,r4,CPU_FTR_SPLIT_ID_CACHE@l
-	beq	2f
-	dcbst	0,r8		/* suboptimal, but simpler */
-	sync
-	icbi	0,r8
-2:	addi	r8,r8,4
-	bdnz	3b
-	sync			/* additional sync needed on g4 */
-	isync
-	b	1b
-
-#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
-/*
- * Do an IO access in real mode
- */
-_GLOBAL(real_readb)
-	mfmsr	r7
-	ori	r0,r7,MSR_DR
-	xori	r0,r0,MSR_DR
-	sync
-	mtmsrd	r0
-	sync
-	isync
-	mfspr	r6,SPRN_HID4
-	rldicl	r5,r6,32,0
-	ori	r5,r5,0x100
-	rldicl	r5,r5,32,0
-	sync
-	mtspr	SPRN_HID4,r5
-	isync
-	slbia
-	isync
-	lbz	r3,0(r3)
-	sync
-	mtspr	SPRN_HID4,r6
-	isync
-	slbia
-	isync
-	mtmsrd	r7
-	sync
-	isync
-	blr
-
-/*
- * Do an IO access in real mode
- */
-_GLOBAL(real_writeb)
-	mfmsr	r7
-	ori	r0,r7,MSR_DR
-	xori	r0,r0,MSR_DR
-	sync
-	mtmsrd	r0
-	sync
-	isync
-	mfspr	r6,SPRN_HID4
-	rldicl	r5,r6,32,0
-	ori	r5,r5,0x100
-	rldicl	r5,r5,32,0
-	sync
-	mtspr	SPRN_HID4,r5
-	isync
-	slbia
-	isync
-	stb	r3,0(r4)
-	sync
-	mtspr	SPRN_HID4,r6
-	isync
-	slbia
-	isync
-	mtmsrd	r7
-	sync
-	isync
-	blr
-#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */
-
-/*
- * SCOM access functions for 970 (FX only for now)
- *
- * unsigned long scom970_read(unsigned int address);
- * void scom970_write(unsigned int address, unsigned long value);
- *
- * The address passed in is the 24 bits register address. This code
- * is 970 specific and will not check the status bits, so you should
- * know what you are doing.
- */
-_GLOBAL(scom970_read)
-	/* interrupts off */
-	mfmsr	r4
-	ori	r0,r4,MSR_EE
-	xori	r0,r0,MSR_EE
-	mtmsrd	r0,1
-
-	/* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
-	 * (including parity). On current CPUs they must be 0'd,
-	 * and finally or in RW bit
-	 */
-	rlwinm	r3,r3,8,0,15
-	ori	r3,r3,0x8000
-
-	/* do the actual scom read */
-	sync
-	mtspr	SPRN_SCOMC,r3
-	isync
-	mfspr	r3,SPRN_SCOMD
-	isync
-	mfspr	r0,SPRN_SCOMC
-	isync
-
-	/* XXX:	fixup result on some buggy 970's (ouch ! we lost a bit, bah
-	 * that's the best we can do). Not implemented yet as we don't use
-	 * the scom on any of the bogus CPUs yet, but may have to be done
-	 * ultimately
-	 */
-
-	/* restore interrupts */
-	mtmsrd	r4,1
-	blr
-
-
-_GLOBAL(scom970_write)
-	/* interrupts off */
-	mfmsr	r5
-	ori	r0,r5,MSR_EE
-	xori	r0,r0,MSR_EE
-	mtmsrd	r0,1
-
-	/* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
-	 * (including parity). On current CPUs they must be 0'd.
-	 */
-
-	rlwinm	r3,r3,8,0,15
-
-	sync
-	mtspr	SPRN_SCOMD,r4      /* write data */
-	isync
-	mtspr	SPRN_SCOMC,r3      /* write command */
-	isync
-	mfspr	3,SPRN_SCOMC
-	isync
-
-	/* restore interrupts */
-	mtmsrd	r5,1
-	blr
-
-
-/*
- * Create a kernel thread
- *   kernel_thread(fn, arg, flags)
- */
-_GLOBAL(kernel_thread)
-	std	r29,-24(r1)
-	std	r30,-16(r1)
-	stdu	r1,-STACK_FRAME_OVERHEAD(r1)
-	mr	r29,r3
-	mr	r30,r4
-	ori	r3,r5,CLONE_VM	/* flags */
-	oris	r3,r3,(CLONE_UNTRACED>>16)
-	li	r4,0		/* new sp (unused) */
-	li	r0,__NR_clone
-	sc
-	cmpdi	0,r3,0		/* parent or child? */
-	bne	1f		/* return if parent */
-	li	r0,0
-	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
-	ld	r2,8(r29)
-	ld	r29,0(r29)
-	mtlr	r29              /* fn addr in lr */
-	mr	r3,r30	        /* load arg and call fn */
-	blrl
-	li	r0,__NR_exit	/* exit after child exits */
-        li	r3,0
-	sc
-1:	addi	r1,r1,STACK_FRAME_OVERHEAD	
-	ld	r29,-24(r1)
-	ld	r30,-16(r1)
-	blr
-
-/*
- * disable_kernel_fp()
- * Disable the FPU.
- */
-_GLOBAL(disable_kernel_fp)
-	mfmsr	r3
-	rldicl	r0,r3,(63-MSR_FP_LG),1
-	rldicl	r3,r0,(MSR_FP_LG+1),0
-	mtmsrd	r3			/* disable use of fpu now */
-	isync
-	blr
-
-#ifdef CONFIG_ALTIVEC
-
-#if 0 /* this has no callers for now */
-/*
- * disable_kernel_altivec()
- * Disable the VMX.
- */
-_GLOBAL(disable_kernel_altivec)
-	mfmsr	r3
-	rldicl	r0,r3,(63-MSR_VEC_LG),1
-	rldicl	r3,r0,(MSR_VEC_LG+1),0
-	mtmsrd	r3			/* disable use of VMX now */
-	isync
-	blr
-#endif /* 0 */
-
-/*
- * giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
- */
-_GLOBAL(giveup_altivec)
-	mfmsr	r5
-	oris	r5,r5,MSR_VEC@h
-	mtmsrd	r5			/* enable use of VMX now */
-	isync
-	cmpdi	0,r3,0
-	beqlr-				/* if no previous owner, done */
-	addi	r3,r3,THREAD		/* want THREAD of task */
-	ld	r5,PT_REGS(r3)
-	cmpdi	0,r5,0
-	SAVE_32VRS(0,r4,r3)
-	mfvscr	vr0
-	li	r4,THREAD_VSCR
-	stvx	vr0,r4,r3
-	beq	1f
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	lis	r3,MSR_VEC@h
-	andc	r4,r4,r3		/* disable FP for previous task */
-	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
-	li	r5,0
-	ld	r4,last_task_used_altivec@got(r2)
-	std	r5,0(r4)
-#endif /* CONFIG_SMP */
-	blr
-
-#endif /* CONFIG_ALTIVEC */
-
-_GLOBAL(__setup_cpu_power3)
-	blr
-
-_GLOBAL(execve)
-	li	r0,__NR_execve
-	sc
-	bnslr
-	neg	r3,r3
-	blr
-
-/* kexec_wait(phys_cpu)
- *
- * wait for the flag to change, indicating this kernel is going away but
- * the slave code for the next one is at addresses 0 to 100.
- *
- * This is used by all slaves.
- *
- * Physical (hardware) cpu id should be in r3.
- */
-_GLOBAL(kexec_wait)
-	bl	1f
-1:	mflr	r5
-	addi	r5,r5,kexec_flag-1b
-
-99:	HMT_LOW
-#ifdef CONFIG_KEXEC		/* use no memory without kexec */
-	lwz	r4,0(r5)
-	cmpwi	0,r4,0
-	bnea	0x60
-#endif
-	b	99b
-
-/* this can be in text because we won't change it until we are
- * running in real anyways
- */
-kexec_flag:
-	.long	0
-
-
-#ifdef CONFIG_KEXEC
-
-/* kexec_smp_wait(void)
- *
- * call with interrupts off
- * note: this is a terminal routine, it does not save lr
- *
- * get phys id from paca
- * set paca id to -1 to say we got here
- * switch to real mode
- * join other cpus in kexec_wait(phys_id)
- */
-_GLOBAL(kexec_smp_wait)
-	lhz	r3,PACAHWCPUID(r13)
-	li	r4,-1
-	sth	r4,PACAHWCPUID(r13)	/* let others know we left */
-	bl	real_mode
-	b	.kexec_wait
-
-/*
- * switch to real mode (turn mmu off)
- * we use the early kernel trick that the hardware ignores bits
- * 0 and 1 (big endian) of the effective address in real mode
- *
- * don't overwrite r3 here, it is live for kexec_wait above.
- */
-real_mode:	/* assume normal blr return */
-1:	li	r9,MSR_RI
-	li	r10,MSR_DR|MSR_IR
-	mflr	r11		/* return address to SRR0 */
-	mfmsr	r12
-	andc	r9,r12,r9
-	andc	r10,r12,r10
-
-	mtmsrd	r9,1
-	mtspr	SPRN_SRR1,r10
-	mtspr	SPRN_SRR0,r11
-	rfid
-
-
-/*
- * kexec_sequence(newstack, start, image, control, clear_all())
- *
- * does the grungy work with stack switching and real mode switches
- * also does simple calls to other code
- */
-
-_GLOBAL(kexec_sequence)
-	mflr	r0
-	std	r0,16(r1)
-
-	/* switch stacks to newstack -- &kexec_stack.stack */
-	stdu	r1,THREAD_SIZE-112(r3)
-	mr	r1,r3
-
-	li	r0,0
-	std	r0,16(r1)
-
-	/* save regs for local vars on new stack.
-	 * yes, we won't go back, but ...
-	 */
-	std	r31,-8(r1)
-	std	r30,-16(r1)
-	std	r29,-24(r1)
-	std	r28,-32(r1)
-	std	r27,-40(r1)
-	std	r26,-48(r1)
-	std	r25,-56(r1)
-
-	stdu	r1,-112-64(r1)
-
-	/* save args into preserved regs */
-	mr	r31,r3			/* newstack (both) */
-	mr	r30,r4			/* start (real) */
-	mr	r29,r5			/* image (virt) */
-	mr	r28,r6			/* control, unused */
-	mr	r27,r7			/* clear_all() fn desc */
-	mr	r26,r8			/* spare */
-	lhz	r25,PACAHWCPUID(r13)	/* get our phys cpu from paca */
-
-	/* disable interrupts, we are overwriting kernel data next */
-	mfmsr	r3
-	rlwinm	r3,r3,0,17,15
-	mtmsrd	r3,1
-
-	/* copy dest pages, flush whole dest image */
-	mr	r3,r29
-	bl	.kexec_copy_flush	/* (image) */
-
-	/* turn off mmu */
-	bl	real_mode
-
-	/* clear out hardware hash page table and tlb */
-	ld	r5,0(r27)		/* deref function descriptor */
-	mtctr	r5
-	bctrl				/* ppc_md.hash_clear_all(void); */
-
-/*
- *   kexec image calling is:
- *      the first 0x100 bytes of the entry point are copied to 0
- *
- *      all slaves branch to slave = 0x60 (absolute)
- *              slave(phys_cpu_id);
- *
- *      master goes to start = entry point
- *              start(phys_cpu_id, start, 0);
- *
- *
- *   a wrapper is needed to call existing kernels, here is an approximate
- *   description of one method:
- *
- * v2: (2.6.10)
- *   start will be near the boot_block (maybe 0x100 bytes before it?)
- *   it will have a 0x60, which will b to boot_block, where it will wait
- *   and 0 will store phys into struct boot-block and load r3 from there,
- *   copy kernel 0-0x100 and tell slaves to back down to 0x60 again
- *
- * v1: (2.6.9)
- *    boot block will have all cpus scanning device tree to see if they
- *    are the boot cpu ?????
- *    other device tree differences (prop sizes, va vs pa, etc)...
- */
-
-	/* copy  0x100 bytes starting at start to 0 */
-	li	r3,0
-	mr	r4,r30
-	li	r5,0x100
-	li	r6,0
-	bl	.copy_and_flush	/* (dest, src, copy limit, start offset) */
-1:	/* assume normal blr return */
-
-	/* release other cpus to the new kernel secondary start at 0x60 */
-	mflr	r5
-	li	r6,1
-	stw	r6,kexec_flag-1b(5)
-	mr	r3,r25	# my phys cpu
-	mr	r4,r30	# start, aka phys mem offset
-	mtlr	4
-	li	r5,0
-	blr	/* image->start(physid, image->start, 0); */
-#endif /* CONFIG_KEXEC */
diff --git a/arch/ppc64/kernel/module.c b/arch/ppc64/kernel/module.c
deleted file mode 100644
index 928b8581fcb0..000000000000
--- a/arch/ppc64/kernel/module.c
+++ /dev/null
@@ -1,455 +0,0 @@
-/*  Kernel module help for PPC64.
-    Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
-#include <linux/module.h>
-#include <linux/elf.h>
-#include <linux/moduleloader.h>
-#include <linux/err.h>
-#include <linux/vmalloc.h>
-#include <asm/module.h>
-#include <asm/uaccess.h>
-
-/* FIXME: We don't do .init separately.  To do this, we'd need to have
-   a separate r2 value in the init and core section, and stub between
-   them, too.
-
-   Using a magic allocator which places modules within 32MB solves
-   this, and makes other things simpler.  Anton?
-   --RR.  */
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt , ...)
-#endif
-
-/* There's actually a third entry here, but it's unused */
-struct ppc64_opd_entry
-{
-	unsigned long funcaddr;
-	unsigned long r2;
-};
-
-/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
-   the kernel itself).  But on PPC64, these need to be used for every
-   jump, actually, to reset r2 (TOC+0x8000). */
-struct ppc64_stub_entry
-{
-	/* 28 byte jump instruction sequence (7 instructions) */
-	unsigned char jump[28];
-	unsigned char unused[4];
-	/* Data for the above code */
-	struct ppc64_opd_entry opd;
-};
-
-/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
-   function which may be more than 24-bits away.  We could simply
-   patch the new r2 value and function pointer into the stub, but it's
-   significantly shorter to put these values at the end of the stub
-   code, and patch the stub address (32-bits relative to the TOC ptr,
-   r2) into the stub. */
-static struct ppc64_stub_entry ppc64_stub =
-{ .jump = {
-	0x3d, 0x82, 0x00, 0x00, /* addis   r12,r2, <high> */
-	0x39, 0x8c, 0x00, 0x00, /* addi    r12,r12, <low> */
-	/* Save current r2 value in magic place on the stack. */
-	0xf8, 0x41, 0x00, 0x28, /* std     r2,40(r1) */
-	0xe9, 0x6c, 0x00, 0x20, /* ld      r11,32(r12) */
-	0xe8, 0x4c, 0x00, 0x28, /* ld      r2,40(r12) */
-	0x7d, 0x69, 0x03, 0xa6, /* mtctr   r11 */
-	0x4e, 0x80, 0x04, 0x20  /* bctr */
-} };
-
-/* Count how many different 24-bit relocations (different symbol,
-   different addend) */
-static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
-{
-	unsigned int i, j, ret = 0;
-
-	/* FIXME: Only count external ones --RR */
-	/* Sure, this is order(n^2), but it's usually short, and not
-           time critical */
-	for (i = 0; i < num; i++) {
-		/* Only count 24-bit relocs, others don't need stubs */
-		if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24)
-			continue;
-		for (j = 0; j < i; j++) {
-			/* If this addend appeared before, it's
-                           already been counted */
-			if (rela[i].r_info == rela[j].r_info
-			    && rela[i].r_addend == rela[j].r_addend)
-				break;
-		}
-		if (j == i) ret++;
-	}
-	return ret;
-}
-
-void *module_alloc(unsigned long size)
-{
-	if (size == 0)
-		return NULL;
-
-	return vmalloc_exec(size);
-}
-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
-	vfree(module_region);
-	/* FIXME: If module_region == mod->init_region, trim exception
-           table entries. */
-}
-
-/* Get size of potential trampolines required. */
-static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
-				    const Elf64_Shdr *sechdrs)
-{
-	/* One extra reloc so it's always 0-funcaddr terminated */
-	unsigned long relocs = 1;
-	unsigned i;
-
-	/* Every relocated section... */
-	for (i = 1; i < hdr->e_shnum; i++) {
-		if (sechdrs[i].sh_type == SHT_RELA) {
-			DEBUGP("Found relocations in section %u\n", i);
-			DEBUGP("Ptr: %p.  Number: %lu\n",
-			       (void *)sechdrs[i].sh_addr,
-			       sechdrs[i].sh_size / sizeof(Elf64_Rela));
-			relocs += count_relocs((void *)sechdrs[i].sh_addr,
-					       sechdrs[i].sh_size
-					       / sizeof(Elf64_Rela));
-		}
-	}
-
-	DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
-	return relocs * sizeof(struct ppc64_stub_entry);
-}
-
-static void dedotify_versions(struct modversion_info *vers,
-			      unsigned long size)
-{
-	struct modversion_info *end;
-
-	for (end = (void *)vers + size; vers < end; vers++)
-		if (vers->name[0] == '.')
-			memmove(vers->name, vers->name+1, strlen(vers->name));
-}
-
-/* Undefined symbols which refer to .funcname, hack to funcname */
-static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
-{
-	unsigned int i;
-
-	for (i = 1; i < numsyms; i++) {
-		if (syms[i].st_shndx == SHN_UNDEF) {
-			char *name = strtab + syms[i].st_name;
-			if (name[0] == '.')
-				memmove(name, name+1, strlen(name));
-		}
-	}
-}
-
-int module_frob_arch_sections(Elf64_Ehdr *hdr,
-			      Elf64_Shdr *sechdrs,
-			      char *secstrings,
-			      struct module *me)
-{
-	unsigned int i;
-
-	/* Find .toc and .stubs sections, symtab and strtab */
-	for (i = 1; i < hdr->e_shnum; i++) {
-		char *p;
-		if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
-			me->arch.stubs_section = i;
-		else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
-			me->arch.toc_section = i;
-		else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
-			dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
-					  sechdrs[i].sh_size);
-
-		/* We don't handle .init for the moment: rename to _init */
-		while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
-			p[0] = '_';
-
-		if (sechdrs[i].sh_type == SHT_SYMTAB)
-			dedotify((void *)hdr + sechdrs[i].sh_offset,
-				 sechdrs[i].sh_size / sizeof(Elf64_Sym),
-				 (void *)hdr
-				 + sechdrs[sechdrs[i].sh_link].sh_offset);
-	}
-	if (!me->arch.stubs_section || !me->arch.toc_section) {
-		printk("%s: doesn't contain .toc or .stubs.\n", me->name);
-		return -ENOEXEC;
-	}
-
-	/* Override the stubs size */
-	sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
-	return 0;
-}
-
-int apply_relocate(Elf64_Shdr *sechdrs,
-		   const char *strtab,
-		   unsigned int symindex,
-		   unsigned int relsec,
-		   struct module *me)
-{
-	printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name);
-	return -ENOEXEC;
-}
-
-/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
-   gives the value maximum span in an instruction which uses a signed
-   offset) */
-static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
-{
-	return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
-}
-
-/* Both low and high 16 bits are added as SIGNED additions, so if low
-   16 bits has high bit set, high 16 bits must be adjusted.  These
-   macros do that (stolen from binutils). */
-#define PPC_LO(v) ((v) & 0xffff)
-#define PPC_HI(v) (((v) >> 16) & 0xffff)
-#define PPC_HA(v) PPC_HI ((v) + 0x8000)
-
-/* Patch stub to reference function and correct r2 value. */
-static inline int create_stub(Elf64_Shdr *sechdrs,
-			      struct ppc64_stub_entry *entry,
-			      struct ppc64_opd_entry *opd,
-			      struct module *me)
-{
-	Elf64_Half *loc1, *loc2;
-	long reladdr;
-
-	*entry = ppc64_stub;
-
-	loc1 = (Elf64_Half *)&entry->jump[2];
-	loc2 = (Elf64_Half *)&entry->jump[6];
-
-	/* Stub uses address relative to r2. */
-	reladdr = (unsigned long)entry - my_r2(sechdrs, me);
-	if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
-		printk("%s: Address %p of stub out of range of %p.\n",
-		       me->name, (void *)reladdr, (void *)my_r2);
-		return 0;
-	}
-	DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
-
-	*loc1 = PPC_HA(reladdr);
-	*loc2 = PPC_LO(reladdr);
-	entry->opd.funcaddr = opd->funcaddr;
-	entry->opd.r2 = opd->r2;
-	return 1;
-}
-
-/* Create stub to jump to function described in this OPD: we need the
-   stub to set up the TOC ptr (r2) for the function. */
-static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
-				   unsigned long opdaddr,
-				   struct module *me)
-{
-	struct ppc64_stub_entry *stubs;
-	struct ppc64_opd_entry *opd = (void *)opdaddr;
-	unsigned int i, num_stubs;
-
-	num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
-
-	/* Find this stub, or if that fails, the next avail. entry */
-	stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
-	for (i = 0; stubs[i].opd.funcaddr; i++) {
-		BUG_ON(i >= num_stubs);
-
-		if (stubs[i].opd.funcaddr == opd->funcaddr)
-			return (unsigned long)&stubs[i];
-	}
-
-	if (!create_stub(sechdrs, &stubs[i], opd, me))
-		return 0;
-
-	return (unsigned long)&stubs[i];
-}
-
-/* We expect a noop next: if it is, replace it with instruction to
-   restore r2. */
-static int restore_r2(u32 *instruction, struct module *me)
-{
-	if (*instruction != 0x60000000) {
-		printk("%s: Expect noop after relocate, got %08x\n",
-		       me->name, *instruction);
-		return 0;
-	}
-	*instruction = 0xe8410028;	/* ld r2,40(r1) */
-	return 1;
-}
-
-int apply_relocate_add(Elf64_Shdr *sechdrs,
-		       const char *strtab,
-		       unsigned int symindex,
-		       unsigned int relsec,
-		       struct module *me)
-{
-	unsigned int i;
-	Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
-	Elf64_Sym *sym;
-	unsigned long *location;
-	unsigned long value;
-
-	DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
-		/* This is where to make the change */
-		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			+ rela[i].r_offset;
-		/* This is the symbol it is referring to */
-		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
-			+ ELF64_R_SYM(rela[i].r_info);
-
-		DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
-		       location, (long)ELF64_R_TYPE(rela[i].r_info),
-		       strtab + sym->st_name, (unsigned long)sym->st_value,
-		       (long)rela[i].r_addend);
-
-		/* `Everything is relative'. */
-		value = sym->st_value + rela[i].r_addend;
-
-		switch (ELF64_R_TYPE(rela[i].r_info)) {
-		case R_PPC64_ADDR32:
-			/* Simply set it */
-			*(u32 *)location = value;
-			break;
-			
-		case R_PPC64_ADDR64:
-			/* Simply set it */
-			*(unsigned long *)location = value;
-			break;
-
-		case R_PPC64_TOC:
-			*(unsigned long *)location = my_r2(sechdrs, me);
-			break;
-
-		case R_PPC64_TOC16:
-			/* Subtact TOC pointer */
-			value -= my_r2(sechdrs, me);
-			if (value + 0x8000 > 0xffff) {
-				printk("%s: bad TOC16 relocation (%lu)\n",
-				       me->name, value);
-				return -ENOEXEC;
-			}
-			*((uint16_t *) location)
-				= (*((uint16_t *) location) & ~0xffff)
-				| (value & 0xffff);
-			break;
-
-		case R_PPC64_TOC16_DS:
-			/* Subtact TOC pointer */
-			value -= my_r2(sechdrs, me);
-			if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
-				printk("%s: bad TOC16_DS relocation (%lu)\n",
-				       me->name, value);
-				return -ENOEXEC;
-			}
-			*((uint16_t *) location)
-				= (*((uint16_t *) location) & ~0xfffc)
-				| (value & 0xfffc);
-			break;
-
-		case R_PPC_REL24:
-			/* FIXME: Handle weak symbols here --RR */
-			if (sym->st_shndx == SHN_UNDEF) {
-				/* External: go via stub */
-				value = stub_for_addr(sechdrs, value, me);
-				if (!value)
-					return -ENOENT;
-				if (!restore_r2((u32 *)location + 1, me))
-					return -ENOEXEC;
-			}
-
-			/* Convert value to relative */
-			value -= (unsigned long)location;
-			if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
-				printk("%s: REL24 %li out of range!\n",
-				       me->name, (long int)value);
-				return -ENOEXEC;
-			}
-
-			/* Only replace bits 2 through 26 */
-			*(uint32_t *)location 
-				= (*(uint32_t *)location & ~0x03fffffc)
-				| (value & 0x03fffffc);
-			break;
-
-		default:
-			printk("%s: Unknown ADD relocation: %lu\n",
-			       me->name,
-			       (unsigned long)ELF64_R_TYPE(rela[i].r_info));
-			return -ENOEXEC;
-		}
-	}
-
-	return 0;
-}
-
-LIST_HEAD(module_bug_list);
-
-int module_finalize(const Elf_Ehdr *hdr,
-		const Elf_Shdr *sechdrs, struct module *me)
-{
-	char *secstrings;
-	unsigned int i;
-
-	me->arch.bug_table = NULL;
-	me->arch.num_bugs = 0;
-
-	/* Find the __bug_table section, if present */
-	secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-	for (i = 1; i < hdr->e_shnum; i++) {
-		if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
-			continue;
-		me->arch.bug_table = (void *) sechdrs[i].sh_addr;
-		me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
-		break;
-	}
-
-	/*
-	 * Strictly speaking this should have a spinlock to protect against
-	 * traversals, but since we only traverse on BUG()s, a spinlock
-	 * could potentially lead to deadlock and thus be counter-productive.
-	 */
-	list_add(&me->arch.bug_list, &module_bug_list);
-
-	return 0;
-}
-
-void module_arch_cleanup(struct module *mod)
-{
-	list_del(&mod->arch.bug_list);
-}
-
-struct bug_entry *module_find_bug(unsigned long bugaddr)
-{
-	struct mod_arch_specific *mod;
-	unsigned int i;
-	struct bug_entry *bug;
-
-	list_for_each_entry(mod, &module_bug_list, bug_list) {
-		bug = mod->bug_table;
-		for (i = 0; i < mod->num_bugs; ++i, ++bug)
-			if (bugaddr == bug->bug_addr)
-				return bug;
-	}
-	return NULL;
-}
diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c
deleted file mode 100644
index 3cef1b8f57f0..000000000000
--- a/arch/ppc64/kernel/pci.c
+++ /dev/null
@@ -1,1319 +0,0 @@
-/*
- * Port for PPC64 David Engebretsen, IBM Corp.
- * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
- * 
- * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
- *   Rework, based on alpha PCI code.
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#undef DEBUG
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/mm.h>
-#include <linux/list.h>
-#include <linux/syscalls.h>
-
-#include <asm/processor.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-#include <asm/ppc-pci.h>
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-unsigned long pci_probe_only = 1;
-unsigned long pci_assign_all_buses = 0;
-
-/*
- * legal IO pages under MAX_ISA_PORT.  This is to ensure we don't touch
- * devices we don't have access to.
- */
-unsigned long io_page_mask;
-
-EXPORT_SYMBOL(io_page_mask);
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
-static void fixup_resource(struct resource *res, struct pci_dev *dev);
-static void do_bus_setup(struct pci_bus *bus);
-#endif
-
-unsigned int pcibios_assign_all_busses(void)
-{
-	return pci_assign_all_buses;
-}
-
-/* pci_io_base -- the base address from which io bars are offsets.
- * This is the lowest I/O base address (so bar values are always positive),
- * and it *must* be the start of ISA space if an ISA bus exists because
- * ISA drivers use hard coded offsets.  If no ISA bus exists a dummy
- * page is mapped and isa_io_limit prevents access to it.
- */
-unsigned long isa_io_base;	/* NULL if no ISA bus */
-EXPORT_SYMBOL(isa_io_base);
-unsigned long pci_io_base;
-EXPORT_SYMBOL(pci_io_base);
-
-void iSeries_pcibios_init(void);
-
-LIST_HEAD(hose_list);
-
-struct dma_mapping_ops pci_dma_ops;
-EXPORT_SYMBOL(pci_dma_ops);
-
-int global_phb_number;		/* Global phb counter */
-
-/* Cached ISA bridge dev. */
-struct pci_dev *ppc64_isabridge_dev = NULL;
-
-static void fixup_broken_pcnet32(struct pci_dev* dev)
-{
-	if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
-		dev->vendor = PCI_VENDOR_ID_AMD;
-		pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
-	}
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
-
-void  pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
-			      struct resource *res)
-{
-	unsigned long offset = 0;
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
-	if (!hose)
-		return;
-
-	if (res->flags & IORESOURCE_IO)
-	        offset = (unsigned long)hose->io_base_virt - pci_io_base;
-
-	if (res->flags & IORESOURCE_MEM)
-		offset = hose->pci_mem_offset;
-
-	region->start = res->start - offset;
-	region->end = res->end - offset;
-}
-
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
-			      struct pci_bus_region *region)
-{
-	unsigned long offset = 0;
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
-	if (!hose)
-		return;
-
-	if (res->flags & IORESOURCE_IO)
-	        offset = (unsigned long)hose->io_base_virt - pci_io_base;
-
-	if (res->flags & IORESOURCE_MEM)
-		offset = hose->pci_mem_offset;
-
-	res->start = region->start + offset;
-	res->end = region->end + offset;
-}
-
-#ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-#endif
-
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- *
- * Why? Because some silly external IO cards only decode
- * the low 10 bits of the IO address. The 0x00-0xff region
- * is reserved for motherboard devices that decode all 16
- * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
- * but we want to try to avoid allocating at 0x2900-0x2bff
- * which might have be mirrored at 0x0100-0x03ff..
- */
-void pcibios_align_resource(void *data, struct resource *res,
-			    unsigned long size, unsigned long align)
-{
-	struct pci_dev *dev = data;
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	unsigned long start = res->start;
-	unsigned long alignto;
-
-	if (res->flags & IORESOURCE_IO) {
-	        unsigned long offset = (unsigned long)hose->io_base_virt -
-					pci_io_base;
-		/* Make sure we start at our min on all hoses */
-		if (start - offset < PCIBIOS_MIN_IO)
-			start = PCIBIOS_MIN_IO + offset;
-
-		/*
-		 * Put everything into 0x00-0xff region modulo 0x400
-		 */
-		if (start & 0x300)
-			start = (start + 0x3ff) & ~0x3ff;
-
-	} else if (res->flags & IORESOURCE_MEM) {
-		/* Make sure we start at our min on all hoses */
-		if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM)
-			start = PCIBIOS_MIN_MEM + hose->pci_mem_offset;
-
-		/* Align to multiple of size of minimum base.  */
-		alignto = max(0x1000UL, align);
-		start = ALIGN(start, alignto);
-	}
-
-	res->start = start;
-}
-
-static DEFINE_SPINLOCK(hose_spinlock);
-
-/*
- * pci_controller(phb) initialized common variables.
- */
-void __devinit pci_setup_pci_controller(struct pci_controller *hose)
-{
-	memset(hose, 0, sizeof(struct pci_controller));
-
-	spin_lock(&hose_spinlock);
-	hose->global_number = global_phb_number++;
-	list_add_tail(&hose->list_node, &hose_list);
-	spin_unlock(&hose_spinlock);
-}
-
-static void __init pcibios_claim_one_bus(struct pci_bus *b)
-{
-	struct pci_dev *dev;
-	struct pci_bus *child_bus;
-
-	list_for_each_entry(dev, &b->devices, bus_list) {
-		int i;
-
-		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-			struct resource *r = &dev->resource[i];
-
-			if (r->parent || !r->start || !r->flags)
-				continue;
-			pci_claim_resource(dev, i);
-		}
-	}
-
-	list_for_each_entry(child_bus, &b->children, node)
-		pcibios_claim_one_bus(child_bus);
-}
-
-#ifndef CONFIG_PPC_ISERIES
-static void __init pcibios_claim_of_setup(void)
-{
-	struct pci_bus *b;
-
-	list_for_each_entry(b, &pci_root_buses, node)
-		pcibios_claim_one_bus(b);
-}
-#endif
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
-static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
-{
-	u32 *prop;
-	int len;
-
-	prop = (u32 *) get_property(np, name, &len);
-	if (prop && len >= 4)
-		return *prop;
-	return def;
-}
-
-static unsigned int pci_parse_of_flags(u32 addr0)
-{
-	unsigned int flags = 0;
-
-	if (addr0 & 0x02000000) {
-		flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
-		flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
-		flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
-		if (addr0 & 0x40000000)
-			flags |= IORESOURCE_PREFETCH
-				 | PCI_BASE_ADDRESS_MEM_PREFETCH;
-	} else if (addr0 & 0x01000000)
-		flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
-	return flags;
-}
-
-#define GET_64BIT(prop, i)	((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
-
-static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev)
-{
-	u64 base, size;
-	unsigned int flags;
-	struct resource *res;
-	u32 *addrs, i;
-	int proplen;
-
-	addrs = (u32 *) get_property(node, "assigned-addresses", &proplen);
-	if (!addrs)
-		return;
-	for (; proplen >= 20; proplen -= 20, addrs += 5) {
-		flags = pci_parse_of_flags(addrs[0]);
-		if (!flags)
-			continue;
-		base = GET_64BIT(addrs, 1);
-		size = GET_64BIT(addrs, 3);
-		if (!size)
-			continue;
-		i = addrs[0] & 0xff;
-		if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
-			res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
-		} else if (i == dev->rom_base_reg) {
-			res = &dev->resource[PCI_ROM_RESOURCE];
-			flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
-		} else {
-			printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
-			continue;
-		}
-		res->start = base;
-		res->end = base + size - 1;
-		res->flags = flags;
-		res->name = pci_name(dev);
-		fixup_resource(res, dev);
-	}
-}
-
-struct pci_dev *of_create_pci_dev(struct device_node *node,
-				 struct pci_bus *bus, int devfn)
-{
-	struct pci_dev *dev;
-	const char *type;
-
-	dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL);
-	if (!dev)
-		return NULL;
-	type = get_property(node, "device_type", NULL);
-	if (type == NULL)
-		type = "";
-
-	memset(dev, 0, sizeof(struct pci_dev));
-	dev->bus = bus;
-	dev->sysdata = node;
-	dev->dev.parent = bus->bridge;
-	dev->dev.bus = &pci_bus_type;
-	dev->devfn = devfn;
-	dev->multifunction = 0;		/* maybe a lie? */
-
-	dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
-	dev->device = get_int_prop(node, "device-id", 0xffff);
-	dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
-	dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
-
-	dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/
-
-	sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
-		dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
-	dev->class = get_int_prop(node, "class-code", 0);
-
-	dev->current_state = 4;		/* unknown power state */
-
-	if (!strcmp(type, "pci")) {
-		/* a PCI-PCI bridge */
-		dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
-		dev->rom_base_reg = PCI_ROM_ADDRESS1;
-	} else if (!strcmp(type, "cardbus")) {
-		dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
-	} else {
-		dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
-		dev->rom_base_reg = PCI_ROM_ADDRESS;
-		dev->irq = NO_IRQ;
-		if (node->n_intrs > 0) {
-			dev->irq = node->intrs[0].line;
-			pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
-					      dev->irq);
-		}
-	}
-
-	pci_parse_of_addrs(node, dev);
-
-	pci_device_add(dev, bus);
-
-	/* XXX pci_scan_msi_device(dev); */
-
-	return dev;
-}
-EXPORT_SYMBOL(of_create_pci_dev);
-
-void __devinit of_scan_bus(struct device_node *node,
-				  struct pci_bus *bus)
-{
-	struct device_node *child = NULL;
-	u32 *reg;
-	int reglen, devfn;
-	struct pci_dev *dev;
-
-	while ((child = of_get_next_child(node, child)) != NULL) {
-		reg = (u32 *) get_property(child, "reg", &reglen);
-		if (reg == NULL || reglen < 20)
-			continue;
-		devfn = (reg[0] >> 8) & 0xff;
-		/* create a new pci_dev for this device */
-		dev = of_create_pci_dev(child, bus, devfn);
-		if (!dev)
-			continue;
-		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
-		    dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
-			of_scan_pci_bridge(child, dev);
-	}
-
-	do_bus_setup(bus);
-}
-EXPORT_SYMBOL(of_scan_bus);
-
-void __devinit of_scan_pci_bridge(struct device_node *node,
-			 	struct pci_dev *dev)
-{
-	struct pci_bus *bus;
-	u32 *busrange, *ranges;
-	int len, i, mode;
-	struct resource *res;
-	unsigned int flags;
-	u64 size;
-
-	/* parse bus-range property */
-	busrange = (u32 *) get_property(node, "bus-range", &len);
-	if (busrange == NULL || len != 8) {
-		printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n",
-		       node->full_name);
-		return;
-	}
-	ranges = (u32 *) get_property(node, "ranges", &len);
-	if (ranges == NULL) {
-		printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n",
-		       node->full_name);
-		return;
-	}
-
-	bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
-	if (!bus) {
-		printk(KERN_ERR "Failed to create pci bus for %s\n",
-		       node->full_name);
-		return;
-	}
-
-	bus->primary = dev->bus->number;
-	bus->subordinate = busrange[1];
-	bus->bridge_ctl = 0;
-	bus->sysdata = node;
-
-	/* parse ranges property */
-	/* PCI #address-cells == 3 and #size-cells == 2 always */
-	res = &dev->resource[PCI_BRIDGE_RESOURCES];
-	for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
-		res->flags = 0;
-		bus->resource[i] = res;
-		++res;
-	}
-	i = 1;
-	for (; len >= 32; len -= 32, ranges += 8) {
-		flags = pci_parse_of_flags(ranges[0]);
-		size = GET_64BIT(ranges, 6);
-		if (flags == 0 || size == 0)
-			continue;
-		if (flags & IORESOURCE_IO) {
-			res = bus->resource[0];
-			if (res->flags) {
-				printk(KERN_ERR "PCI: ignoring extra I/O range"
-				       " for bridge %s\n", node->full_name);
-				continue;
-			}
-		} else {
-			if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
-				printk(KERN_ERR "PCI: too many memory ranges"
-				       " for bridge %s\n", node->full_name);
-				continue;
-			}
-			res = bus->resource[i];
-			++i;
-		}
-		res->start = GET_64BIT(ranges, 1);
-		res->end = res->start + size - 1;
-		res->flags = flags;
-		fixup_resource(res, dev);
-	}
-	sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
-		bus->number);
-
-	mode = PCI_PROBE_NORMAL;
-	if (ppc_md.pci_probe_mode)
-		mode = ppc_md.pci_probe_mode(bus);
-	if (mode == PCI_PROBE_DEVTREE)
-		of_scan_bus(node, bus);
-	else if (mode == PCI_PROBE_NORMAL)
-		pci_scan_child_bus(bus);
-}
-EXPORT_SYMBOL(of_scan_pci_bridge);
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-
-void __devinit scan_phb(struct pci_controller *hose)
-{
-	struct pci_bus *bus;
-	struct device_node *node = hose->arch_data;
-	int i, mode;
-	struct resource *res;
-
-	bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node);
-	if (bus == NULL) {
-		printk(KERN_ERR "Failed to create bus for PCI domain %04x\n",
-		       hose->global_number);
-		return;
-	}
-	bus->secondary = hose->first_busno;
-	hose->bus = bus;
-
-	bus->resource[0] = res = &hose->io_resource;
-	if (res->flags && request_resource(&ioport_resource, res))
-		printk(KERN_ERR "Failed to request PCI IO region "
-		       "on PCI domain %04x\n", hose->global_number);
-
-	for (i = 0; i < 3; ++i) {
-		res = &hose->mem_resources[i];
-		bus->resource[i+1] = res;
-		if (res->flags && request_resource(&iomem_resource, res))
-			printk(KERN_ERR "Failed to request PCI memory region "
-			       "on PCI domain %04x\n", hose->global_number);
-	}
-
-	mode = PCI_PROBE_NORMAL;
-#ifdef CONFIG_PPC_MULTIPLATFORM
-	if (ppc_md.pci_probe_mode)
-		mode = ppc_md.pci_probe_mode(bus);
-	if (mode == PCI_PROBE_DEVTREE) {
-		bus->subordinate = hose->last_busno;
-		of_scan_bus(node, bus);
-	}
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-	if (mode == PCI_PROBE_NORMAL)
-		hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
-	pci_bus_add_devices(bus);
-}
-
-static int __init pcibios_init(void)
-{
-	struct pci_controller *hose, *tmp;
-
-	/* For now, override phys_mem_access_prot. If we need it,
-	 * later, we may move that initialization to each ppc_md
-	 */
-	ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
-
-#ifdef CONFIG_PPC_ISERIES
-	iSeries_pcibios_init(); 
-#endif
-
-	printk("PCI: Probing PCI hardware\n");
-
-	/* Scan all of the recorded PCI controllers.  */
-	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
-		scan_phb(hose);
-
-#ifndef CONFIG_PPC_ISERIES
-	if (pci_probe_only)
-		pcibios_claim_of_setup();
-	else
-		/* FIXME: `else' will be removed when
-		   pci_assign_unassigned_resources() is able to work
-		   correctly with [partially] allocated PCI tree. */
-		pci_assign_unassigned_resources();
-#endif /* !CONFIG_PPC_ISERIES */
-
-	/* Call machine dependent final fixup */
-	if (ppc_md.pcibios_fixup)
-		ppc_md.pcibios_fixup();
-
-	/* Cache the location of the ISA bridge (if we have one) */
-	ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
-	if (ppc64_isabridge_dev != NULL)
-		printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
-	/* map in PCI I/O space */
-	phbs_remap_io();
-#endif
-
-	printk("PCI: Probing PCI hardware done\n");
-
-	return 0;
-}
-
-subsys_initcall(pcibios_init);
-
-char __init *pcibios_setup(char *str)
-{
-	return str;
-}
-
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
-	u16 cmd, oldcmd;
-	int i;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	oldcmd = cmd;
-
-	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-		struct resource *res = &dev->resource[i];
-
-		/* Only set up the requested stuff */
-		if (!(mask & (1<<i)))
-			continue;
-
-		if (res->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (res->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-
-	if (cmd != oldcmd) {
-		printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
-		       pci_name(dev), cmd);
-                /* Enable the appropriate bits in the PCI command register.  */
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
-}
-
-/*
- * Return the domain number for this bus.
- */
-int pci_domain_nr(struct pci_bus *bus)
-{
-#ifdef CONFIG_PPC_ISERIES
-	return 0;
-#else
-	struct pci_controller *hose = pci_bus_to_host(bus);
-
-	return hose->global_number;
-#endif
-}
-
-EXPORT_SYMBOL(pci_domain_nr);
-
-/* Decide whether to display the domain number in /proc */
-int pci_proc_domain(struct pci_bus *bus)
-{
-#ifdef CONFIG_PPC_ISERIES
-	return 0;
-#else
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	return hose->buid;
-#endif
-}
-
-/*
- * Platform support for /proc/bus/pci/X/Y mmap()s,
- * modelled on the sparc64 implementation by Dave Miller.
- *  -- paulus.
- */
-
-/*
- * Adjust vm_pgoff of VMA such that it is the physical page offset
- * corresponding to the 32-bit pci bus offset for DEV requested by the user.
- *
- * Basically, the user finds the base address for his device which he wishes
- * to mmap.  They read the 32-bit value from the config space base register,
- * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
- * offset parameter of mmap on /proc/bus/pci/XXX for that device.
- *
- * Returns negative error code on failure, zero on success.
- */
-static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
-					       unsigned long *offset,
-					       enum pci_mmap_state mmap_state)
-{
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	unsigned long io_offset = 0;
-	int i, res_bit;
-
-	if (hose == 0)
-		return NULL;		/* should never happen */
-
-	/* If memory, add on the PCI bridge address offset */
-	if (mmap_state == pci_mmap_mem) {
-		*offset += hose->pci_mem_offset;
-		res_bit = IORESOURCE_MEM;
-	} else {
-		io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
-		*offset += io_offset;
-		res_bit = IORESOURCE_IO;
-	}
-
-	/*
-	 * Check that the offset requested corresponds to one of the
-	 * resources of the device.
-	 */
-	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-		struct resource *rp = &dev->resource[i];
-		int flags = rp->flags;
-
-		/* treat ROM as memory (should be already) */
-		if (i == PCI_ROM_RESOURCE)
-			flags |= IORESOURCE_MEM;
-
-		/* Active and same type? */
-		if ((flags & res_bit) == 0)
-			continue;
-
-		/* In the range of this resource? */
-		if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
-			continue;
-
-		/* found it! construct the final physical address */
-		if (mmap_state == pci_mmap_io)
-		       	*offset += hose->io_base_phys - io_offset;
-		return rp;
-	}
-
-	return NULL;
-}
-
-/*
- * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
- * device mapping.
- */
-static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
-				      pgprot_t protection,
-				      enum pci_mmap_state mmap_state,
-				      int write_combine)
-{
-	unsigned long prot = pgprot_val(protection);
-
-	/* Write combine is always 0 on non-memory space mappings. On
-	 * memory space, if the user didn't pass 1, we check for a
-	 * "prefetchable" resource. This is a bit hackish, but we use
-	 * this to workaround the inability of /sysfs to provide a write
-	 * combine bit
-	 */
-	if (mmap_state != pci_mmap_mem)
-		write_combine = 0;
-	else if (write_combine == 0) {
-		if (rp->flags & IORESOURCE_PREFETCH)
-			write_combine = 1;
-	}
-
-	/* XXX would be nice to have a way to ask for write-through */
-	prot |= _PAGE_NO_CACHE;
-	if (write_combine)
-		prot &= ~_PAGE_GUARDED;
-	else
-		prot |= _PAGE_GUARDED;
-
-	printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start,
-	       prot);
-
-	return __pgprot(prot);
-}
-
-/*
- * This one is used by /dev/mem and fbdev who have no clue about the
- * PCI device, it tries to find the PCI device first and calls the
- * above routine
- */
-pgprot_t pci_phys_mem_access_prot(struct file *file,
-				  unsigned long pfn,
-				  unsigned long size,
-				  pgprot_t protection)
-{
-	struct pci_dev *pdev = NULL;
-	struct resource *found = NULL;
-	unsigned long prot = pgprot_val(protection);
-	unsigned long offset = pfn << PAGE_SHIFT;
-	int i;
-
-	if (page_is_ram(pfn))
-		return __pgprot(prot);
-
-	prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
-
-	for_each_pci_dev(pdev) {
-		for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-			struct resource *rp = &pdev->resource[i];
-			int flags = rp->flags;
-
-			/* Active and same type? */
-			if ((flags & IORESOURCE_MEM) == 0)
-				continue;
-			/* In the range of this resource? */
-			if (offset < (rp->start & PAGE_MASK) ||
-			    offset > rp->end)
-				continue;
-			found = rp;
-			break;
-		}
-		if (found)
-			break;
-	}
-	if (found) {
-		if (found->flags & IORESOURCE_PREFETCH)
-			prot &= ~_PAGE_GUARDED;
-		pci_dev_put(pdev);
-	}
-
-	DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
-
-	return __pgprot(prot);
-}
-
-
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture.  The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
-			enum pci_mmap_state mmap_state,
-			int write_combine)
-{
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	struct resource *rp;
-	int ret;
-
-	rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
-	if (rp == NULL)
-		return -EINVAL;
-
-	vma->vm_pgoff = offset >> PAGE_SHIFT;
-	vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO;
-	vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
-						  vma->vm_page_prot,
-						  mmap_state, write_combine);
-
-	ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
-
-	return ret;
-}
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
-static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct pci_dev *pdev;
-	struct device_node *np;
-
-	pdev = to_pci_dev (dev);
-	np = pci_device_to_OF_node(pdev);
-	if (np == NULL || np->full_name == NULL)
-		return 0;
-	return sprintf(buf, "%s", np->full_name);
-}
-static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-
-void pcibios_add_platform_entries(struct pci_dev *pdev)
-{
-#ifdef CONFIG_PPC_MULTIPLATFORM
-	device_create_file(&pdev->dev, &dev_attr_devspec);
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-}
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
-
-#define ISA_SPACE_MASK 0x1
-#define ISA_SPACE_IO 0x1
-
-static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
-				      unsigned long phb_io_base_phys,
-				      void __iomem * phb_io_base_virt)
-{
-	struct isa_range *range;
-	unsigned long pci_addr;
-	unsigned int isa_addr;
-	unsigned int size;
-	int rlen = 0;
-
-	range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
-	if (range == NULL || (rlen < sizeof(struct isa_range))) {
-		printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
-		       "mapping 64k\n");
-		__ioremap_explicit(phb_io_base_phys,
-				   (unsigned long)phb_io_base_virt,
-				   0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED);
-		return;	
-	}
-	
-	/* From "ISA Binding to 1275"
-	 * The ranges property is laid out as an array of elements,
-	 * each of which comprises:
-	 *   cells 0 - 1:	an ISA address
-	 *   cells 2 - 4:	a PCI address 
-	 *			(size depending on dev->n_addr_cells)
-	 *   cell 5:		the size of the range
-	 */
-	if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) {
-		isa_addr = range->isa_addr.a_lo;
-		pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | 
-			range->pci_addr.a_lo;
-
-		/* Assume these are both zero */
-		if ((pci_addr != 0) || (isa_addr != 0)) {
-			printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
-					__FUNCTION__);
-			return;
-		}
-		
-		size = PAGE_ALIGN(range->size);
-
-		__ioremap_explicit(phb_io_base_phys, 
-				   (unsigned long) phb_io_base_virt, 
-				   size, _PAGE_NO_CACHE | _PAGE_GUARDED);
-	}
-}
-
-void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
-					    struct device_node *dev, int prim)
-{
-	unsigned int *ranges, pci_space;
-	unsigned long size;
-	int rlen = 0;
-	int memno = 0;
-	struct resource *res;
-	int np, na = prom_n_addr_cells(dev);
-	unsigned long pci_addr, cpu_phys_addr;
-
-	np = na + 5;
-
-	/* From "PCI Binding to 1275"
-	 * The ranges property is laid out as an array of elements,
-	 * each of which comprises:
-	 *   cells 0 - 2:	a PCI address
-	 *   cells 3 or 3+4:	a CPU physical address
-	 *			(size depending on dev->n_addr_cells)
-	 *   cells 4+5 or 5+6:	the size of the range
-	 */
-	rlen = 0;
-	hose->io_base_phys = 0;
-	ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
-	while ((rlen -= np * sizeof(unsigned int)) >= 0) {
-		res = NULL;
-		pci_space = ranges[0];
-		pci_addr = ((unsigned long)ranges[1] << 32) | ranges[2];
-
-		cpu_phys_addr = ranges[3];
-		if (na >= 2)
-			cpu_phys_addr = (cpu_phys_addr << 32) | ranges[4];
-
-		size = ((unsigned long)ranges[na+3] << 32) | ranges[na+4];
-		ranges += np;
-		if (size == 0)
-			continue;
-
-		/* Now consume following elements while they are contiguous */
-		while (rlen >= np * sizeof(unsigned int)) {
-			unsigned long addr, phys;
-
-			if (ranges[0] != pci_space)
-				break;
-			addr = ((unsigned long)ranges[1] << 32) | ranges[2];
-			phys = ranges[3];
-			if (na >= 2)
-				phys = (phys << 32) | ranges[4];
-			if (addr != pci_addr + size ||
-			    phys != cpu_phys_addr + size)
-				break;
-
-			size += ((unsigned long)ranges[na+3] << 32)
-				| ranges[na+4];
-			ranges += np;
-			rlen -= np * sizeof(unsigned int);
-		}
-
-		switch ((pci_space >> 24) & 0x3) {
-		case 1:		/* I/O space */
-			hose->io_base_phys = cpu_phys_addr;
-			hose->pci_io_size = size;
-
-			res = &hose->io_resource;
-			res->flags = IORESOURCE_IO;
-			res->start = pci_addr;
-			DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number,
-				    res->start, res->start + size - 1);
-			break;
-		case 2:		/* memory space */
-			memno = 0;
-			while (memno < 3 && hose->mem_resources[memno].flags)
-				++memno;
-
-			if (memno == 0)
-				hose->pci_mem_offset = cpu_phys_addr - pci_addr;
-			if (memno < 3) {
-				res = &hose->mem_resources[memno];
-				res->flags = IORESOURCE_MEM;
-				res->start = cpu_phys_addr;
-				DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number,
-					    res->start, res->start + size - 1);
-			}
-			break;
-		}
-		if (res != NULL) {
-			res->name = dev->full_name;
-			res->end = res->start + size - 1;
-			res->parent = NULL;
-			res->sibling = NULL;
-			res->child = NULL;
-		}
-	}
-}
-
-void __init pci_setup_phb_io(struct pci_controller *hose, int primary)
-{
-	unsigned long size = hose->pci_io_size;
-	unsigned long io_virt_offset;
-	struct resource *res;
-	struct device_node *isa_dn;
-
-	hose->io_base_virt = reserve_phb_iospace(size);
-	DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
-		hose->global_number, hose->io_base_phys,
-		(unsigned long) hose->io_base_virt);
-
-	if (primary) {
-		pci_io_base = (unsigned long)hose->io_base_virt;
-		isa_dn = of_find_node_by_type(NULL, "isa");
-		if (isa_dn) {
-			isa_io_base = pci_io_base;
-			pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
-						hose->io_base_virt);
-			of_node_put(isa_dn);
-			/* Allow all IO */
-			io_page_mask = -1;
-		}
-	}
-
-	io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
-	res = &hose->io_resource;
-	res->start += io_virt_offset;
-	res->end += io_virt_offset;
-}
-
-void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
-					int primary)
-{
-	unsigned long size = hose->pci_io_size;
-	unsigned long io_virt_offset;
-	struct resource *res;
-
-	hose->io_base_virt = __ioremap(hose->io_base_phys, size,
-					_PAGE_NO_CACHE | _PAGE_GUARDED);
-	DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
-		hose->global_number, hose->io_base_phys,
-		(unsigned long) hose->io_base_virt);
-
-	if (primary)
-		pci_io_base = (unsigned long)hose->io_base_virt;
-
-	io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
-	res = &hose->io_resource;
-	res->start += io_virt_offset;
-	res->end += io_virt_offset;
-}
-
-
-static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
-				unsigned long *start_virt, unsigned long *size)
-{
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pci_bus_region region;
-	struct resource *res;
-
-	if (bus->self) {
-		res = bus->resource[0];
-		pcibios_resource_to_bus(bus->self, &region, res);
-		*start_phys = hose->io_base_phys + region.start;
-		*start_virt = (unsigned long) hose->io_base_virt + 
-				region.start;
-		if (region.end > region.start) 
-			*size = region.end - region.start + 1;
-		else {
-			printk("%s(): unexpected region 0x%lx->0x%lx\n", 
-					__FUNCTION__, region.start, region.end);
-			return 1;
-		}
-		
-	} else {
-		/* Root Bus */
-		res = &hose->io_resource;
-		*start_phys = hose->io_base_phys;
-		*start_virt = (unsigned long) hose->io_base_virt;
-		if (res->end > res->start)
-			*size = res->end - res->start + 1;
-		else {
-			printk("%s(): unexpected region 0x%lx->0x%lx\n", 
-					__FUNCTION__, res->start, res->end);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-int unmap_bus_range(struct pci_bus *bus)
-{
-	unsigned long start_phys;
-	unsigned long start_virt;
-	unsigned long size;
-
-	if (!bus) {
-		printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
-		return 1;
-	}
-	
-	if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
-		return 1;
-	if (iounmap_explicit((void __iomem *) start_virt, size))
-		return 1;
-
-	return 0;
-}
-EXPORT_SYMBOL(unmap_bus_range);
-
-int remap_bus_range(struct pci_bus *bus)
-{
-	unsigned long start_phys;
-	unsigned long start_virt;
-	unsigned long size;
-
-	if (!bus) {
-		printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
-		return 1;
-	}
-	
-	
-	if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
-		return 1;
-	printk("mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
-	if (__ioremap_explicit(start_phys, start_virt, size,
-			       _PAGE_NO_CACHE | _PAGE_GUARDED))
-		return 1;
-
-	return 0;
-}
-EXPORT_SYMBOL(remap_bus_range);
-
-void phbs_remap_io(void)
-{
-	struct pci_controller *hose, *tmp;
-
-	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
-		remap_bus_range(hose->bus);
-}
-
-/*
- * ppc64 can have multifunction devices that do not respond to function 0.
- * In this case we must scan all functions.
- * XXX this can go now, we use the OF device tree in all the
- * cases that caused problems. -- paulus
- */
-int pcibios_scan_all_fns(struct pci_bus *bus, int devfn)
-{
-       return 0;
-}
-
-static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
-{
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	unsigned long start, end, mask, offset;
-
-	if (res->flags & IORESOURCE_IO) {
-		offset = (unsigned long)hose->io_base_virt - pci_io_base;
-
-		start = res->start += offset;
-		end = res->end += offset;
-
-		/* Need to allow IO access to pages that are in the
-		   ISA range */
-		if (start < MAX_ISA_PORT) {
-			if (end > MAX_ISA_PORT)
-				end = MAX_ISA_PORT;
-
-			start >>= PAGE_SHIFT;
-			end >>= PAGE_SHIFT;
-
-			/* get the range of pages for the map */
-			mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1);
-			io_page_mask |= mask;
-		}
-	} else if (res->flags & IORESOURCE_MEM) {
-		res->start += hose->pci_mem_offset;
-		res->end += hose->pci_mem_offset;
-	}
-}
-
-void __devinit pcibios_fixup_device_resources(struct pci_dev *dev,
-					      struct pci_bus *bus)
-{
-	/* Update device resources.  */
-	int i;
-
-	for (i = 0; i < PCI_NUM_RESOURCES; i++)
-		if (dev->resource[i].flags)
-			fixup_resource(&dev->resource[i], dev);
-}
-EXPORT_SYMBOL(pcibios_fixup_device_resources);
-
-static void __devinit do_bus_setup(struct pci_bus *bus)
-{
-	struct pci_dev *dev;
-
-	ppc_md.iommu_bus_setup(bus);
-
-	list_for_each_entry(dev, &bus->devices, bus_list)
-		ppc_md.iommu_dev_setup(dev);
-
-	if (ppc_md.irq_bus_setup)
-		ppc_md.irq_bus_setup(bus);
-}
-
-void __devinit pcibios_fixup_bus(struct pci_bus *bus)
-{
-	struct pci_dev *dev = bus->self;
-
-	if (dev && pci_probe_only &&
-	    (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
-		/* This is a subordinate bridge */
-
-		pci_read_bridge_bases(bus);
-		pcibios_fixup_device_resources(dev, bus);
-	}
-
-	do_bus_setup(bus);
-
-	if (!pci_probe_only)
-		return;
-
-	list_for_each_entry(dev, &bus->devices, bus_list)
-		if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
-			pcibios_fixup_device_resources(dev, bus);
-}
-EXPORT_SYMBOL(pcibios_fixup_bus);
-
-/*
- * Reads the interrupt pin to determine if interrupt is use by card.
- * If the interrupt is used, then gets the interrupt line from the 
- * openfirmware and sets it in the pci_dev and pci_config line.
- */
-int pci_read_irq_line(struct pci_dev *pci_dev)
-{
-	u8 intpin;
-	struct device_node *node;
-
-    	pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin);
-	if (intpin == 0)
-		return 0;
-
-	node = pci_device_to_OF_node(pci_dev);
-	if (node == NULL)
-		return -1;
-
-	if (node->n_intrs == 0)
-		return -1;
-
-	pci_dev->irq = node->intrs[0].line;
-
-	pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq);
-
-	return 0;
-}
-EXPORT_SYMBOL(pci_read_irq_line);
-
-void pci_resource_to_user(const struct pci_dev *dev, int bar,
-			  const struct resource *rsrc,
-			  u64 *start, u64 *end)
-{
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	unsigned long offset = 0;
-
-	if (hose == NULL)
-		return;
-
-	if (rsrc->flags & IORESOURCE_IO)
-		offset = pci_io_base - (unsigned long)hose->io_base_virt +
-			hose->io_base_phys;
-
-	*start = rsrc->start + offset;
-	*end = rsrc->end + offset;
-}
-
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-
-
-#define IOBASE_BRIDGE_NUMBER	0
-#define IOBASE_MEMORY		1
-#define IOBASE_IO		2
-#define IOBASE_ISA_IO		3
-#define IOBASE_ISA_MEM		4
-
-long sys_pciconfig_iobase(long which, unsigned long in_bus,
-			  unsigned long in_devfn)
-{
-	struct pci_controller* hose;
-	struct list_head *ln;
-	struct pci_bus *bus = NULL;
-	struct device_node *hose_node;
-
-	/* Argh ! Please forgive me for that hack, but that's the
-	 * simplest way to get existing XFree to not lockup on some
-	 * G5 machines... So when something asks for bus 0 io base
-	 * (bus 0 is HT root), we return the AGP one instead.
-	 */
-	if (machine_is_compatible("MacRISC4"))
-		if (in_bus == 0)
-			in_bus = 0xf0;
-
-	/* That syscall isn't quite compatible with PCI domains, but it's
-	 * used on pre-domains setup. We return the first match
-	 */
-
-	for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
-		bus = pci_bus_b(ln);
-		if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate))
-			break;
-		bus = NULL;
-	}
-	if (bus == NULL || bus->sysdata == NULL)
-		return -ENODEV;
-
-	hose_node = (struct device_node *)bus->sysdata;
-	hose = PCI_DN(hose_node)->phb;
-
-	switch (which) {
-	case IOBASE_BRIDGE_NUMBER:
-		return (long)hose->first_busno;
-	case IOBASE_MEMORY:
-		return (long)hose->pci_mem_offset;
-	case IOBASE_IO:
-		return (long)hose->io_base_phys;
-	case IOBASE_ISA_IO:
-		return (long)isa_io_base;
-	case IOBASE_ISA_MEM:
-		return -EINVAL;
-	}
-
-	return -EOPNOTSUPP;
-}
diff --git a/arch/ppc64/kernel/pci_direct_iommu.c b/arch/ppc64/kernel/pci_direct_iommu.c
deleted file mode 100644
index e1a32f802c0b..000000000000
--- a/arch/ppc64/kernel/pci_direct_iommu.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Support for DMA from PCI devices to main memory on
- * machines without an iommu or with directly addressable
- * RAM (typically a pmac with 2Gb of RAM or less)
- *
- * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/mm.h>
-#include <linux/dma-mapping.h>
-
-#include <asm/sections.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-#include <asm/pmac_feature.h>
-#include <asm/abs_addr.h>
-#include <asm/ppc-pci.h>
-
-static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size,
-				   dma_addr_t *dma_handle, gfp_t flag)
-{
-	void *ret;
-
-	ret = (void *)__get_free_pages(flag, get_order(size));
-	if (ret != NULL) {
-		memset(ret, 0, size);
-		*dma_handle = virt_to_abs(ret);
-	}
-	return ret;
-}
-
-static void pci_direct_free_coherent(struct device *hwdev, size_t size,
-				 void *vaddr, dma_addr_t dma_handle)
-{
-	free_pages((unsigned long)vaddr, get_order(size));
-}
-
-static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr,
-		size_t size, enum dma_data_direction direction)
-{
-	return virt_to_abs(ptr);
-}
-
-static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
-		size_t size, enum dma_data_direction direction)
-{
-}
-
-static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg,
-		int nents, enum dma_data_direction direction)
-{
-	int i;
-
-	for (i = 0; i < nents; i++, sg++) {
-		sg->dma_address = page_to_phys(sg->page) + sg->offset;
-		sg->dma_length = sg->length;
-	}
-
-	return nents;
-}
-
-static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg,
-		int nents, enum dma_data_direction direction)
-{
-}
-
-static int pci_direct_dma_supported(struct device *dev, u64 mask)
-{
-	return mask < 0x100000000ull;
-}
-
-void __init pci_direct_iommu_init(void)
-{
-	pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent;
-	pci_dma_ops.free_coherent = pci_direct_free_coherent;
-	pci_dma_ops.map_single = pci_direct_map_single;
-	pci_dma_ops.unmap_single = pci_direct_unmap_single;
-	pci_dma_ops.map_sg = pci_direct_map_sg;
-	pci_dma_ops.unmap_sg = pci_direct_unmap_sg;
-	pci_dma_ops.dma_supported = pci_direct_dma_supported;
-}
diff --git a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c
deleted file mode 100644
index 12c4c9e9bbc7..000000000000
--- a/arch/ppc64/kernel/pci_dn.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * pci_dn.c
- *
- * Copyright (C) 2001 Todd Inglett, IBM Corporation
- *
- * PCI manipulation via device_nodes.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *    
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/bootmem.h>
-
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/pSeries_reconfig.h>
-#include <asm/ppc-pci.h>
-
-/*
- * Traverse_func that inits the PCI fields of the device node.
- * NOTE: this *must* be done before read/write config to the device.
- */
-static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
-{
-	struct pci_controller *phb = data;
-	int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL);
-	u32 *regs;
-	struct pci_dn *pdn;
-
-	if (mem_init_done)
-		pdn = kmalloc(sizeof(*pdn), GFP_KERNEL);
-	else
-		pdn = alloc_bootmem(sizeof(*pdn));
-	if (pdn == NULL)
-		return NULL;
-	memset(pdn, 0, sizeof(*pdn));
-	dn->data = pdn;
-	pdn->node = dn;
-	pdn->phb = phb;
-	regs = (u32 *)get_property(dn, "reg", NULL);
-	if (regs) {
-		/* First register entry is addr (00BBSS00)  */
-		pdn->busno = (regs[0] >> 16) & 0xff;
-		pdn->devfn = (regs[0] >> 8) & 0xff;
-	}
-
-	pdn->pci_ext_config_space = (type && *type == 1);
-	return NULL;
-}
-
-/*
- * Traverse a device tree stopping each PCI device in the tree.
- * This is done depth first.  As each node is processed, a "pre"
- * function is called and the children are processed recursively.
- *
- * The "pre" func returns a value.  If non-zero is returned from
- * the "pre" func, the traversal stops and this value is returned.
- * This return value is useful when using traverse as a method of
- * finding a device.
- *
- * NOTE: we do not run the func for devices that do not appear to
- * be PCI except for the start node which we assume (this is good
- * because the start node is often a phb which may be missing PCI
- * properties).
- * We use the class-code as an indicator. If we run into
- * one of these nodes we also assume its siblings are non-pci for
- * performance.
- */
-void *traverse_pci_devices(struct device_node *start, traverse_func pre,
-		void *data)
-{
-	struct device_node *dn, *nextdn;
-	void *ret;
-
-	/* We started with a phb, iterate all childs */
-	for (dn = start->child; dn; dn = nextdn) {
-		u32 *classp, class;
-
-		nextdn = NULL;
-		classp = (u32 *)get_property(dn, "class-code", NULL);
-		class = classp ? *classp : 0;
-
-		if (pre && ((ret = pre(dn, data)) != NULL))
-			return ret;
-
-		/* If we are a PCI bridge, go down */
-		if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
-				  (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
-			/* Depth first...do children */
-			nextdn = dn->child;
-		else if (dn->sibling)
-			/* ok, try next sibling instead. */
-			nextdn = dn->sibling;
-		if (!nextdn) {
-			/* Walk up to next valid sibling. */
-			do {
-				dn = dn->parent;
-				if (dn == start)
-					return NULL;
-			} while (dn->sibling == NULL);
-			nextdn = dn->sibling;
-		}
-	}
-	return NULL;
-}
-
-/** 
- * pci_devs_phb_init_dynamic - setup pci devices under this PHB
- * phb: pci-to-host bridge (top-level bridge connecting to cpu)
- *
- * This routine is called both during boot, (before the memory
- * subsystem is set up, before kmalloc is valid) and during the 
- * dynamic lpar operation of adding a PHB to a running system.
- */
-void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
-{
-	struct device_node * dn = (struct device_node *) phb->arch_data;
-	struct pci_dn *pdn;
-
-	/* PHB nodes themselves must not match */
-	update_dn_pci_info(dn, phb);
-	pdn = dn->data;
-	if (pdn) {
-		pdn->devfn = pdn->busno = -1;
-		pdn->phb = phb;
-	}
-
-	/* Update dn->phb ptrs for new phb and children devices */
-	traverse_pci_devices(dn, update_dn_pci_info, phb);
-}
-
-/*
- * Traversal func that looks for a <busno,devfcn> value.
- * If found, the pci_dn is returned (thus terminating the traversal).
- */
-static void *is_devfn_node(struct device_node *dn, void *data)
-{
-	int busno = ((unsigned long)data >> 8) & 0xff;
-	int devfn = ((unsigned long)data) & 0xff;
-	struct pci_dn *pci = dn->data;
-
-	if (pci && (devfn == pci->devfn) && (busno == pci->busno))
-		return dn;
-	return NULL;
-}
-
-/*
- * This is the "slow" path for looking up a device_node from a
- * pci_dev.  It will hunt for the device under its parent's
- * phb and then update sysdata for a future fastpath.
- *
- * It may also do fixups on the actual device since this happens
- * on the first read/write.
- *
- * Note that it also must deal with devices that don't exist.
- * In this case it may probe for real hardware ("just in case")
- * and add a device_node to the device tree if necessary.
- *
- */
-struct device_node *fetch_dev_dn(struct pci_dev *dev)
-{
-	struct device_node *orig_dn = dev->sysdata;
-	struct device_node *dn;
-	unsigned long searchval = (dev->bus->number << 8) | dev->devfn;
-
-	dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval);
-	if (dn)
-		dev->sysdata = dn;
-	return dn;
-}
-EXPORT_SYMBOL(fetch_dev_dn);
-
-static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
-{
-	struct device_node *np = node;
-	struct pci_dn *pci = NULL;
-	int err = NOTIFY_OK;
-
-	switch (action) {
-	case PSERIES_RECONFIG_ADD:
-		pci = np->parent->data;
-		if (pci)
-			update_dn_pci_info(np, pci->phb);
-		break;
-	default:
-		err = NOTIFY_DONE;
-		break;
-	}
-	return err;
-}
-
-static struct notifier_block pci_dn_reconfig_nb = {
-	.notifier_call = pci_dn_reconfig_notifier,
-};
-
-/** 
- * pci_devs_phb_init - Initialize phbs and pci devs under them.
- * 
- * This routine walks over all phb's (pci-host bridges) on the
- * system, and sets up assorted pci-related structures 
- * (including pci info in the device node structs) for each
- * pci device found underneath.  This routine runs once,
- * early in the boot sequence.
- */
-void __init pci_devs_phb_init(void)
-{
-	struct pci_controller *phb, *tmp;
-
-	/* This must be done first so the device nodes have valid pci info! */
-	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
-		pci_devs_phb_init_dynamic(phb);
-
-	pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
-}
diff --git a/arch/ppc64/kernel/pci_iommu.c b/arch/ppc64/kernel/pci_iommu.c
deleted file mode 100644
index bdf15dbbf4f0..000000000000
--- a/arch/ppc64/kernel/pci_iommu.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * arch/ppc64/kernel/pci_iommu.c
- * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
- *
- * Rewrite, cleanup, new allocation schemes:
- * Copyright (C) 2004 Olof Johansson, IBM Corporation
- *
- * Dynamic DMA mapping support, platform-independent parts.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/iommu.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-#include <asm/ppc-pci.h>
-
-/*
- * We can use ->sysdata directly and avoid the extra work in
- * pci_device_to_OF_node since ->sysdata will have been initialised
- * in the iommu init code for all devices.
- */
-#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata))
-
-static inline struct iommu_table *devnode_table(struct device *dev)
-{
-	struct pci_dev *pdev;
-
-	if (!dev) {
-		pdev = ppc64_isabridge_dev;
-		if (!pdev)
-			return NULL;
-	} else
-		pdev = to_pci_dev(dev);
-
-	return PCI_DN(PCI_GET_DN(pdev))->iommu_table;
-}
-
-
-/* Allocates a contiguous real buffer and creates mappings over it.
- * Returns the virtual address of the buffer and sets dma_handle
- * to the dma address (mapping) of the first page.
- */
-static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size,
-			   dma_addr_t *dma_handle, gfp_t flag)
-{
-	return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle,
-			flag);
-}
-
-static void pci_iommu_free_coherent(struct device *hwdev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle)
-{
-	iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle);
-}
-
-/* Creates TCEs for a user provided buffer.  The user buffer must be 
- * contiguous real kernel storage (not vmalloc).  The address of the buffer
- * passed here is the kernel (virtual) address of the buffer.  The buffer
- * need not be page aligned, the dma_addr_t returned will point to the same
- * byte within the page as vaddr.
- */
-static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr,
-		size_t size, enum dma_data_direction direction)
-{
-	return iommu_map_single(devnode_table(hwdev), vaddr, size, direction);
-}
-
-
-static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle,
-		size_t size, enum dma_data_direction direction)
-{
-	iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction);
-}
-
-
-static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist,
-		int nelems, enum dma_data_direction direction)
-{
-	return iommu_map_sg(pdev, devnode_table(pdev), sglist,
-			nelems, direction);
-}
-
-static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist,
-		int nelems, enum dma_data_direction direction)
-{
-	iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction);
-}
-
-/* We support DMA to/from any memory page via the iommu */
-static int pci_iommu_dma_supported(struct device *dev, u64 mask)
-{
-	return 1;
-}
-
-void pci_iommu_init(void)
-{
-	pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent;
-	pci_dma_ops.free_coherent = pci_iommu_free_coherent;
-	pci_dma_ops.map_single = pci_iommu_map_single;
-	pci_dma_ops.unmap_single = pci_iommu_unmap_single;
-	pci_dma_ops.map_sg = pci_iommu_map_sg;
-	pci_dma_ops.unmap_sg = pci_iommu_unmap_sg;
-	pci_dma_ops.dma_supported = pci_iommu_dma_supported;
-}
diff --git a/arch/ppc64/kernel/ppc_ksyms.c b/arch/ppc64/kernel/ppc_ksyms.c
deleted file mode 100644
index 84006e26342c..000000000000
--- a/arch/ppc64/kernel/ppc_ksyms.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/* 
- * c 2001 PPC 64 Team, IBM Corp
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/console.h>
-#include <net/checksum.h>
-
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/system.h>
-#include <asm/hw_irq.h>
-#include <asm/abs_addr.h>
-#include <asm/cacheflush.h>
-
-EXPORT_SYMBOL(strcpy);
-EXPORT_SYMBOL(strncpy);
-EXPORT_SYMBOL(strcat);
-EXPORT_SYMBOL(strncat);
-EXPORT_SYMBOL(strchr);
-EXPORT_SYMBOL(strrchr);
-EXPORT_SYMBOL(strpbrk);
-EXPORT_SYMBOL(strstr);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strnlen);
-EXPORT_SYMBOL(strcmp);
-EXPORT_SYMBOL(strncmp);
-
-EXPORT_SYMBOL(csum_partial);
-EXPORT_SYMBOL(csum_partial_copy_generic);
-EXPORT_SYMBOL(ip_fast_csum);
-EXPORT_SYMBOL(csum_tcpudp_magic);
-
-EXPORT_SYMBOL(__copy_tofrom_user);
-EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(__strnlen_user);
-
-EXPORT_SYMBOL(reloc_offset);
-
-EXPORT_SYMBOL(_insb);
-EXPORT_SYMBOL(_outsb);
-EXPORT_SYMBOL(_insw);
-EXPORT_SYMBOL(_outsw);
-EXPORT_SYMBOL(_insl);
-EXPORT_SYMBOL(_outsl);
-EXPORT_SYMBOL(_insw_ns);
-EXPORT_SYMBOL(_outsw_ns);
-EXPORT_SYMBOL(_insl_ns);
-EXPORT_SYMBOL(_outsl_ns);
-
-EXPORT_SYMBOL(kernel_thread);
-
-EXPORT_SYMBOL(giveup_fpu);
-#ifdef CONFIG_ALTIVEC
-EXPORT_SYMBOL(giveup_altivec);
-#endif
-EXPORT_SYMBOL(__flush_icache_range);
-EXPORT_SYMBOL(flush_dcache_range);
-
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(memscan);
-EXPORT_SYMBOL(memcmp);
-EXPORT_SYMBOL(memchr);
-
-EXPORT_SYMBOL(timer_interrupt);
-EXPORT_SYMBOL(console_drivers);
diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c
deleted file mode 100644
index 47cc26e78957..000000000000
--- a/arch/ppc64/kernel/prom.c
+++ /dev/null
@@ -1,1956 +0,0 @@
-/*
- * 
- *
- * Procedures for interfacing to Open Firmware.
- *
- * Paul Mackerras	August 1996.
- * Copyright (C) 1996 Paul Mackerras.
- * 
- *  Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
- *    {engebret|bergner}@us.ibm.com 
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#undef DEBUG
-
-#include <stdarg.h>
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/threads.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/stringify.h>
-#include <linux/delay.h>
-#include <linux/initrd.h>
-#include <linux/bitops.h>
-#include <linux/module.h>
-#include <linux/module.h>
-
-#include <asm/prom.h>
-#include <asm/rtas.h>
-#include <asm/lmb.h>
-#include <asm/abs_addr.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/system.h>
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/pci.h>
-#include <asm/iommu.h>
-#include <asm/btext.h>
-#include <asm/sections.h>
-#include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-struct pci_reg_property {
-	struct pci_address addr;
-	u32 size_hi;
-	u32 size_lo;
-};
-
-struct isa_reg_property {
-	u32 space;
-	u32 address;
-	u32 size;
-};
-
-
-typedef int interpret_func(struct device_node *, unsigned long *,
-			   int, int, int);
-
-extern struct rtas_t rtas;
-extern struct lmb lmb;
-extern unsigned long klimit;
-extern unsigned long memory_limit;
-
-static int __initdata dt_root_addr_cells;
-static int __initdata dt_root_size_cells;
-static int __initdata iommu_is_off;
-int __initdata iommu_force_on;
-unsigned long tce_alloc_start, tce_alloc_end;
-
-typedef u32 cell_t;
-
-#if 0
-static struct boot_param_header *initial_boot_params __initdata;
-#else
-struct boot_param_header *initial_boot_params;
-#endif
-
-static struct device_node *allnodes = NULL;
-
-/* use when traversing tree through the allnext, child, sibling,
- * or parent members of struct device_node.
- */
-static DEFINE_RWLOCK(devtree_lock);
-
-/* export that to outside world */
-struct device_node *of_chosen;
-
-/*
- * Wrapper for allocating memory for various data that needs to be
- * attached to device nodes as they are processed at boot or when
- * added to the device tree later (e.g. DLPAR).  At boot there is
- * already a region reserved so we just increment *mem_start by size;
- * otherwise we call kmalloc.
- */
-static void * prom_alloc(unsigned long size, unsigned long *mem_start)
-{
-	unsigned long tmp;
-
-	if (!mem_start)
-		return kmalloc(size, GFP_KERNEL);
-
-	tmp = *mem_start;
-	*mem_start += size;
-	return (void *)tmp;
-}
-
-/*
- * Find the device_node with a given phandle.
- */
-static struct device_node * find_phandle(phandle ph)
-{
-	struct device_node *np;
-
-	for (np = allnodes; np != 0; np = np->allnext)
-		if (np->linux_phandle == ph)
-			return np;
-	return NULL;
-}
-
-/*
- * Find the interrupt parent of a node.
- */
-static struct device_node * __devinit intr_parent(struct device_node *p)
-{
-	phandle *parp;
-
-	parp = (phandle *) get_property(p, "interrupt-parent", NULL);
-	if (parp == NULL)
-		return p->parent;
-	return find_phandle(*parp);
-}
-
-/*
- * Find out the size of each entry of the interrupts property
- * for a node.
- */
-int __devinit prom_n_intr_cells(struct device_node *np)
-{
-	struct device_node *p;
-	unsigned int *icp;
-
-	for (p = np; (p = intr_parent(p)) != NULL; ) {
-		icp = (unsigned int *)
-			get_property(p, "#interrupt-cells", NULL);
-		if (icp != NULL)
-			return *icp;
-		if (get_property(p, "interrupt-controller", NULL) != NULL
-		    || get_property(p, "interrupt-map", NULL) != NULL) {
-			printk("oops, node %s doesn't have #interrupt-cells\n",
-			       p->full_name);
-			return 1;
-		}
-	}
-#ifdef DEBUG_IRQ
-	printk("prom_n_intr_cells failed for %s\n", np->full_name);
-#endif
-	return 1;
-}
-
-/*
- * Map an interrupt from a device up to the platform interrupt
- * descriptor.
- */
-static int __devinit map_interrupt(unsigned int **irq, struct device_node **ictrler,
-				   struct device_node *np, unsigned int *ints,
-				   int nintrc)
-{
-	struct device_node *p, *ipar;
-	unsigned int *imap, *imask, *ip;
-	int i, imaplen, match;
-	int newintrc = 0, newaddrc = 0;
-	unsigned int *reg;
-	int naddrc;
-
-	reg = (unsigned int *) get_property(np, "reg", NULL);
-	naddrc = prom_n_addr_cells(np);
-	p = intr_parent(np);
-	while (p != NULL) {
-		if (get_property(p, "interrupt-controller", NULL) != NULL)
-			/* this node is an interrupt controller, stop here */
-			break;
-		imap = (unsigned int *)
-			get_property(p, "interrupt-map", &imaplen);
-		if (imap == NULL) {
-			p = intr_parent(p);
-			continue;
-		}
-		imask = (unsigned int *)
-			get_property(p, "interrupt-map-mask", NULL);
-		if (imask == NULL) {
-			printk("oops, %s has interrupt-map but no mask\n",
-			       p->full_name);
-			return 0;
-		}
-		imaplen /= sizeof(unsigned int);
-		match = 0;
-		ipar = NULL;
-		while (imaplen > 0 && !match) {
-			/* check the child-interrupt field */
-			match = 1;
-			for (i = 0; i < naddrc && match; ++i)
-				match = ((reg[i] ^ imap[i]) & imask[i]) == 0;
-			for (; i < naddrc + nintrc && match; ++i)
-				match = ((ints[i-naddrc] ^ imap[i]) & imask[i]) == 0;
-			imap += naddrc + nintrc;
-			imaplen -= naddrc + nintrc;
-			/* grab the interrupt parent */
-			ipar = find_phandle((phandle) *imap++);
-			--imaplen;
-			if (ipar == NULL) {
-				printk("oops, no int parent %x in map of %s\n",
-				       imap[-1], p->full_name);
-				return 0;
-			}
-			/* find the parent's # addr and intr cells */
-			ip = (unsigned int *)
-				get_property(ipar, "#interrupt-cells", NULL);
-			if (ip == NULL) {
-				printk("oops, no #interrupt-cells on %s\n",
-				       ipar->full_name);
-				return 0;
-			}
-			newintrc = *ip;
-			ip = (unsigned int *)
-				get_property(ipar, "#address-cells", NULL);
-			newaddrc = (ip == NULL)? 0: *ip;
-			imap += newaddrc + newintrc;
-			imaplen -= newaddrc + newintrc;
-		}
-		if (imaplen < 0) {
-			printk("oops, error decoding int-map on %s, len=%d\n",
-			       p->full_name, imaplen);
-			return 0;
-		}
-		if (!match) {
-#ifdef DEBUG_IRQ
-			printk("oops, no match in %s int-map for %s\n",
-			       p->full_name, np->full_name);
-#endif
-			return 0;
-		}
-		p = ipar;
-		naddrc = newaddrc;
-		nintrc = newintrc;
-		ints = imap - nintrc;
-		reg = ints - naddrc;
-	}
-	if (p == NULL) {
-#ifdef DEBUG_IRQ
-		printk("hmmm, int tree for %s doesn't have ctrler\n",
-		       np->full_name);
-#endif
-		return 0;
-	}
-	*irq = ints;
-	*ictrler = p;
-	return nintrc;
-}
-
-static int __devinit finish_node_interrupts(struct device_node *np,
-					    unsigned long *mem_start,
-					    int measure_only)
-{
-	unsigned int *ints;
-	int intlen, intrcells, intrcount;
-	int i, j, n;
-	unsigned int *irq, virq;
-	struct device_node *ic;
-
-	ints = (unsigned int *) get_property(np, "interrupts", &intlen);
-	if (ints == NULL)
-		return 0;
-	intrcells = prom_n_intr_cells(np);
-	intlen /= intrcells * sizeof(unsigned int);
-
-	np->intrs = prom_alloc(intlen * sizeof(*(np->intrs)), mem_start);
-	if (!np->intrs)
-		return -ENOMEM;
-
-	if (measure_only)
-		return 0;
-
-	intrcount = 0;
-	for (i = 0; i < intlen; ++i, ints += intrcells) {
-		n = map_interrupt(&irq, &ic, np, ints, intrcells);
-		if (n <= 0)
-			continue;
-
-		/* don't map IRQ numbers under a cascaded 8259 controller */
-		if (ic && device_is_compatible(ic, "chrp,iic")) {
-			np->intrs[intrcount].line = irq[0];
-		} else {
-			virq = virt_irq_create_mapping(irq[0]);
-			if (virq == NO_IRQ) {
-				printk(KERN_CRIT "Could not allocate interrupt"
-				       " number for %s\n", np->full_name);
-				continue;
-			}
-			np->intrs[intrcount].line = irq_offset_up(virq);
-		}
-
-		/* We offset irq numbers for the u3 MPIC by 128 in PowerMac */
-		if (_machine == PLATFORM_POWERMAC && ic && ic->parent) {
-			char *name = get_property(ic->parent, "name", NULL);
-			if (name && !strcmp(name, "u3"))
-				np->intrs[intrcount].line += 128;
-			else if (!(name && !strcmp(name, "mac-io")))
-				/* ignore other cascaded controllers, such as
-				   the k2-sata-root */
-				break;
-		}
-		np->intrs[intrcount].sense = 1;
-		if (n > 1)
-			np->intrs[intrcount].sense = irq[1];
-		if (n > 2) {
-			printk("hmmm, got %d intr cells for %s:", n,
-			       np->full_name);
-			for (j = 0; j < n; ++j)
-				printk(" %d", irq[j]);
-			printk("\n");
-		}
-		++intrcount;
-	}
-	np->n_intrs = intrcount;
-
-	return 0;
-}
-
-static int __devinit interpret_pci_props(struct device_node *np,
-					 unsigned long *mem_start,
-					 int naddrc, int nsizec,
-					 int measure_only)
-{
-	struct address_range *adr;
-	struct pci_reg_property *pci_addrs;
-	int i, l, n_addrs;
-
-	pci_addrs = (struct pci_reg_property *)
-		get_property(np, "assigned-addresses", &l);
-	if (!pci_addrs)
-		return 0;
-
-	n_addrs = l / sizeof(*pci_addrs);
-
-	adr = prom_alloc(n_addrs * sizeof(*adr), mem_start);
-	if (!adr)
-		return -ENOMEM;
-
- 	if (measure_only)
- 		return 0;
-
- 	np->addrs = adr;
- 	np->n_addrs = n_addrs;
-
- 	for (i = 0; i < n_addrs; i++) {
- 		adr[i].space = pci_addrs[i].addr.a_hi;
- 		adr[i].address = pci_addrs[i].addr.a_lo |
-			((u64)pci_addrs[i].addr.a_mid << 32);
- 		adr[i].size = pci_addrs[i].size_lo;
-	}
-
-	return 0;
-}
-
-static int __init interpret_dbdma_props(struct device_node *np,
-					unsigned long *mem_start,
-					int naddrc, int nsizec,
-					int measure_only)
-{
-	struct reg_property32 *rp;
-	struct address_range *adr;
-	unsigned long base_address;
-	int i, l;
-	struct device_node *db;
-
-	base_address = 0;
-	if (!measure_only) {
-		for (db = np->parent; db != NULL; db = db->parent) {
-			if (!strcmp(db->type, "dbdma") && db->n_addrs != 0) {
-				base_address = db->addrs[0].address;
-				break;
-			}
-		}
-	}
-
-	rp = (struct reg_property32 *) get_property(np, "reg", &l);
-	if (rp != 0 && l >= sizeof(struct reg_property32)) {
-		i = 0;
-		adr = (struct address_range *) (*mem_start);
-		while ((l -= sizeof(struct reg_property32)) >= 0) {
-			if (!measure_only) {
-				adr[i].space = 2;
-				adr[i].address = rp[i].address + base_address;
-				adr[i].size = rp[i].size;
-			}
-			++i;
-		}
-		np->addrs = adr;
-		np->n_addrs = i;
-		(*mem_start) += i * sizeof(struct address_range);
-	}
-
-	return 0;
-}
-
-static int __init interpret_macio_props(struct device_node *np,
-					unsigned long *mem_start,
-					int naddrc, int nsizec,
-					int measure_only)
-{
-	struct reg_property32 *rp;
-	struct address_range *adr;
-	unsigned long base_address;
-	int i, l;
-	struct device_node *db;
-
-	base_address = 0;
-	if (!measure_only) {
-		for (db = np->parent; db != NULL; db = db->parent) {
-			if (!strcmp(db->type, "mac-io") && db->n_addrs != 0) {
-				base_address = db->addrs[0].address;
-				break;
-			}
-		}
-	}
-
-	rp = (struct reg_property32 *) get_property(np, "reg", &l);
-	if (rp != 0 && l >= sizeof(struct reg_property32)) {
-		i = 0;
-		adr = (struct address_range *) (*mem_start);
-		while ((l -= sizeof(struct reg_property32)) >= 0) {
-			if (!measure_only) {
-				adr[i].space = 2;
-				adr[i].address = rp[i].address + base_address;
-				adr[i].size = rp[i].size;
-			}
-			++i;
-		}
-		np->addrs = adr;
-		np->n_addrs = i;
-		(*mem_start) += i * sizeof(struct address_range);
-	}
-
-	return 0;
-}
-
-static int __init interpret_isa_props(struct device_node *np,
-				      unsigned long *mem_start,
-				      int naddrc, int nsizec,
-				      int measure_only)
-{
-	struct isa_reg_property *rp;
-	struct address_range *adr;
-	int i, l;
-
-	rp = (struct isa_reg_property *) get_property(np, "reg", &l);
-	if (rp != 0 && l >= sizeof(struct isa_reg_property)) {
-		i = 0;
-		adr = (struct address_range *) (*mem_start);
-		while ((l -= sizeof(struct isa_reg_property)) >= 0) {
-			if (!measure_only) {
-				adr[i].space = rp[i].space;
-				adr[i].address = rp[i].address;
-				adr[i].size = rp[i].size;
-			}
-			++i;
-		}
-		np->addrs = adr;
-		np->n_addrs = i;
-		(*mem_start) += i * sizeof(struct address_range);
-	}
-
-	return 0;
-}
-
-static int __init interpret_root_props(struct device_node *np,
-				       unsigned long *mem_start,
-				       int naddrc, int nsizec,
-				       int measure_only)
-{
-	struct address_range *adr;
-	int i, l;
-	unsigned int *rp;
-	int rpsize = (naddrc + nsizec) * sizeof(unsigned int);
-
-	rp = (unsigned int *) get_property(np, "reg", &l);
-	if (rp != 0 && l >= rpsize) {
-		i = 0;
-		adr = (struct address_range *) (*mem_start);
-		while ((l -= rpsize) >= 0) {
-			if (!measure_only) {
-				adr[i].space = 0;
-				adr[i].address = rp[naddrc - 1];
-				adr[i].size = rp[naddrc + nsizec - 1];
-			}
-			++i;
-			rp += naddrc + nsizec;
-		}
-		np->addrs = adr;
-		np->n_addrs = i;
-		(*mem_start) += i * sizeof(struct address_range);
-	}
-
-	return 0;
-}
-
-static int __devinit finish_node(struct device_node *np,
-				 unsigned long *mem_start,
-				 interpret_func *ifunc,
-				 int naddrc, int nsizec,
-				 int measure_only)
-{
-	struct device_node *child;
-	int *ip, rc = 0;
-
-	/* get the device addresses and interrupts */
-	if (ifunc != NULL)
-		rc = ifunc(np, mem_start, naddrc, nsizec, measure_only);
-	if (rc)
-		goto out;
-
-	rc = finish_node_interrupts(np, mem_start, measure_only);
-	if (rc)
-		goto out;
-
-	/* Look for #address-cells and #size-cells properties. */
-	ip = (int *) get_property(np, "#address-cells", NULL);
-	if (ip != NULL)
-		naddrc = *ip;
-	ip = (int *) get_property(np, "#size-cells", NULL);
-	if (ip != NULL)
-		nsizec = *ip;
-
-	if (!strcmp(np->name, "device-tree") || np->parent == NULL)
-		ifunc = interpret_root_props;
-	else if (np->type == 0)
-		ifunc = NULL;
-	else if (!strcmp(np->type, "pci") || !strcmp(np->type, "vci"))
-		ifunc = interpret_pci_props;
-	else if (!strcmp(np->type, "dbdma"))
-		ifunc = interpret_dbdma_props;
-	else if (!strcmp(np->type, "mac-io") || ifunc == interpret_macio_props)
-		ifunc = interpret_macio_props;
-	else if (!strcmp(np->type, "isa"))
-		ifunc = interpret_isa_props;
-	else if (!strcmp(np->name, "uni-n") || !strcmp(np->name, "u3"))
-		ifunc = interpret_root_props;
-	else if (!((ifunc == interpret_dbdma_props
-		    || ifunc == interpret_macio_props)
-		   && (!strcmp(np->type, "escc")
-		       || !strcmp(np->type, "media-bay"))))
-		ifunc = NULL;
-
-	for (child = np->child; child != NULL; child = child->sibling) {
-		rc = finish_node(child, mem_start, ifunc,
-				 naddrc, nsizec, measure_only);
-		if (rc)
-			goto out;
-	}
-out:
-	return rc;
-}
-
-/**
- * finish_device_tree is called once things are running normally
- * (i.e. with text and data mapped to the address they were linked at).
- * It traverses the device tree and fills in some of the additional,
- * fields in each node like {n_}addrs and {n_}intrs, the virt interrupt
- * mapping is also initialized at this point.
- */
-void __init finish_device_tree(void)
-{
-	unsigned long start, end, size = 0;
-
-	DBG(" -> finish_device_tree\n");
-
-	if (ppc64_interrupt_controller == IC_INVALID) {
-		DBG("failed to configure interrupt controller type\n");
-		panic("failed to configure interrupt controller type\n");
-	}
-	
-	/* Initialize virtual IRQ map */
-	virt_irq_init();
-
-	/*
-	 * Finish device-tree (pre-parsing some properties etc...)
-	 * We do this in 2 passes. One with "measure_only" set, which
-	 * will only measure the amount of memory needed, then we can
-	 * allocate that memory, and call finish_node again. However,
-	 * we must be careful as most routines will fail nowadays when
-	 * prom_alloc() returns 0, so we must make sure our first pass
-	 * doesn't start at 0. We pre-initialize size to 16 for that
-	 * reason and then remove those additional 16 bytes
-	 */
-	size = 16;
-	finish_node(allnodes, &size, NULL, 0, 0, 1);
-	size -= 16;
-	end = start = (unsigned long)abs_to_virt(lmb_alloc(size, 128));
-	finish_node(allnodes, &end, NULL, 0, 0, 0);
-	BUG_ON(end != start + size);
-
-	DBG(" <- finish_device_tree\n");
-}
-
-#ifdef DEBUG
-#define printk udbg_printf
-#endif
-
-static inline char *find_flat_dt_string(u32 offset)
-{
-	return ((char *)initial_boot_params) +
-		initial_boot_params->off_dt_strings + offset;
-}
-
-/**
- * This function is used to scan the flattened device-tree, it is
- * used to extract the memory informations at boot before we can
- * unflatten the tree
- */
-int __init of_scan_flat_dt(int (*it)(unsigned long node,
-				     const char *uname, int depth,
-				     void *data),
-			   void *data)
-{
-	unsigned long p = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
-	int rc = 0;
-	int depth = -1;
-
-	do {
-		u32 tag = *((u32 *)p);
-		char *pathp;
-		
-		p += 4;
-		if (tag == OF_DT_END_NODE) {
-			depth --;
-			continue;
-		}
-		if (tag == OF_DT_NOP)
-			continue;
-		if (tag == OF_DT_END)
-			break;
-		if (tag == OF_DT_PROP) {
-			u32 sz = *((u32 *)p);
-			p += 8;
-			if (initial_boot_params->version < 0x10)
-				p = _ALIGN(p, sz >= 8 ? 8 : 4);
-			p += sz;
-			p = _ALIGN(p, 4);
-			continue;
-		}
-		if (tag != OF_DT_BEGIN_NODE) {
-			printk(KERN_WARNING "Invalid tag %x scanning flattened"
-			       " device tree !\n", tag);
-			return -EINVAL;
-		}
-		depth++;
-		pathp = (char *)p;
-		p = _ALIGN(p + strlen(pathp) + 1, 4);
-		if ((*pathp) == '/') {
-			char *lp, *np;
-			for (lp = NULL, np = pathp; *np; np++)
-				if ((*np) == '/')
-					lp = np+1;
-			if (lp != NULL)
-				pathp = lp;
-		}
-		rc = it(p, pathp, depth, data);
-		if (rc != 0)
-			break;		
-	} while(1);
-
-	return rc;
-}
-
-/**
- * This  function can be used within scan_flattened_dt callback to get
- * access to properties
- */
-void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
-				 unsigned long *size)
-{
-	unsigned long p = node;
-
-	do {
-		u32 tag = *((u32 *)p);
-		u32 sz, noff;
-		const char *nstr;
-
-		p += 4;
-		if (tag == OF_DT_NOP)
-			continue;
-		if (tag != OF_DT_PROP)
-			return NULL;
-
-		sz = *((u32 *)p);
-		noff = *((u32 *)(p + 4));
-		p += 8;
-		if (initial_boot_params->version < 0x10)
-			p = _ALIGN(p, sz >= 8 ? 8 : 4);
-
-		nstr = find_flat_dt_string(noff);
-		if (nstr == NULL) {
-			printk(KERN_WARNING "Can't find property index"
-			       " name !\n");
-			return NULL;
-		}
-		if (strcmp(name, nstr) == 0) {
-			if (size)
-				*size = sz;
-			return (void *)p;
-		}
-		p += sz;
-		p = _ALIGN(p, 4);
-	} while(1);
-}
-
-static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
-				       unsigned long align)
-{
-	void *res;
-
-	*mem = _ALIGN(*mem, align);
-	res = (void *)*mem;
-	*mem += size;
-
-	return res;
-}
-
-static unsigned long __init unflatten_dt_node(unsigned long mem,
-					      unsigned long *p,
-					      struct device_node *dad,
-					      struct device_node ***allnextpp,
-					      unsigned long fpsize)
-{
-	struct device_node *np;
-	struct property *pp, **prev_pp = NULL;
-	char *pathp;
-	u32 tag;
-	unsigned int l, allocl;
-	int has_name = 0;
-	int new_format = 0;
-
-	tag = *((u32 *)(*p));
-	if (tag != OF_DT_BEGIN_NODE) {
-		printk("Weird tag at start of node: %x\n", tag);
-		return mem;
-	}
-	*p += 4;
-	pathp = (char *)*p;
-	l = allocl = strlen(pathp) + 1;
-	*p = _ALIGN(*p + l, 4);
-
-	/* version 0x10 has a more compact unit name here instead of the full
-	 * path. we accumulate the full path size using "fpsize", we'll rebuild
-	 * it later. We detect this because the first character of the name is
-	 * not '/'.
-	 */
-	if ((*pathp) != '/') {
-		new_format = 1;
-		if (fpsize == 0) {
-			/* root node: special case. fpsize accounts for path
-			 * plus terminating zero. root node only has '/', so
-			 * fpsize should be 2, but we want to avoid the first
-			 * level nodes to have two '/' so we use fpsize 1 here
-			 */
-			fpsize = 1;
-			allocl = 2;
-		} else {
-			/* account for '/' and path size minus terminal 0
-			 * already in 'l'
-			 */
-			fpsize += l;
-			allocl = fpsize;
-		}
-	}
-
-
-	np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl,
-				__alignof__(struct device_node));
-	if (allnextpp) {
-		memset(np, 0, sizeof(*np));
-		np->full_name = ((char*)np) + sizeof(struct device_node);
-		if (new_format) {
-			char *p = np->full_name;
-			/* rebuild full path for new format */
-			if (dad && dad->parent) {
-				strcpy(p, dad->full_name);
-#ifdef DEBUG
-				if ((strlen(p) + l + 1) != allocl) {
-					DBG("%s: p: %d, l: %d, a: %d\n",
-					    pathp, strlen(p), l, allocl);
-				}
-#endif
-				p += strlen(p);
-			}
-			*(p++) = '/';
-			memcpy(p, pathp, l);
-		} else
-			memcpy(np->full_name, pathp, l);
-		prev_pp = &np->properties;
-		**allnextpp = np;
-		*allnextpp = &np->allnext;
-		if (dad != NULL) {
-			np->parent = dad;
-			/* we temporarily use the next field as `last_child'*/
-			if (dad->next == 0)
-				dad->child = np;
-			else
-				dad->next->sibling = np;
-			dad->next = np;
-		}
-		kref_init(&np->kref);
-	}
-	while(1) {
-		u32 sz, noff;
-		char *pname;
-
-		tag = *((u32 *)(*p));
-		if (tag == OF_DT_NOP) {
-			*p += 4;
-			continue;
-		}
-		if (tag != OF_DT_PROP)
-			break;
-		*p += 4;
-		sz = *((u32 *)(*p));
-		noff = *((u32 *)((*p) + 4));
-		*p += 8;
-		if (initial_boot_params->version < 0x10)
-			*p = _ALIGN(*p, sz >= 8 ? 8 : 4);
-
-		pname = find_flat_dt_string(noff);
-		if (pname == NULL) {
-			printk("Can't find property name in list !\n");
-			break;
-		}
-		if (strcmp(pname, "name") == 0)
-			has_name = 1;
-		l = strlen(pname) + 1;
-		pp = unflatten_dt_alloc(&mem, sizeof(struct property),
-					__alignof__(struct property));
-		if (allnextpp) {
-			if (strcmp(pname, "linux,phandle") == 0) {
-				np->node = *((u32 *)*p);
-				if (np->linux_phandle == 0)
-					np->linux_phandle = np->node;
-			}
-			if (strcmp(pname, "ibm,phandle") == 0)
-				np->linux_phandle = *((u32 *)*p);
-			pp->name = pname;
-			pp->length = sz;
-			pp->value = (void *)*p;
-			*prev_pp = pp;
-			prev_pp = &pp->next;
-		}
-		*p = _ALIGN((*p) + sz, 4);
-	}
-	/* with version 0x10 we may not have the name property, recreate
-	 * it here from the unit name if absent
-	 */
-	if (!has_name) {
-		char *p = pathp, *ps = pathp, *pa = NULL;
-		int sz;
-
-		while (*p) {
-			if ((*p) == '@')
-				pa = p;
-			if ((*p) == '/')
-				ps = p + 1;
-			p++;
-		}
-		if (pa < ps)
-			pa = p;
-		sz = (pa - ps) + 1;
-		pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz,
-					__alignof__(struct property));
-		if (allnextpp) {
-			pp->name = "name";
-			pp->length = sz;
-			pp->value = (unsigned char *)(pp + 1);
-			*prev_pp = pp;
-			prev_pp = &pp->next;
-			memcpy(pp->value, ps, sz - 1);
-			((char *)pp->value)[sz - 1] = 0;
-			DBG("fixed up name for %s -> %s\n", pathp, pp->value);
-		}
-	}
-	if (allnextpp) {
-		*prev_pp = NULL;
-		np->name = get_property(np, "name", NULL);
-		np->type = get_property(np, "device_type", NULL);
-
-		if (!np->name)
-			np->name = "<NULL>";
-		if (!np->type)
-			np->type = "<NULL>";
-	}
-	while (tag == OF_DT_BEGIN_NODE) {
-		mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize);
-		tag = *((u32 *)(*p));
-	}
-	if (tag != OF_DT_END_NODE) {
-		printk("Weird tag at end of node: %x\n", tag);
-		return mem;
-	}
-	*p += 4;
-	return mem;
-}
-
-
-/**
- * unflattens the device-tree passed by the firmware, creating the
- * tree of struct device_node. It also fills the "name" and "type"
- * pointers of the nodes so the normal device-tree walking functions
- * can be used (this used to be done by finish_device_tree)
- */
-void __init unflatten_device_tree(void)
-{
-	unsigned long start, mem, size;
-	struct device_node **allnextp = &allnodes;
-	char *p = NULL;
-	int l = 0;
-
-	DBG(" -> unflatten_device_tree()\n");
-
-	/* First pass, scan for size */
-	start = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
-	size = unflatten_dt_node(0, &start, NULL, NULL, 0);
-	size = (size | 3) + 1;
-
-	DBG("  size is %lx, allocating...\n", size);
-
-	/* Allocate memory for the expanded device tree */
-	mem = lmb_alloc(size + 4, __alignof__(struct device_node));
-	if (!mem) {
-		DBG("Couldn't allocate memory with lmb_alloc()!\n");
-		panic("Couldn't allocate memory with lmb_alloc()!\n");
-	}
-	mem = (unsigned long)abs_to_virt(mem);
-
-	((u32 *)mem)[size / 4] = 0xdeadbeef;
-
-	DBG("  unflattening...\n", mem);
-
-	/* Second pass, do actual unflattening */
-	start = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
-	unflatten_dt_node(mem, &start, NULL, &allnextp, 0);
-	if (*((u32 *)start) != OF_DT_END)
-		printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start));
-	if (((u32 *)mem)[size / 4] != 0xdeadbeef)
-		printk(KERN_WARNING "End of tree marker overwritten: %08x\n",
-		       ((u32 *)mem)[size / 4] );
-	*allnextp = NULL;
-
-	/* Get pointer to OF "/chosen" node for use everywhere */
-	of_chosen = of_find_node_by_path("/chosen");
-
-	/* Retreive command line */
-	if (of_chosen != NULL) {
-		p = (char *)get_property(of_chosen, "bootargs", &l);
-		if (p != NULL && l > 0)
-			strlcpy(cmd_line, p, min(l, COMMAND_LINE_SIZE));
-	}
-#ifdef CONFIG_CMDLINE
-	if (l == 0 || (l == 1 && (*p) == 0))
-		strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#endif /* CONFIG_CMDLINE */
-
-	DBG("Command line is: %s\n", cmd_line);
-
-	DBG(" <- unflatten_device_tree()\n");
-}
-
-
-static int __init early_init_dt_scan_cpus(unsigned long node,
-					  const char *uname, int depth, void *data)
-{
-	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	u32 *prop;
-	unsigned long size;
-
-	/* We are scanning "cpu" nodes only */
-	if (type == NULL || strcmp(type, "cpu") != 0)
-		return 0;
-
-	if (initial_boot_params && initial_boot_params->version >= 2) {
-		/* version 2 of the kexec param format adds the phys cpuid
-		 * of booted proc.
-		 */
-		boot_cpuid_phys = initial_boot_params->boot_cpuid_phys;
-		boot_cpuid = 0;
-	} else {
-		/* Check if it's the boot-cpu, set it's hw index in paca now */
-		if (of_get_flat_dt_prop(node, "linux,boot-cpu", NULL)
-		    != NULL) {
-			u32 *prop = of_get_flat_dt_prop(node, "reg", NULL);
-			set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop);
-			boot_cpuid_phys = get_hard_smp_processor_id(0);
-		}
-	}
-
-#ifdef CONFIG_ALTIVEC
-	/* Check if we have a VMX and eventually update CPU features */
-	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL);
-	if (prop && (*prop) > 0) {
-		cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
-		cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
-	}
-
-	/* Same goes for Apple's "altivec" property */
-	prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
-	if (prop) {
-		cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
-		cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
-	}
-#endif /* CONFIG_ALTIVEC */
-
-	/*
-	 * Check for an SMT capable CPU and set the CPU feature. We do
-	 * this by looking at the size of the ibm,ppc-interrupt-server#s
-	 * property
-	 */
-	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
-				       &size);
-	cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
-	if (prop && ((size / sizeof(u32)) > 1))
-		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
-
-	return 0;
-}
-
-static int __init early_init_dt_scan_chosen(unsigned long node,
-					    const char *uname, int depth, void *data)
-{
-	u32 *prop;
-	u64 *prop64;
-
-	DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
-
-	if (depth != 1 || strcmp(uname, "chosen") != 0)
-		return 0;
-
-	/* get platform type */
-	prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
-	if (prop == NULL)
-		return 0;
-	_machine = *prop;
-
-	/* check if iommu is forced on or off */
-	if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
-		iommu_is_off = 1;
-	if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
-		iommu_force_on = 1;
-
- 	prop64 = (u64*)of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
- 	if (prop64)
- 		memory_limit = *prop64;
-
- 	prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-start",NULL);
- 	if (prop64)
- 		tce_alloc_start = *prop64;
-
- 	prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
- 	if (prop64)
- 		tce_alloc_end = *prop64;
-
-#ifdef CONFIG_PPC_RTAS
-	/* To help early debugging via the front panel, we retreive a minimal
-	 * set of RTAS infos now if available
-	 */
-	{
-		u64 *basep, *entryp;
-
-		basep = (u64*)of_get_flat_dt_prop(node,
-						  "linux,rtas-base", NULL);
-		entryp = (u64*)of_get_flat_dt_prop(node,
-						   "linux,rtas-entry", NULL);
-		prop = (u32*)of_get_flat_dt_prop(node,
-						 "linux,rtas-size", NULL);
-		if (basep && entryp && prop) {
-			rtas.base = *basep;
-			rtas.entry = *entryp;
-			rtas.size = *prop;
-		}
-	}
-#endif /* CONFIG_PPC_RTAS */
-
-	/* break now */
-	return 1;
-}
-
-static int __init early_init_dt_scan_root(unsigned long node,
-					  const char *uname, int depth, void *data)
-{
-	u32 *prop;
-
-	if (depth != 0)
-		return 0;
-
-	prop = (u32 *)of_get_flat_dt_prop(node, "#size-cells", NULL);
-	dt_root_size_cells = (prop == NULL) ? 1 : *prop;
-	DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
-
-	prop = (u32 *)of_get_flat_dt_prop(node, "#address-cells", NULL);
-	dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
-	DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
-	
-	/* break now */
-	return 1;
-}
-
-static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
-{
-	cell_t *p = *cellp;
-	unsigned long r = 0;
-
-	/* Ignore more than 2 cells */
-	while (s > 2) {
-		p++;
-		s--;
-	}
-	while (s) {
-		r <<= 32;
-		r |= *(p++);
-		s--;
-	}
-
-	*cellp = p;
-	return r;
-}
-
-
-static int __init early_init_dt_scan_memory(unsigned long node,
-					    const char *uname, int depth, void *data)
-{
-	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	cell_t *reg, *endp;
-	unsigned long l;
-
-	/* We are scanning "memory" nodes only */
-	if (type == NULL || strcmp(type, "memory") != 0)
-		return 0;
-
-	reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
-	if (reg == NULL)
-		return 0;
-
-	endp = reg + (l / sizeof(cell_t));
-
-	DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n",
-	    uname, l, reg[0], reg[1], reg[2], reg[3]);
-
-	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
-		unsigned long base, size;
-
-		base = dt_mem_next_cell(dt_root_addr_cells, &reg);
-		size = dt_mem_next_cell(dt_root_size_cells, &reg);
-
-		if (size == 0)
-			continue;
-		DBG(" - %lx ,  %lx\n", base, size);
-		if (iommu_is_off) {
-			if (base >= 0x80000000ul)
-				continue;
-			if ((base + size) > 0x80000000ul)
-				size = 0x80000000ul - base;
-		}
-		lmb_add(base, size);
-	}
-	return 0;
-}
-
-static void __init early_reserve_mem(void)
-{
-	u64 base, size;
-	u64 *reserve_map = (u64 *)(((unsigned long)initial_boot_params) +
-				   initial_boot_params->off_mem_rsvmap);
-	while (1) {
-		base = *(reserve_map++);
-		size = *(reserve_map++);
-		if (size == 0)
-			break;
-		DBG("reserving: %lx -> %lx\n", base, size);
-		lmb_reserve(base, size);
-	}
-
-#if 0
-	DBG("memory reserved, lmbs :\n");
-      	lmb_dump_all();
-#endif
-}
-
-void __init early_init_devtree(void *params)
-{
-	DBG(" -> early_init_devtree()\n");
-
-	/* Setup flat device-tree pointer */
-	initial_boot_params = params;
-
-	/* Retreive various informations from the /chosen node of the
-	 * device-tree, including the platform type, initrd location and
-	 * size, TCE reserve, and more ...
-	 */
-	of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
-
-	/* Scan memory nodes and rebuild LMBs */
-	lmb_init();
-	of_scan_flat_dt(early_init_dt_scan_root, NULL);
-	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
-	lmb_enforce_memory_limit(memory_limit);
-	lmb_analyze();
-	lmb_reserve(0, __pa(klimit));
-
-	/* Reserve LMB regions used by kernel, initrd, dt, etc... */
-	early_reserve_mem();
-
-	DBG("Scanning CPUs ...\n");
-
-	/* Retreive hash table size from flattened tree plus other
-	 * CPU related informations (altivec support, boot CPU ID, ...)
-	 */
-	of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
-
-	DBG(" <- early_init_devtree()\n");
-}
-
-#undef printk
-
-int
-prom_n_addr_cells(struct device_node* np)
-{
-	int* ip;
-	do {
-		if (np->parent)
-			np = np->parent;
-		ip = (int *) get_property(np, "#address-cells", NULL);
-		if (ip != NULL)
-			return *ip;
-	} while (np->parent);
-	/* No #address-cells property for the root node, default to 1 */
-	return 1;
-}
-EXPORT_SYMBOL_GPL(prom_n_addr_cells);
-
-int
-prom_n_size_cells(struct device_node* np)
-{
-	int* ip;
-	do {
-		if (np->parent)
-			np = np->parent;
-		ip = (int *) get_property(np, "#size-cells", NULL);
-		if (ip != NULL)
-			return *ip;
-	} while (np->parent);
-	/* No #size-cells property for the root node, default to 1 */
-	return 1;
-}
-EXPORT_SYMBOL_GPL(prom_n_size_cells);
-
-/**
- * Work out the sense (active-low level / active-high edge)
- * of each interrupt from the device tree.
- */
-void __init prom_get_irq_senses(unsigned char *senses, int off, int max)
-{
-	struct device_node *np;
-	int i, j;
-
-	/* default to level-triggered */
-	memset(senses, 1, max - off);
-
-	for (np = allnodes; np != 0; np = np->allnext) {
-		for (j = 0; j < np->n_intrs; j++) {
-			i = np->intrs[j].line;
-			if (i >= off && i < max)
-				senses[i-off] = np->intrs[j].sense ?
-					IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE :
-					IRQ_SENSE_EDGE | IRQ_POLARITY_POSITIVE;
-		}
-	}
-}
-
-/**
- * Construct and return a list of the device_nodes with a given name.
- */
-struct device_node *
-find_devices(const char *name)
-{
-	struct device_node *head, **prevp, *np;
-
-	prevp = &head;
-	for (np = allnodes; np != 0; np = np->allnext) {
-		if (np->name != 0 && strcasecmp(np->name, name) == 0) {
-			*prevp = np;
-			prevp = &np->next;
-		}
-	}
-	*prevp = NULL;
-	return head;
-}
-EXPORT_SYMBOL(find_devices);
-
-/**
- * Construct and return a list of the device_nodes with a given type.
- */
-struct device_node *
-find_type_devices(const char *type)
-{
-	struct device_node *head, **prevp, *np;
-
-	prevp = &head;
-	for (np = allnodes; np != 0; np = np->allnext) {
-		if (np->type != 0 && strcasecmp(np->type, type) == 0) {
-			*prevp = np;
-			prevp = &np->next;
-		}
-	}
-	*prevp = NULL;
-	return head;
-}
-EXPORT_SYMBOL(find_type_devices);
-
-/**
- * Returns all nodes linked together
- */
-struct device_node *
-find_all_nodes(void)
-{
-	struct device_node *head, **prevp, *np;
-
-	prevp = &head;
-	for (np = allnodes; np != 0; np = np->allnext) {
-		*prevp = np;
-		prevp = &np->next;
-	}
-	*prevp = NULL;
-	return head;
-}
-EXPORT_SYMBOL(find_all_nodes);
-
-/** Checks if the given "compat" string matches one of the strings in
- * the device's "compatible" property
- */
-int
-device_is_compatible(struct device_node *device, const char *compat)
-{
-	const char* cp;
-	int cplen, l;
-
-	cp = (char *) get_property(device, "compatible", &cplen);
-	if (cp == NULL)
-		return 0;
-	while (cplen > 0) {
-		if (strncasecmp(cp, compat, strlen(compat)) == 0)
-			return 1;
-		l = strlen(cp) + 1;
-		cp += l;
-		cplen -= l;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(device_is_compatible);
-
-
-/**
- * Indicates whether the root node has a given value in its
- * compatible property.
- */
-int
-machine_is_compatible(const char *compat)
-{
-	struct device_node *root;
-	int rc = 0;
-
-	root = of_find_node_by_path("/");
-	if (root) {
-		rc = device_is_compatible(root, compat);
-		of_node_put(root);
-	}
-	return rc;
-}
-EXPORT_SYMBOL(machine_is_compatible);
-
-/**
- * Construct and return a list of the device_nodes with a given type
- * and compatible property.
- */
-struct device_node *
-find_compatible_devices(const char *type, const char *compat)
-{
-	struct device_node *head, **prevp, *np;
-
-	prevp = &head;
-	for (np = allnodes; np != 0; np = np->allnext) {
-		if (type != NULL
-		    && !(np->type != 0 && strcasecmp(np->type, type) == 0))
-			continue;
-		if (device_is_compatible(np, compat)) {
-			*prevp = np;
-			prevp = &np->next;
-		}
-	}
-	*prevp = NULL;
-	return head;
-}
-EXPORT_SYMBOL(find_compatible_devices);
-
-/**
- * Find the device_node with a given full_name.
- */
-struct device_node *
-find_path_device(const char *path)
-{
-	struct device_node *np;
-
-	for (np = allnodes; np != 0; np = np->allnext)
-		if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0)
-			return np;
-	return NULL;
-}
-EXPORT_SYMBOL(find_path_device);
-
-/*******
- *
- * New implementation of the OF "find" APIs, return a refcounted
- * object, call of_node_put() when done.  The device tree and list
- * are protected by a rw_lock.
- *
- * Note that property management will need some locking as well,
- * this isn't dealt with yet.
- *
- *******/
-
-/**
- *	of_find_node_by_name - Find a node by its "name" property
- *	@from:	The node to start searching from or NULL, the node
- *		you pass will not be searched, only the next one
- *		will; typically, you pass what the previous call
- *		returned. of_node_put() will be called on it
- *	@name:	The name string to match against
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.
- */
-struct device_node *of_find_node_by_name(struct device_node *from,
-	const char *name)
-{
-	struct device_node *np;
-
-	read_lock(&devtree_lock);
-	np = from ? from->allnext : allnodes;
-	for (; np != 0; np = np->allnext)
-		if (np->name != 0 && strcasecmp(np->name, name) == 0
-		    && of_node_get(np))
-			break;
-	if (from)
-		of_node_put(from);
-	read_unlock(&devtree_lock);
-	return np;
-}
-EXPORT_SYMBOL(of_find_node_by_name);
-
-/**
- *	of_find_node_by_type - Find a node by its "device_type" property
- *	@from:	The node to start searching from or NULL, the node
- *		you pass will not be searched, only the next one
- *		will; typically, you pass what the previous call
- *		returned. of_node_put() will be called on it
- *	@name:	The type string to match against
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.
- */
-struct device_node *of_find_node_by_type(struct device_node *from,
-	const char *type)
-{
-	struct device_node *np;
-
-	read_lock(&devtree_lock);
-	np = from ? from->allnext : allnodes;
-	for (; np != 0; np = np->allnext)
-		if (np->type != 0 && strcasecmp(np->type, type) == 0
-		    && of_node_get(np))
-			break;
-	if (from)
-		of_node_put(from);
-	read_unlock(&devtree_lock);
-	return np;
-}
-EXPORT_SYMBOL(of_find_node_by_type);
-
-/**
- *	of_find_compatible_node - Find a node based on type and one of the
- *                                tokens in its "compatible" property
- *	@from:		The node to start searching from or NULL, the node
- *			you pass will not be searched, only the next one
- *			will; typically, you pass what the previous call
- *			returned. of_node_put() will be called on it
- *	@type:		The type string to match "device_type" or NULL to ignore
- *	@compatible:	The string to match to one of the tokens in the device
- *			"compatible" list.
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.
- */
-struct device_node *of_find_compatible_node(struct device_node *from,
-	const char *type, const char *compatible)
-{
-	struct device_node *np;
-
-	read_lock(&devtree_lock);
-	np = from ? from->allnext : allnodes;
-	for (; np != 0; np = np->allnext) {
-		if (type != NULL
-		    && !(np->type != 0 && strcasecmp(np->type, type) == 0))
-			continue;
-		if (device_is_compatible(np, compatible) && of_node_get(np))
-			break;
-	}
-	if (from)
-		of_node_put(from);
-	read_unlock(&devtree_lock);
-	return np;
-}
-EXPORT_SYMBOL(of_find_compatible_node);
-
-/**
- *	of_find_node_by_path - Find a node matching a full OF path
- *	@path:	The full path to match
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.
- */
-struct device_node *of_find_node_by_path(const char *path)
-{
-	struct device_node *np = allnodes;
-
-	read_lock(&devtree_lock);
-	for (; np != 0; np = np->allnext) {
-		if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0
-		    && of_node_get(np))
-			break;
-	}
-	read_unlock(&devtree_lock);
-	return np;
-}
-EXPORT_SYMBOL(of_find_node_by_path);
-
-/**
- *	of_find_node_by_phandle - Find a node given a phandle
- *	@handle:	phandle of the node to find
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.
- */
-struct device_node *of_find_node_by_phandle(phandle handle)
-{
-	struct device_node *np;
-
-	read_lock(&devtree_lock);
-	for (np = allnodes; np != 0; np = np->allnext)
-		if (np->linux_phandle == handle)
-			break;
-	if (np)
-		of_node_get(np);
-	read_unlock(&devtree_lock);
-	return np;
-}
-EXPORT_SYMBOL(of_find_node_by_phandle);
-
-/**
- *	of_find_all_nodes - Get next node in global list
- *	@prev:	Previous node or NULL to start iteration
- *		of_node_put() will be called on it
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.
- */
-struct device_node *of_find_all_nodes(struct device_node *prev)
-{
-	struct device_node *np;
-
-	read_lock(&devtree_lock);
-	np = prev ? prev->allnext : allnodes;
-	for (; np != 0; np = np->allnext)
-		if (of_node_get(np))
-			break;
-	if (prev)
-		of_node_put(prev);
-	read_unlock(&devtree_lock);
-	return np;
-}
-EXPORT_SYMBOL(of_find_all_nodes);
-
-/**
- *	of_get_parent - Get a node's parent if any
- *	@node:	Node to get parent
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.
- */
-struct device_node *of_get_parent(const struct device_node *node)
-{
-	struct device_node *np;
-
-	if (!node)
-		return NULL;
-
-	read_lock(&devtree_lock);
-	np = of_node_get(node->parent);
-	read_unlock(&devtree_lock);
-	return np;
-}
-EXPORT_SYMBOL(of_get_parent);
-
-/**
- *	of_get_next_child - Iterate a node childs
- *	@node:	parent node
- *	@prev:	previous child of the parent node, or NULL to get first
- *
- *	Returns a node pointer with refcount incremented, use
- *	of_node_put() on it when done.
- */
-struct device_node *of_get_next_child(const struct device_node *node,
-	struct device_node *prev)
-{
-	struct device_node *next;
-
-	read_lock(&devtree_lock);
-	next = prev ? prev->sibling : node->child;
-	for (; next != 0; next = next->sibling)
-		if (of_node_get(next))
-			break;
-	if (prev)
-		of_node_put(prev);
-	read_unlock(&devtree_lock);
-	return next;
-}
-EXPORT_SYMBOL(of_get_next_child);
-
-/**
- *	of_node_get - Increment refcount of a node
- *	@node:	Node to inc refcount, NULL is supported to
- *		simplify writing of callers
- *
- *	Returns node.
- */
-struct device_node *of_node_get(struct device_node *node)
-{
-	if (node)
-		kref_get(&node->kref);
-	return node;
-}
-EXPORT_SYMBOL(of_node_get);
-
-static inline struct device_node * kref_to_device_node(struct kref *kref)
-{
-	return container_of(kref, struct device_node, kref);
-}
-
-/**
- *	of_node_release - release a dynamically allocated node
- *	@kref:  kref element of the node to be released
- *
- *	In of_node_put() this function is passed to kref_put()
- *	as the destructor.
- */
-static void of_node_release(struct kref *kref)
-{
-	struct device_node *node = kref_to_device_node(kref);
-	struct property *prop = node->properties;
-
-	if (!OF_IS_DYNAMIC(node))
-		return;
-	while (prop) {
-		struct property *next = prop->next;
-		kfree(prop->name);
-		kfree(prop->value);
-		kfree(prop);
-		prop = next;
-	}
-	kfree(node->intrs);
-	kfree(node->addrs);
-	kfree(node->full_name);
-	kfree(node->data);
-	kfree(node);
-}
-
-/**
- *	of_node_put - Decrement refcount of a node
- *	@node:	Node to dec refcount, NULL is supported to
- *		simplify writing of callers
- *
- */
-void of_node_put(struct device_node *node)
-{
-	if (node)
-		kref_put(&node->kref, of_node_release);
-}
-EXPORT_SYMBOL(of_node_put);
-
-/*
- * Fix up the uninitialized fields in a new device node:
- * name, type, n_addrs, addrs, n_intrs, intrs, and pci-specific fields
- *
- * A lot of boot-time code is duplicated here, because functions such
- * as finish_node_interrupts, interpret_pci_props, etc. cannot use the
- * slab allocator.
- *
- * This should probably be split up into smaller chunks.
- */
-
-static int of_finish_dynamic_node(struct device_node *node,
-				  unsigned long *unused1, int unused2,
-				  int unused3, int unused4)
-{
-	struct device_node *parent = of_get_parent(node);
-	int err = 0;
-	phandle *ibm_phandle;
-
-	node->name = get_property(node, "name", NULL);
-	node->type = get_property(node, "device_type", NULL);
-
-	if (!parent) {
-		err = -ENODEV;
-		goto out;
-	}
-
-	/* We don't support that function on PowerMac, at least
-	 * not yet
-	 */
-	if (_machine == PLATFORM_POWERMAC)
-		return -ENODEV;
-
-	/* fix up new node's linux_phandle field */
-	if ((ibm_phandle = (unsigned int *)get_property(node, "ibm,phandle", NULL)))
-		node->linux_phandle = *ibm_phandle;
-
-out:
-	of_node_put(parent);
-	return err;
-}
-
-/*
- * Plug a device node into the tree and global list.
- */
-void of_attach_node(struct device_node *np)
-{
-	write_lock(&devtree_lock);
-	np->sibling = np->parent->child;
-	np->allnext = allnodes;
-	np->parent->child = np;
-	allnodes = np;
-	write_unlock(&devtree_lock);
-}
-
-/*
- * "Unplug" a node from the device tree.  The caller must hold
- * a reference to the node.  The memory associated with the node
- * is not freed until its refcount goes to zero.
- */
-void of_detach_node(const struct device_node *np)
-{
-	struct device_node *parent;
-
-	write_lock(&devtree_lock);
-
-	parent = np->parent;
-
-	if (allnodes == np)
-		allnodes = np->allnext;
-	else {
-		struct device_node *prev;
-		for (prev = allnodes;
-		     prev->allnext != np;
-		     prev = prev->allnext)
-			;
-		prev->allnext = np->allnext;
-	}
-
-	if (parent->child == np)
-		parent->child = np->sibling;
-	else {
-		struct device_node *prevsib;
-		for (prevsib = np->parent->child;
-		     prevsib->sibling != np;
-		     prevsib = prevsib->sibling)
-			;
-		prevsib->sibling = np->sibling;
-	}
-
-	write_unlock(&devtree_lock);
-}
-
-static int prom_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
-{
-	int err;
-
-	switch (action) {
-	case PSERIES_RECONFIG_ADD:
-		err = finish_node(node, NULL, of_finish_dynamic_node, 0, 0, 0);
-		if (err < 0) {
-			printk(KERN_ERR "finish_node returned %d\n", err);
-			err = NOTIFY_BAD;
-		}
-		break;
-	default:
-		err = NOTIFY_DONE;
-		break;
-	}
-	return err;
-}
-
-static struct notifier_block prom_reconfig_nb = {
-	.notifier_call = prom_reconfig_notifier,
-	.priority = 10, /* This one needs to run first */
-};
-
-static int __init prom_reconfig_setup(void)
-{
-	return pSeries_reconfig_notifier_register(&prom_reconfig_nb);
-}
-__initcall(prom_reconfig_setup);
-
-/*
- * Find a property with a given name for a given node
- * and return the value.
- */
-unsigned char *
-get_property(struct device_node *np, const char *name, int *lenp)
-{
-	struct property *pp;
-
-	for (pp = np->properties; pp != 0; pp = pp->next)
-		if (strcmp(pp->name, name) == 0) {
-			if (lenp != 0)
-				*lenp = pp->length;
-			return pp->value;
-		}
-	return NULL;
-}
-EXPORT_SYMBOL(get_property);
-
-/*
- * Add a property to a node.
- */
-int
-prom_add_property(struct device_node* np, struct property* prop)
-{
-	struct property **next;
-
-	prop->next = NULL;	
-	write_lock(&devtree_lock);
-	next = &np->properties;
-	while (*next) {
-		if (strcmp(prop->name, (*next)->name) == 0) {
-			/* duplicate ! don't insert it */
-			write_unlock(&devtree_lock);
-			return -1;
-		}
-		next = &(*next)->next;
-	}
-	*next = prop;
-	write_unlock(&devtree_lock);
-
-	/* try to add to proc as well if it was initialized */
-	if (np->pde)
-		proc_device_tree_add_prop(np->pde, prop);
-
-	return 0;
-}
-
-#if 0
-void
-print_properties(struct device_node *np)
-{
-	struct property *pp;
-	char *cp;
-	int i, n;
-
-	for (pp = np->properties; pp != 0; pp = pp->next) {
-		printk(KERN_INFO "%s", pp->name);
-		for (i = strlen(pp->name); i < 16; ++i)
-			printk(" ");
-		cp = (char *) pp->value;
-		for (i = pp->length; i > 0; --i, ++cp)
-			if ((i > 1 && (*cp < 0x20 || *cp > 0x7e))
-			    || (i == 1 && *cp != 0))
-				break;
-		if (i == 0 && pp->length > 1) {
-			/* looks like a string */
-			printk(" %s\n", (char *) pp->value);
-		} else {
-			/* dump it in hex */
-			n = pp->length;
-			if (n > 64)
-				n = 64;
-			if (pp->length % 4 == 0) {
-				unsigned int *p = (unsigned int *) pp->value;
-
-				n /= 4;
-				for (i = 0; i < n; ++i) {
-					if (i != 0 && (i % 4) == 0)
-						printk("\n                ");
-					printk(" %08x", *p++);
-				}
-			} else {
-				unsigned char *bp = pp->value;
-
-				for (i = 0; i < n; ++i) {
-					if (i != 0 && (i % 16) == 0)
-						printk("\n                ");
-					printk(" %02x", *bp++);
-				}
-			}
-			printk("\n");
-			if (pp->length > 64)
-				printk("                 ... (length = %d)\n",
-				       pp->length);
-		}
-	}
-}
-#endif
-
-
-
-
-
-
-
-
-
-
diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c
deleted file mode 100644
index 6375f40b23db..000000000000
--- a/arch/ppc64/kernel/prom_init.c
+++ /dev/null
@@ -1,2051 +0,0 @@
-/*
- * 
- *
- * Procedures for interfacing to Open Firmware.
- *
- * Paul Mackerras	August 1996.
- * Copyright (C) 1996 Paul Mackerras.
- * 
- *  Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
- *    {engebret|bergner}@us.ibm.com 
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#undef DEBUG_PROM
-
-#include <stdarg.h>
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/threads.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/proc_fs.h>
-#include <linux/stringify.h>
-#include <linux/delay.h>
-#include <linux/initrd.h>
-#include <linux/bitops.h>
-#include <asm/prom.h>
-#include <asm/rtas.h>
-#include <asm/abs_addr.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/system.h>
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/pci.h>
-#include <asm/iommu.h>
-#include <asm/btext.h>
-#include <asm/sections.h>
-#include <asm/machdep.h>
-
-#ifdef CONFIG_LOGO_LINUX_CLUT224
-#include <linux/linux_logo.h>
-extern const struct linux_logo logo_linux_clut224;
-#endif
-
-/*
- * Properties whose value is longer than this get excluded from our
- * copy of the device tree. This value does need to be big enough to
- * ensure that we don't lose things like the interrupt-map property
- * on a PCI-PCI bridge.
- */
-#define MAX_PROPERTY_LENGTH	(1UL * 1024 * 1024)
-
-/*
- * Eventually bump that one up
- */
-#define DEVTREE_CHUNK_SIZE	0x100000
-
-/*
- * This is the size of the local memory reserve map that gets copied
- * into the boot params passed to the kernel. That size is totally
- * flexible as the kernel just reads the list until it encounters an
- * entry with size 0, so it can be changed without breaking binary
- * compatibility
- */
-#define MEM_RESERVE_MAP_SIZE	8
-
-/*
- * prom_init() is called very early on, before the kernel text
- * and data have been mapped to KERNELBASE.  At this point the code
- * is running at whatever address it has been loaded at, so
- * references to extern and static variables must be relocated
- * explicitly.  The procedure reloc_offset() returns the address
- * we're currently running at minus the address we were linked at.
- * (Note that strings count as static variables.)
- *
- * Because OF may have mapped I/O devices into the area starting at
- * KERNELBASE, particularly on CHRP machines, we can't safely call
- * OF once the kernel has been mapped to KERNELBASE.  Therefore all
- * OF calls should be done within prom_init(), and prom_init()
- * and all routines called within it must be careful to relocate
- * references as necessary.
- *
- * Note that the bss is cleared *after* prom_init runs, so we have
- * to make sure that any static or extern variables it accesses
- * are put in the data segment.
- */
-
-
-#define PROM_BUG() do {						\
-        prom_printf("kernel BUG at %s line 0x%x!\n",		\
-		    RELOC(__FILE__), __LINE__);			\
-        __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR);	\
-} while (0)
-
-#ifdef DEBUG_PROM
-#define prom_debug(x...)	prom_printf(x)
-#else
-#define prom_debug(x...)
-#endif
-
-
-typedef u32 prom_arg_t;
-
-struct prom_args {
-        u32 service;
-        u32 nargs;
-        u32 nret;
-        prom_arg_t args[10];
-        prom_arg_t *rets;     /* Pointer to return values in args[16]. */
-};
-
-struct prom_t {
-	unsigned long entry;
-	ihandle root;
-	ihandle chosen;
-	int cpu;
-	ihandle stdout;
-	ihandle disp_node;
-	struct prom_args args;
-	unsigned long version;
-	unsigned long root_size_cells;
-	unsigned long root_addr_cells;
-};
-
-struct pci_reg_property {
-	struct pci_address addr;
-	u32 size_hi;
-	u32 size_lo;
-};
-
-struct mem_map_entry {
-	u64	base;
-	u64	size;
-};
-
-typedef u32 cell_t;
-
-extern void __start(unsigned long r3, unsigned long r4, unsigned long r5);
-
-extern void enter_prom(struct prom_args *args, unsigned long entry);
-extern void copy_and_flush(unsigned long dest, unsigned long src,
-			   unsigned long size, unsigned long offset);
-
-extern unsigned long klimit;
-
-/* prom structure */
-static struct prom_t __initdata prom;
-
-#define PROM_SCRATCH_SIZE 256
-
-static char __initdata of_stdout_device[256];
-static char __initdata prom_scratch[PROM_SCRATCH_SIZE];
-
-static unsigned long __initdata dt_header_start;
-static unsigned long __initdata dt_struct_start, dt_struct_end;
-static unsigned long __initdata dt_string_start, dt_string_end;
-
-static unsigned long __initdata prom_initrd_start, prom_initrd_end;
-
-static int __initdata iommu_force_on;
-static int __initdata ppc64_iommu_off;
-static int __initdata of_platform;
-
-static char __initdata prom_cmd_line[COMMAND_LINE_SIZE];
-
-static unsigned long __initdata prom_memory_limit;
-static unsigned long __initdata prom_tce_alloc_start;
-static unsigned long __initdata prom_tce_alloc_end;
-
-static unsigned long __initdata alloc_top;
-static unsigned long __initdata alloc_top_high;
-static unsigned long __initdata alloc_bottom;
-static unsigned long __initdata rmo_top;
-static unsigned long __initdata ram_top;
-
-static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE];
-static int __initdata mem_reserve_cnt;
-
-static cell_t __initdata regbuf[1024];
-
-
-#define MAX_CPU_THREADS 2
-
-/* TO GO */
-#ifdef CONFIG_HMT
-struct {
-	unsigned int pir;
-	unsigned int threadid;
-} hmt_thread_data[NR_CPUS];
-#endif /* CONFIG_HMT */
-
-/*
- * This are used in calls to call_prom.  The 4th and following
- * arguments to call_prom should be 32-bit values.  64 bit values
- * are truncated to 32 bits (and fortunately don't get interpreted
- * as two arguments).
- */
-#define ADDR(x)		(u32) ((unsigned long)(x) - offset)
-
-/*
- * Error results ... some OF calls will return "-1" on error, some
- * will return 0, some will return either. To simplify, here are
- * macros to use with any ihandle or phandle return value to check if
- * it is valid
- */
-
-#define PROM_ERROR		(-1u)
-#define PHANDLE_VALID(p)	((p) != 0 && (p) != PROM_ERROR)
-#define IHANDLE_VALID(i)	((i) != 0 && (i) != PROM_ERROR)
-
-
-/* This is the one and *ONLY* place where we actually call open
- * firmware from, since we need to make sure we're running in 32b
- * mode when we do.  We switch back to 64b mode upon return.
- */
-
-static int __init call_prom(const char *service, int nargs, int nret, ...)
-{
-	int i;
-	unsigned long offset = reloc_offset();
-	struct prom_t *_prom = PTRRELOC(&prom);
-	va_list list;
-
-	_prom->args.service = ADDR(service);
-	_prom->args.nargs = nargs;
-	_prom->args.nret = nret;
-	_prom->args.rets = (prom_arg_t *)&(_prom->args.args[nargs]);
-
-	va_start(list, nret);
-	for (i=0; i < nargs; i++)
-		_prom->args.args[i] = va_arg(list, prom_arg_t);
-	va_end(list);
-
-	for (i=0; i < nret ;i++)
-		_prom->args.rets[i] = 0;
-
-	enter_prom(&_prom->args, _prom->entry);
-
-	return (nret > 0) ? _prom->args.rets[0] : 0;
-}
-
-
-static unsigned int __init prom_claim(unsigned long virt, unsigned long size,
-				unsigned long align)
-{
-	return (unsigned int)call_prom("claim", 3, 1,
-				       (prom_arg_t)virt, (prom_arg_t)size,
-				       (prom_arg_t)align);
-}
-
-static void __init prom_print(const char *msg)
-{
-	const char *p, *q;
-	unsigned long offset = reloc_offset();
-	struct prom_t *_prom = PTRRELOC(&prom);
-
-	if (_prom->stdout == 0)
-		return;
-
-	for (p = msg; *p != 0; p = q) {
-		for (q = p; *q != 0 && *q != '\n'; ++q)
-			;
-		if (q > p)
-			call_prom("write", 3, 1, _prom->stdout, p, q - p);
-		if (*q == 0)
-			break;
-		++q;
-		call_prom("write", 3, 1, _prom->stdout, ADDR("\r\n"), 2);
-	}
-}
-
-
-static void __init prom_print_hex(unsigned long val)
-{
-	unsigned long offset = reloc_offset();
-	int i, nibbles = sizeof(val)*2;
-	char buf[sizeof(val)*2+1];
-	struct prom_t *_prom = PTRRELOC(&prom);
-
-	for (i = nibbles-1;  i >= 0;  i--) {
-		buf[i] = (val & 0xf) + '0';
-		if (buf[i] > '9')
-			buf[i] += ('a'-'0'-10);
-		val >>= 4;
-	}
-	buf[nibbles] = '\0';
-	call_prom("write", 3, 1, _prom->stdout, buf, nibbles);
-}
-
-
-static void __init prom_printf(const char *format, ...)
-{
-	unsigned long offset = reloc_offset();
-	const char *p, *q, *s;
-	va_list args;
-	unsigned long v;
-	struct prom_t *_prom = PTRRELOC(&prom);
-
-	va_start(args, format);
-	for (p = PTRRELOC(format); *p != 0; p = q) {
-		for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q)
-			;
-		if (q > p)
-			call_prom("write", 3, 1, _prom->stdout, p, q - p);
-		if (*q == 0)
-			break;
-		if (*q == '\n') {
-			++q;
-			call_prom("write", 3, 1, _prom->stdout,
-				  ADDR("\r\n"), 2);
-			continue;
-		}
-		++q;
-		if (*q == 0)
-			break;
-		switch (*q) {
-		case 's':
-			++q;
-			s = va_arg(args, const char *);
-			prom_print(s);
-			break;
-		case 'x':
-			++q;
-			v = va_arg(args, unsigned long);
-			prom_print_hex(v);
-			break;
-		}
-	}
-}
-
-
-static void __init __attribute__((noreturn)) prom_panic(const char *reason)
-{
-	unsigned long offset = reloc_offset();
-
-	prom_print(PTRRELOC(reason));
-	/* ToDo: should put up an SRC here */
-	call_prom("exit", 0, 0);
-
-	for (;;)			/* should never get here */
-		;
-}
-
-
-static int __init prom_next_node(phandle *nodep)
-{
-	phandle node;
-
-	if ((node = *nodep) != 0
-	    && (*nodep = call_prom("child", 1, 1, node)) != 0)
-		return 1;
-	if ((*nodep = call_prom("peer", 1, 1, node)) != 0)
-		return 1;
-	for (;;) {
-		if ((node = call_prom("parent", 1, 1, node)) == 0)
-			return 0;
-		if ((*nodep = call_prom("peer", 1, 1, node)) != 0)
-			return 1;
-	}
-}
-
-static int __init prom_getprop(phandle node, const char *pname,
-			       void *value, size_t valuelen)
-{
-	unsigned long offset = reloc_offset();
-
-	return call_prom("getprop", 4, 1, node, ADDR(pname),
-			 (u32)(unsigned long) value, (u32) valuelen);
-}
-
-static int __init prom_getproplen(phandle node, const char *pname)
-{
-	unsigned long offset = reloc_offset();
-
-	return call_prom("getproplen", 2, 1, node, ADDR(pname));
-}
-
-static int __init prom_setprop(phandle node, const char *pname,
-			       void *value, size_t valuelen)
-{
-	unsigned long offset = reloc_offset();
-
-	return call_prom("setprop", 4, 1, node, ADDR(pname),
-			 (u32)(unsigned long) value, (u32) valuelen);
-}
-
-/* We can't use the standard versions because of RELOC headaches. */
-#define isxdigit(c)	(('0' <= (c) && (c) <= '9') \
-			 || ('a' <= (c) && (c) <= 'f') \
-			 || ('A' <= (c) && (c) <= 'F'))
-
-#define isdigit(c)	('0' <= (c) && (c) <= '9')
-#define islower(c)	('a' <= (c) && (c) <= 'z')
-#define toupper(c)	(islower(c) ? ((c) - 'a' + 'A') : (c))
-
-unsigned long prom_strtoul(const char *cp, const char **endp)
-{
-	unsigned long result = 0, base = 10, value;
-
-	if (*cp == '0') {
-		base = 8;
-		cp++;
-		if (toupper(*cp) == 'X') {
-			cp++;
-			base = 16;
-		}
-	}
-
-	while (isxdigit(*cp) &&
-	       (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) {
-		result = result * base + value;
-		cp++;
-	}
-
-	if (endp)
-		*endp = cp;
-
-	return result;
-}
-
-unsigned long prom_memparse(const char *ptr, const char **retptr)
-{
-	unsigned long ret = prom_strtoul(ptr, retptr);
-	int shift = 0;
-
-	/*
-	 * We can't use a switch here because GCC *may* generate a
-	 * jump table which won't work, because we're not running at
-	 * the address we're linked at.
-	 */
-	if ('G' == **retptr || 'g' == **retptr)
-		shift = 30;
-
-	if ('M' == **retptr || 'm' == **retptr)
-		shift = 20;
-
-	if ('K' == **retptr || 'k' == **retptr)
-		shift = 10;
-
-	if (shift) {
-		ret <<= shift;
-		(*retptr)++;
-	}
-
-	return ret;
-}
-
-/*
- * Early parsing of the command line passed to the kernel, used for
- * "mem=x" and the options that affect the iommu
- */
-static void __init early_cmdline_parse(void)
-{
-	unsigned long offset = reloc_offset();
-	struct prom_t *_prom = PTRRELOC(&prom);
-	char *opt, *p;
-	int l = 0;
-
-	RELOC(prom_cmd_line[0]) = 0;
-	p = RELOC(prom_cmd_line);
-	if ((long)_prom->chosen > 0)
-		l = prom_getprop(_prom->chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
-#ifdef CONFIG_CMDLINE
-	if (l == 0) /* dbl check */
-		strlcpy(RELOC(prom_cmd_line),
-			RELOC(CONFIG_CMDLINE), sizeof(prom_cmd_line));
-#endif /* CONFIG_CMDLINE */
-	prom_printf("command line: %s\n", RELOC(prom_cmd_line));
-
-	opt = strstr(RELOC(prom_cmd_line), RELOC("iommu="));
-	if (opt) {
-		prom_printf("iommu opt is: %s\n", opt);
-		opt += 6;
-		while (*opt && *opt == ' ')
-			opt++;
-		if (!strncmp(opt, RELOC("off"), 3))
-			RELOC(ppc64_iommu_off) = 1;
-		else if (!strncmp(opt, RELOC("force"), 5))
-			RELOC(iommu_force_on) = 1;
-	}
-
-	opt = strstr(RELOC(prom_cmd_line), RELOC("mem="));
-	if (opt) {
-		opt += 4;
-		RELOC(prom_memory_limit) = prom_memparse(opt, (const char **)&opt);
-		/* Align to 16 MB == size of large page */
-		RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000);
-	}
-}
-
-/*
- * To tell the firmware what our capabilities are, we have to pass
- * it a fake 32-bit ELF header containing a couple of PT_NOTE sections
- * that contain structures that contain the actual values.
- */
-static struct fake_elf {
-	Elf32_Ehdr	elfhdr;
-	Elf32_Phdr	phdr[2];
-	struct chrpnote {
-		u32	namesz;
-		u32	descsz;
-		u32	type;
-		char	name[8];	/* "PowerPC" */
-		struct chrpdesc {
-			u32	real_mode;
-			u32	real_base;
-			u32	real_size;
-			u32	virt_base;
-			u32	virt_size;
-			u32	load_base;
-		} chrpdesc;
-	} chrpnote;
-	struct rpanote {
-		u32	namesz;
-		u32	descsz;
-		u32	type;
-		char	name[24];	/* "IBM,RPA-Client-Config" */
-		struct rpadesc {
-			u32	lpar_affinity;
-			u32	min_rmo_size;
-			u32	min_rmo_percent;
-			u32	max_pft_size;
-			u32	splpar;
-			u32	min_load;
-			u32	new_mem_def;
-			u32	ignore_me;
-		} rpadesc;
-	} rpanote;
-} fake_elf = {
-	.elfhdr = {
-		.e_ident = { 0x7f, 'E', 'L', 'F',
-			     ELFCLASS32, ELFDATA2MSB, EV_CURRENT },
-		.e_type = ET_EXEC,	/* yeah right */
-		.e_machine = EM_PPC,
-		.e_version = EV_CURRENT,
-		.e_phoff = offsetof(struct fake_elf, phdr),
-		.e_phentsize = sizeof(Elf32_Phdr),
-		.e_phnum = 2
-	},
-	.phdr = {
-		[0] = {
-			.p_type = PT_NOTE,
-			.p_offset = offsetof(struct fake_elf, chrpnote),
-			.p_filesz = sizeof(struct chrpnote)
-		}, [1] = {
-			.p_type = PT_NOTE,
-			.p_offset = offsetof(struct fake_elf, rpanote),
-			.p_filesz = sizeof(struct rpanote)
-		}
-	},
-	.chrpnote = {
-		.namesz = sizeof("PowerPC"),
-		.descsz = sizeof(struct chrpdesc),
-		.type = 0x1275,
-		.name = "PowerPC",
-		.chrpdesc = {
-			.real_mode = ~0U,	/* ~0 means "don't care" */
-			.real_base = ~0U,
-			.real_size = ~0U,
-			.virt_base = ~0U,
-			.virt_size = ~0U,
-			.load_base = ~0U
-		},
-	},
-	.rpanote = {
-		.namesz = sizeof("IBM,RPA-Client-Config"),
-		.descsz = sizeof(struct rpadesc),
-		.type = 0x12759999,
-		.name = "IBM,RPA-Client-Config",
-		.rpadesc = {
-			.lpar_affinity = 0,
-			.min_rmo_size = 64,	/* in megabytes */
-			.min_rmo_percent = 0,
-			.max_pft_size = 48,	/* 2^48 bytes max PFT size */
-			.splpar = 1,
-			.min_load = ~0U,
-			.new_mem_def = 0
-		}
-	}
-};
-
-static void __init prom_send_capabilities(void)
-{
-	unsigned long offset = reloc_offset();
-	ihandle elfloader;
-
-	elfloader = call_prom("open", 1, 1, ADDR("/packages/elf-loader"));
-	if (elfloader == 0) {
-		prom_printf("couldn't open /packages/elf-loader\n");
-		return;
-	}
-	call_prom("call-method", 3, 1, ADDR("process-elf-header"),
-			elfloader, ADDR(&fake_elf));
-	call_prom("close", 1, 0, elfloader);
-}
-
-/*
- * Memory allocation strategy... our layout is normally:
- *
- *  at 14Mb or more we vmlinux, then a gap and initrd. In some rare cases, initrd
- *  might end up beeing before the kernel though. We assume this won't override
- *  the final kernel at 0, we have no provision to handle that in this version,
- *  but it should hopefully never happen.
- *
- *  alloc_top is set to the top of RMO, eventually shrink down if the TCEs overlap
- *  alloc_bottom is set to the top of kernel/initrd
- *
- *  from there, allocations are done that way : rtas is allocated topmost, and
- *  the device-tree is allocated from the bottom. We try to grow the device-tree
- *  allocation as we progress. If we can't, then we fail, we don't currently have
- *  a facility to restart elsewhere, but that shouldn't be necessary neither
- *
- *  Note that calls to reserve_mem have to be done explicitely, memory allocated
- *  with either alloc_up or alloc_down isn't automatically reserved.
- */
-
-
-/*
- * Allocates memory in the RMO upward from the kernel/initrd
- *
- * When align is 0, this is a special case, it means to allocate in place
- * at the current location of alloc_bottom or fail (that is basically
- * extending the previous allocation). Used for the device-tree flattening
- */
-static unsigned long __init alloc_up(unsigned long size, unsigned long align)
-{
-	unsigned long offset = reloc_offset();
-	unsigned long base = _ALIGN_UP(RELOC(alloc_bottom), align);
-	unsigned long addr = 0;
-
-	prom_debug("alloc_up(%x, %x)\n", size, align);
-	if (RELOC(ram_top) == 0)
-		prom_panic("alloc_up() called with mem not initialized\n");
-
-	if (align)
-		base = _ALIGN_UP(RELOC(alloc_bottom), align);
-	else
-		base = RELOC(alloc_bottom);
-
-	for(; (base + size) <= RELOC(alloc_top); 
-	    base = _ALIGN_UP(base + 0x100000, align)) {
-		prom_debug("    trying: 0x%x\n\r", base);
-		addr = (unsigned long)prom_claim(base, size, 0);
-		if (addr != PROM_ERROR)
-			break;
-		addr = 0;
-		if (align == 0)
-			break;
-	}
-	if (addr == 0)
-		return 0;
-	RELOC(alloc_bottom) = addr;
-
-	prom_debug(" -> %x\n", addr);
-	prom_debug("  alloc_bottom : %x\n", RELOC(alloc_bottom));
-	prom_debug("  alloc_top    : %x\n", RELOC(alloc_top));
-	prom_debug("  alloc_top_hi : %x\n", RELOC(alloc_top_high));
-	prom_debug("  rmo_top      : %x\n", RELOC(rmo_top));
-	prom_debug("  ram_top      : %x\n", RELOC(ram_top));
-
-	return addr;
-}
-
-/*
- * Allocates memory downard, either from top of RMO, or if highmem
- * is set, from the top of RAM. Note that this one doesn't handle
- * failures. In does claim memory if highmem is not set.
- */
-static unsigned long __init alloc_down(unsigned long size, unsigned long align,
-				       int highmem)
-{
-	unsigned long offset = reloc_offset();
-	unsigned long base, addr = 0;
-
-	prom_debug("alloc_down(%x, %x, %s)\n", size, align,
-		   highmem ? RELOC("(high)") : RELOC("(low)"));
-	if (RELOC(ram_top) == 0)
-		prom_panic("alloc_down() called with mem not initialized\n");
-
-	if (highmem) {
-		/* Carve out storage for the TCE table. */
-		addr = _ALIGN_DOWN(RELOC(alloc_top_high) - size, align);
-		if (addr <= RELOC(alloc_bottom))
-			return 0;
-		else {
-			/* Will we bump into the RMO ? If yes, check out that we
-			 * didn't overlap existing allocations there, if we did,
-			 * we are dead, we must be the first in town !
-			 */
-			if (addr < RELOC(rmo_top)) {
-				/* Good, we are first */
-				if (RELOC(alloc_top) == RELOC(rmo_top))
-					RELOC(alloc_top) = RELOC(rmo_top) = addr;
-				else
-					return 0;
-			}
-			RELOC(alloc_top_high) = addr;
-		}
-		goto bail;
-	}
-
-	base = _ALIGN_DOWN(RELOC(alloc_top) - size, align);
-	for(; base > RELOC(alloc_bottom); base = _ALIGN_DOWN(base - 0x100000, align))  {
-		prom_debug("    trying: 0x%x\n\r", base);
-		addr = (unsigned long)prom_claim(base, size, 0);
-		if (addr != PROM_ERROR)
-			break;
-		addr = 0;
-	}
-	if (addr == 0)
-		return 0;
-	RELOC(alloc_top) = addr;
-
- bail:
-	prom_debug(" -> %x\n", addr);
-	prom_debug("  alloc_bottom : %x\n", RELOC(alloc_bottom));
-	prom_debug("  alloc_top    : %x\n", RELOC(alloc_top));
-	prom_debug("  alloc_top_hi : %x\n", RELOC(alloc_top_high));
-	prom_debug("  rmo_top      : %x\n", RELOC(rmo_top));
-	prom_debug("  ram_top      : %x\n", RELOC(ram_top));
-
-	return addr;
-}
-
-/*
- * Parse a "reg" cell
- */
-static unsigned long __init prom_next_cell(int s, cell_t **cellp)
-{
-	cell_t *p = *cellp;
-	unsigned long r = 0;
-
-	/* Ignore more than 2 cells */
-	while (s > 2) {
-		p++;
-		s--;
-	}
-	while (s) {
-		r <<= 32;
-		r |= *(p++);
-		s--;
-	}
-
-	*cellp = p;
-	return r;
-}
-
-/*
- * Very dumb function for adding to the memory reserve list, but
- * we don't need anything smarter at this point
- *
- * XXX Eventually check for collisions. They should NEVER happen
- * if problems seem to show up, it would be a good start to track
- * them down.
- */
-static void reserve_mem(unsigned long base, unsigned long size)
-{
-	unsigned long offset = reloc_offset();
-	unsigned long top = base + size;
-	unsigned long cnt = RELOC(mem_reserve_cnt);
-
-	if (size == 0)
-		return;
-
-	/* We need to always keep one empty entry so that we
-	 * have our terminator with "size" set to 0 since we are
-	 * dumb and just copy this entire array to the boot params
-	 */
-	base = _ALIGN_DOWN(base, PAGE_SIZE);
-	top = _ALIGN_UP(top, PAGE_SIZE);
-	size = top - base;
-
-	if (cnt >= (MEM_RESERVE_MAP_SIZE - 1))
-		prom_panic("Memory reserve map exhausted !\n");
-	RELOC(mem_reserve_map)[cnt].base = base;
-	RELOC(mem_reserve_map)[cnt].size = size;
-	RELOC(mem_reserve_cnt) = cnt + 1;
-}
-
-/*
- * Initialize memory allocation mecanism, parse "memory" nodes and
- * obtain that way the top of memory and RMO to setup out local allocator
- */
-static void __init prom_init_mem(void)
-{
-	phandle node;
-	char *path, type[64];
-	unsigned int plen;
-	cell_t *p, *endp;
-	unsigned long offset = reloc_offset();
-	struct prom_t *_prom = PTRRELOC(&prom);
-
-	/*
-	 * We iterate the memory nodes to find
-	 * 1) top of RMO (first node)
-	 * 2) top of memory
-	 */
-	prom_debug("root_addr_cells: %x\n", (long)_prom->root_addr_cells);
-	prom_debug("root_size_cells: %x\n", (long)_prom->root_size_cells);
-
-	prom_debug("scanning memory:\n");
-	path = RELOC(prom_scratch);
-
-	for (node = 0; prom_next_node(&node); ) {
-		type[0] = 0;
-		prom_getprop(node, "device_type", type, sizeof(type));
-
-		if (strcmp(type, RELOC("memory")))
-			continue;
-	
-		plen = prom_getprop(node, "reg", RELOC(regbuf), sizeof(regbuf));
-		if (plen > sizeof(regbuf)) {
-			prom_printf("memory node too large for buffer !\n");
-			plen = sizeof(regbuf);
-		}
-		p = RELOC(regbuf);
-		endp = p + (plen / sizeof(cell_t));
-
-#ifdef DEBUG_PROM
-		memset(path, 0, PROM_SCRATCH_SIZE);
-		call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
-		prom_debug("  node %s :\n", path);
-#endif /* DEBUG_PROM */
-
-		while ((endp - p) >= (_prom->root_addr_cells + _prom->root_size_cells)) {
-			unsigned long base, size;
-
-			base = prom_next_cell(_prom->root_addr_cells, &p);
-			size = prom_next_cell(_prom->root_size_cells, &p);
-
-			if (size == 0)
-				continue;
-			prom_debug("    %x %x\n", base, size);
-			if (base == 0)
-				RELOC(rmo_top) = size;
-			if ((base + size) > RELOC(ram_top))
-				RELOC(ram_top) = base + size;
-		}
-	}
-
-	RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(klimit) - offset + 0x4000);
-
-	/* Check if we have an initrd after the kernel, if we do move our bottom
-	 * point to after it
-	 */
-	if (RELOC(prom_initrd_start)) {
-		if (RELOC(prom_initrd_end) > RELOC(alloc_bottom))
-			RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end));
-	}
-
-	/*
-	 * If prom_memory_limit is set we reduce the upper limits *except* for
-	 * alloc_top_high. This must be the real top of RAM so we can put
-	 * TCE's up there.
-	 */
-
-	RELOC(alloc_top_high) = RELOC(ram_top);
-
-	if (RELOC(prom_memory_limit)) {
-		if (RELOC(prom_memory_limit) <= RELOC(alloc_bottom)) {
-			prom_printf("Ignoring mem=%x <= alloc_bottom.\n",
-				RELOC(prom_memory_limit));
-			RELOC(prom_memory_limit) = 0;
-		} else if (RELOC(prom_memory_limit) >= RELOC(ram_top)) {
-			prom_printf("Ignoring mem=%x >= ram_top.\n",
-				RELOC(prom_memory_limit));
-			RELOC(prom_memory_limit) = 0;
-		} else {
-			RELOC(ram_top) = RELOC(prom_memory_limit);
-			RELOC(rmo_top) = min(RELOC(rmo_top), RELOC(prom_memory_limit));
-		}
-	}
-
-	/*
-	 * Setup our top alloc point, that is top of RMO or top of
-	 * segment 0 when running non-LPAR.
-	 */
-	if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR )
-		RELOC(alloc_top) = RELOC(rmo_top);
-	else
-		/* Some RS64 machines have buggy firmware where claims up at 1GB
-		 * fails. Cap at 768MB as a workaround. Still plenty of room.
-		 */
-		RELOC(alloc_top) = RELOC(rmo_top) = min(0x30000000ul, RELOC(ram_top));
-
-	prom_printf("memory layout at init:\n");
-	prom_printf("  memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit));
-	prom_printf("  alloc_bottom : %x\n", RELOC(alloc_bottom));
-	prom_printf("  alloc_top    : %x\n", RELOC(alloc_top));
-	prom_printf("  alloc_top_hi : %x\n", RELOC(alloc_top_high));
-	prom_printf("  rmo_top      : %x\n", RELOC(rmo_top));
-	prom_printf("  ram_top      : %x\n", RELOC(ram_top));
-}
-
-
-/*
- * Allocate room for and instanciate RTAS
- */
-static void __init prom_instantiate_rtas(void)
-{
-	unsigned long offset = reloc_offset();
-	struct prom_t *_prom = PTRRELOC(&prom);
-	phandle rtas_node;
-	ihandle rtas_inst;
-	u32 base, entry = 0;
-	u32 size = 0;
-
-	prom_debug("prom_instantiate_rtas: start...\n");
-
-	rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas"));
-	prom_debug("rtas_node: %x\n", rtas_node);
-	if (!PHANDLE_VALID(rtas_node))
-		return;
-
-	prom_getprop(rtas_node, "rtas-size", &size, sizeof(size));
-	if (size == 0)
-		return;
-
-	base = alloc_down(size, PAGE_SIZE, 0);
-	if (base == 0) {
-		prom_printf("RTAS allocation failed !\n");
-		return;
-	}
-
-	rtas_inst = call_prom("open", 1, 1, ADDR("/rtas"));
-	if (!IHANDLE_VALID(rtas_inst)) {
-		prom_printf("opening rtas package failed");
-		return;
-	}
-
-	prom_printf("instantiating rtas at 0x%x ...", base);
-
-	if (call_prom("call-method", 3, 2,
-		      ADDR("instantiate-rtas"),
-		      rtas_inst, base) != PROM_ERROR) {
-		entry = (long)_prom->args.rets[1];
-	}
-	if (entry == 0) {
-		prom_printf(" failed\n");
-		return;
-	}
-	prom_printf(" done\n");
-
-	reserve_mem(base, size);
-
-	prom_setprop(rtas_node, "linux,rtas-base", &base, sizeof(base));
-	prom_setprop(rtas_node, "linux,rtas-entry", &entry, sizeof(entry));
-
-	prom_debug("rtas base     = 0x%x\n", base);
-	prom_debug("rtas entry    = 0x%x\n", entry);
-	prom_debug("rtas size     = 0x%x\n", (long)size);
-
-	prom_debug("prom_instantiate_rtas: end...\n");
-}
-
-
-/*
- * Allocate room for and initialize TCE tables
- */
-static void __init prom_initialize_tce_table(void)
-{
-	phandle node;
-	ihandle phb_node;
-	unsigned long offset = reloc_offset();
-	char compatible[64], type[64], model[64];
-	char *path = RELOC(prom_scratch);
-	u64 base, align;
-	u32 minalign, minsize;
-	u64 tce_entry, *tce_entryp;
-	u64 local_alloc_top, local_alloc_bottom;
-	u64 i;
-
-	if (RELOC(ppc64_iommu_off))
-		return;
-
-	prom_debug("starting prom_initialize_tce_table\n");
-
-	/* Cache current top of allocs so we reserve a single block */
-	local_alloc_top = RELOC(alloc_top_high);
-	local_alloc_bottom = local_alloc_top;
-
-	/* Search all nodes looking for PHBs. */
-	for (node = 0; prom_next_node(&node); ) {
-		compatible[0] = 0;
-		type[0] = 0;
-		model[0] = 0;
-		prom_getprop(node, "compatible",
-			     compatible, sizeof(compatible));
-		prom_getprop(node, "device_type", type, sizeof(type));
-		prom_getprop(node, "model", model, sizeof(model));
-
-		if ((type[0] == 0) || (strstr(type, RELOC("pci")) == NULL))
-			continue;
-
-		/* Keep the old logic in tack to avoid regression. */
-		if (compatible[0] != 0) {
-			if ((strstr(compatible, RELOC("python")) == NULL) &&
-			    (strstr(compatible, RELOC("Speedwagon")) == NULL) &&
-			    (strstr(compatible, RELOC("Winnipeg")) == NULL))
-				continue;
-		} else if (model[0] != 0) {
-			if ((strstr(model, RELOC("ython")) == NULL) &&
-			    (strstr(model, RELOC("peedwagon")) == NULL) &&
-			    (strstr(model, RELOC("innipeg")) == NULL))
-				continue;
-		}
-
-		if (prom_getprop(node, "tce-table-minalign", &minalign,
-				 sizeof(minalign)) == PROM_ERROR)
-			minalign = 0;
-		if (prom_getprop(node, "tce-table-minsize", &minsize,
-				 sizeof(minsize)) == PROM_ERROR)
-			minsize = 4UL << 20;
-
-		/*
-		 * Even though we read what OF wants, we just set the table
-		 * size to 4 MB.  This is enough to map 2GB of PCI DMA space.
-		 * By doing this, we avoid the pitfalls of trying to DMA to
-		 * MMIO space and the DMA alias hole.
-		 *
-		 * On POWER4, firmware sets the TCE region by assuming
-		 * each TCE table is 8MB. Using this memory for anything
-		 * else will impact performance, so we always allocate 8MB.
-		 * Anton
-		 */
-		if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p))
-			minsize = 8UL << 20;
-		else
-			minsize = 4UL << 20;
-
-		/* Align to the greater of the align or size */
-		align = max(minalign, minsize);
-		base = alloc_down(minsize, align, 1);
-		if (base == 0)
-			prom_panic("ERROR, cannot find space for TCE table.\n");
-		if (base < local_alloc_bottom)
-			local_alloc_bottom = base;
-
-		/* Save away the TCE table attributes for later use. */
-		prom_setprop(node, "linux,tce-base", &base, sizeof(base));
-		prom_setprop(node, "linux,tce-size", &minsize, sizeof(minsize));
-
-		/* It seems OF doesn't null-terminate the path :-( */
-		memset(path, 0, sizeof(path));
-		/* Call OF to setup the TCE hardware */
-		if (call_prom("package-to-path", 3, 1, node,
-			      path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) {
-			prom_printf("package-to-path failed\n");
-		}
-
-		prom_debug("TCE table: %s\n", path);
-		prom_debug("\tnode = 0x%x\n", node);
-		prom_debug("\tbase = 0x%x\n", base);
-		prom_debug("\tsize = 0x%x\n", minsize);
-
-		/* Initialize the table to have a one-to-one mapping
-		 * over the allocated size.
-		 */
-		tce_entryp = (unsigned long *)base;
-		for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) {
-			tce_entry = (i << PAGE_SHIFT);
-			tce_entry |= 0x3;
-			*tce_entryp = tce_entry;
-		}
-
-		prom_printf("opening PHB %s", path);
-		phb_node = call_prom("open", 1, 1, path);
-		if (phb_node == 0)
-			prom_printf("... failed\n");
-		else
-			prom_printf("... done\n");
-
-		call_prom("call-method", 6, 0, ADDR("set-64-bit-addressing"),
-			  phb_node, -1, minsize,
-			  (u32) base, (u32) (base >> 32));
-		call_prom("close", 1, 0, phb_node);
-	}
-
-	reserve_mem(local_alloc_bottom, local_alloc_top - local_alloc_bottom);
-
-	if (RELOC(prom_memory_limit)) {
-		/*
-		 * We align the start to a 16MB boundary so we can map the TCE area
-		 * using large pages if possible. The end should be the top of RAM
-		 * so no need to align it.
-		 */
-		RELOC(prom_tce_alloc_start) = _ALIGN_DOWN(local_alloc_bottom, 0x1000000);
-		RELOC(prom_tce_alloc_end) = local_alloc_top;
-	}
-
-	/* Flag the first invalid entry */
-	prom_debug("ending prom_initialize_tce_table\n");
-}
-
-/*
- * With CHRP SMP we need to use the OF to start the other
- * processors so we can't wait until smp_boot_cpus (the OF is
- * trashed by then) so we have to put the processors into
- * a holding pattern controlled by the kernel (not OF) before
- * we destroy the OF.
- *
- * This uses a chunk of low memory, puts some holding pattern
- * code there and sends the other processors off to there until
- * smp_boot_cpus tells them to do something.  The holding pattern
- * checks that address until its cpu # is there, when it is that
- * cpu jumps to __secondary_start().  smp_boot_cpus() takes care
- * of setting those values.
- *
- * We also use physical address 0x4 here to tell when a cpu
- * is in its holding pattern code.
- *
- * Fixup comment... DRENG / PPPBBB - Peter
- *
- * -- Cort
- */
-static void __init prom_hold_cpus(void)
-{
-	unsigned long i;
-	unsigned int reg;
-	phandle node;
-	unsigned long offset = reloc_offset();
-	char type[64];
-	int cpuid = 0;
-	unsigned int interrupt_server[MAX_CPU_THREADS];
-	unsigned int cpu_threads, hw_cpu_num;
-	int propsize;
-	extern void __secondary_hold(void);
-	extern unsigned long __secondary_hold_spinloop;
-	extern unsigned long __secondary_hold_acknowledge;
-	unsigned long *spinloop
-		= (void *)virt_to_abs(&__secondary_hold_spinloop);
-	unsigned long *acknowledge
-		= (void *)virt_to_abs(&__secondary_hold_acknowledge);
-	unsigned long secondary_hold
-		= virt_to_abs(*PTRRELOC((unsigned long *)__secondary_hold));
-	struct prom_t *_prom = PTRRELOC(&prom);
-
-	prom_debug("prom_hold_cpus: start...\n");
-	prom_debug("    1) spinloop       = 0x%x\n", (unsigned long)spinloop);
-	prom_debug("    1) *spinloop      = 0x%x\n", *spinloop);
-	prom_debug("    1) acknowledge    = 0x%x\n",
-		   (unsigned long)acknowledge);
-	prom_debug("    1) *acknowledge   = 0x%x\n", *acknowledge);
-	prom_debug("    1) secondary_hold = 0x%x\n", secondary_hold);
-
-	/* Set the common spinloop variable, so all of the secondary cpus
-	 * will block when they are awakened from their OF spinloop.
-	 * This must occur for both SMP and non SMP kernels, since OF will
-	 * be trashed when we move the kernel.
-	 */
-	*spinloop = 0;
-
-#ifdef CONFIG_HMT
-	for (i=0; i < NR_CPUS; i++) {
-		RELOC(hmt_thread_data)[i].pir = 0xdeadbeef;
-	}
-#endif
-	/* look for cpus */
-	for (node = 0; prom_next_node(&node); ) {
-		type[0] = 0;
-		prom_getprop(node, "device_type", type, sizeof(type));
-		if (strcmp(type, RELOC("cpu")) != 0)
-			continue;
-
-		/* Skip non-configured cpus. */
-		if (prom_getprop(node, "status", type, sizeof(type)) > 0)
-			if (strcmp(type, RELOC("okay")) != 0)
-				continue;
-
-		reg = -1;
-		prom_getprop(node, "reg", &reg, sizeof(reg));
-
-		prom_debug("\ncpuid        = 0x%x\n", cpuid);
-		prom_debug("cpu hw idx   = 0x%x\n", reg);
-
-		/* Init the acknowledge var which will be reset by
-		 * the secondary cpu when it awakens from its OF
-		 * spinloop.
-		 */
-		*acknowledge = (unsigned long)-1;
-
-		propsize = prom_getprop(node, "ibm,ppc-interrupt-server#s",
-					&interrupt_server,
-					sizeof(interrupt_server));
-		if (propsize < 0) {
-			/* no property.  old hardware has no SMT */
-			cpu_threads = 1;
-			interrupt_server[0] = reg; /* fake it with phys id */
-		} else {
-			/* We have a threaded processor */
-			cpu_threads = propsize / sizeof(u32);
-			if (cpu_threads > MAX_CPU_THREADS) {
-				prom_printf("SMT: too many threads!\n"
-					    "SMT: found %x, max is %x\n",
-					    cpu_threads, MAX_CPU_THREADS);
-				cpu_threads = 1; /* ToDo: panic? */
-			}
-		}
-
-		hw_cpu_num = interrupt_server[0];
-		if (hw_cpu_num != _prom->cpu) {
-			/* Primary Thread of non-boot cpu */
-			prom_printf("%x : starting cpu hw idx %x... ", cpuid, reg);
-			call_prom("start-cpu", 3, 0, node,
-				  secondary_hold, reg);
-
-			for ( i = 0 ; (i < 100000000) && 
-			      (*acknowledge == ((unsigned long)-1)); i++ )
-				mb();
-
-			if (*acknowledge == reg) {
-				prom_printf("done\n");
-				/* We have to get every CPU out of OF,
-				 * even if we never start it. */
-				if (cpuid >= NR_CPUS)
-					goto next;
-			} else {
-				prom_printf("failed: %x\n", *acknowledge);
-			}
-		}
-#ifdef CONFIG_SMP
-		else
-			prom_printf("%x : boot cpu     %x\n", cpuid, reg);
-#endif
-next:
-#ifdef CONFIG_SMP
-		/* Init paca for secondary threads.   They start later. */
-		for (i=1; i < cpu_threads; i++) {
-			cpuid++;
-			if (cpuid >= NR_CPUS)
-				continue;
-		}
-#endif /* CONFIG_SMP */
-		cpuid++;
-	}
-#ifdef CONFIG_HMT
-	/* Only enable HMT on processors that provide support. */
-	if (__is_processor(PV_PULSAR) || 
-	    __is_processor(PV_ICESTAR) ||
-	    __is_processor(PV_SSTAR)) {
-		prom_printf("    starting secondary threads\n");
-
-		for (i = 0; i < NR_CPUS; i += 2) {
-			if (!cpu_online(i))
-				continue;
-
-			if (i == 0) {
-				unsigned long pir = mfspr(SPRN_PIR);
-				if (__is_processor(PV_PULSAR)) {
-					RELOC(hmt_thread_data)[i].pir = 
-						pir & 0x1f;
-				} else {
-					RELOC(hmt_thread_data)[i].pir = 
-						pir & 0x3ff;
-				}
-			}
-		}
-	} else {
-		prom_printf("Processor is not HMT capable\n");
-	}
-#endif
-
-	if (cpuid > NR_CPUS)
-		prom_printf("WARNING: maximum CPUs (" __stringify(NR_CPUS)
-			    ") exceeded: ignoring extras\n");
-
-	prom_debug("prom_hold_cpus: end...\n");
-}
-
-
-static void __init prom_init_client_services(unsigned long pp)
-{
-	unsigned long offset = reloc_offset();
-	struct prom_t *_prom = PTRRELOC(&prom);
-
-	/* Get a handle to the prom entry point before anything else */
-	_prom->entry = pp;
-
-	/* Init default value for phys size */
-	_prom->root_size_cells = 1;
-	_prom->root_addr_cells = 2;
-
-	/* get a handle for the stdout device */
-	_prom->chosen = call_prom("finddevice", 1, 1, ADDR("/chosen"));
-	if (!PHANDLE_VALID(_prom->chosen))
-		prom_panic("cannot find chosen"); /* msg won't be printed :( */
-
-	/* get device tree root */
-	_prom->root = call_prom("finddevice", 1, 1, ADDR("/"));
-	if (!PHANDLE_VALID(_prom->root))
-		prom_panic("cannot find device tree root"); /* msg won't be printed :( */
-}
-
-static void __init prom_init_stdout(void)
-{
-	unsigned long offset = reloc_offset();
-	struct prom_t *_prom = PTRRELOC(&prom);
-	char *path = RELOC(of_stdout_device);
-	char type[16];
-	u32 val;
-
-	if (prom_getprop(_prom->chosen, "stdout", &val, sizeof(val)) <= 0)
-		prom_panic("cannot find stdout");
-
-	_prom->stdout = val;
-
-	/* Get the full OF pathname of the stdout device */
-	memset(path, 0, 256);
-	call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255);
-	val = call_prom("instance-to-package", 1, 1, _prom->stdout);
-	prom_setprop(_prom->chosen, "linux,stdout-package", &val, sizeof(val));
-	prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device));
-	prom_setprop(_prom->chosen, "linux,stdout-path",
-		     RELOC(of_stdout_device), strlen(RELOC(of_stdout_device))+1);
-
-	/* If it's a display, note it */
-	memset(type, 0, sizeof(type));
-	prom_getprop(val, "device_type", type, sizeof(type));
-	if (strcmp(type, RELOC("display")) == 0) {
-		_prom->disp_node = val;
-		prom_setprop(val, "linux,boot-display", NULL, 0);
-	}
-}
-
-static void __init prom_close_stdin(void)
-{
-	unsigned long offset = reloc_offset();
-	struct prom_t *_prom = PTRRELOC(&prom);
-	ihandle val;
-
-	if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0)
-		call_prom("close", 1, 0, val);
-}
-
-static int __init prom_find_machine_type(void)
-{
-	unsigned long offset = reloc_offset();
-	struct prom_t *_prom = PTRRELOC(&prom);
-	char compat[256];
-	int len, i = 0;
-	phandle rtas;
-
-	len = prom_getprop(_prom->root, "compatible",
-			   compat, sizeof(compat)-1);
-	if (len > 0) {
-		compat[len] = 0;
-		while (i < len) {
-			char *p = &compat[i];
-			int sl = strlen(p);
-			if (sl == 0)
-				break;
-			if (strstr(p, RELOC("Power Macintosh")) ||
-			    strstr(p, RELOC("MacRISC4")))
-				return PLATFORM_POWERMAC;
-			if (strstr(p, RELOC("Momentum,Maple")))
-				return PLATFORM_MAPLE;
-			i += sl + 1;
-		}
-	}
-	/* Default to pSeries. We need to know if we are running LPAR */
-	rtas = call_prom("finddevice", 1, 1, ADDR("/rtas"));
-	if (PHANDLE_VALID(rtas)) {
-		int x = prom_getproplen(rtas, "ibm,hypertas-functions");
-		if (x != PROM_ERROR) {
-			prom_printf("Hypertas detected, assuming LPAR !\n");
-			return PLATFORM_PSERIES_LPAR;
-		}
-	}
-	return PLATFORM_PSERIES;
-}
-
-static int __init prom_set_color(ihandle ih, int i, int r, int g, int b)
-{
-	unsigned long offset = reloc_offset();
-
-	return call_prom("call-method", 6, 1, ADDR("color!"), ih, i, b, g, r);
-}
-
-/*
- * If we have a display that we don't know how to drive,
- * we will want to try to execute OF's open method for it
- * later.  However, OF will probably fall over if we do that
- * we've taken over the MMU.
- * So we check whether we will need to open the display,
- * and if so, open it now.
- */
-static void __init prom_check_displays(void)
-{
-	unsigned long offset = reloc_offset();
-	struct prom_t *_prom = PTRRELOC(&prom);
-	char type[16], *path;
-	phandle node;
-	ihandle ih;
-	int i;
-
-	static unsigned char default_colors[] = {
-		0x00, 0x00, 0x00,
-		0x00, 0x00, 0xaa,
-		0x00, 0xaa, 0x00,
-		0x00, 0xaa, 0xaa,
-		0xaa, 0x00, 0x00,
-		0xaa, 0x00, 0xaa,
-		0xaa, 0xaa, 0x00,
-		0xaa, 0xaa, 0xaa,
-		0x55, 0x55, 0x55,
-		0x55, 0x55, 0xff,
-		0x55, 0xff, 0x55,
-		0x55, 0xff, 0xff,
-		0xff, 0x55, 0x55,
-		0xff, 0x55, 0xff,
-		0xff, 0xff, 0x55,
-		0xff, 0xff, 0xff
-	};
-	const unsigned char *clut;
-
-	prom_printf("Looking for displays\n");
-	for (node = 0; prom_next_node(&node); ) {
-		memset(type, 0, sizeof(type));
-		prom_getprop(node, "device_type", type, sizeof(type));
-		if (strcmp(type, RELOC("display")) != 0)
-			continue;
-
-		/* It seems OF doesn't null-terminate the path :-( */
-		path = RELOC(prom_scratch);
-		memset(path, 0, PROM_SCRATCH_SIZE);
-
-		/*
-		 * leave some room at the end of the path for appending extra
-		 * arguments
-		 */
-		if (call_prom("package-to-path", 3, 1, node, path,
-			      PROM_SCRATCH_SIZE-10) == PROM_ERROR)
-			continue;
-		prom_printf("found display   : %s, opening ... ", path);
-		
-		ih = call_prom("open", 1, 1, path);
-		if (ih == 0) {
-			prom_printf("failed\n");
-			continue;
-		}
-
-		/* Success */
-		prom_printf("done\n");
-		prom_setprop(node, "linux,opened", NULL, 0);
-
-		/*
-		 * stdout wasn't a display node, pick the first we can find
-		 * for btext
-		 */
-		if (_prom->disp_node == 0)
-			_prom->disp_node = node;
-
-		/* Setup a useable color table when the appropriate
-		 * method is available. Should update this to set-colors */
-		clut = RELOC(default_colors);
-		for (i = 0; i < 32; i++, clut += 3)
-			if (prom_set_color(ih, i, clut[0], clut[1],
-					   clut[2]) != 0)
-				break;
-
-#ifdef CONFIG_LOGO_LINUX_CLUT224
-		clut = PTRRELOC(RELOC(logo_linux_clut224.clut));
-		for (i = 0; i < RELOC(logo_linux_clut224.clutsize); i++, clut += 3)
-			if (prom_set_color(ih, i + 32, clut[0], clut[1],
-					   clut[2]) != 0)
-				break;
-#endif /* CONFIG_LOGO_LINUX_CLUT224 */
-	}
-}
-
-
-/* Return (relocated) pointer to this much memory: moves initrd if reqd. */
-static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end,
-			      unsigned long needed, unsigned long align)
-{
-	unsigned long offset = reloc_offset();
-	void *ret;
-
-	*mem_start = _ALIGN(*mem_start, align);
-	while ((*mem_start + needed) > *mem_end) {
-		unsigned long room, chunk;
-
-		prom_debug("Chunk exhausted, claiming more at %x...\n",
-			   RELOC(alloc_bottom));
-		room = RELOC(alloc_top) - RELOC(alloc_bottom);
-		if (room > DEVTREE_CHUNK_SIZE)
-			room = DEVTREE_CHUNK_SIZE;
-		if (room < PAGE_SIZE)
-			prom_panic("No memory for flatten_device_tree (no room)");
-		chunk = alloc_up(room, 0);
-		if (chunk == 0)
-			prom_panic("No memory for flatten_device_tree (claim failed)");
-		*mem_end = RELOC(alloc_top);
-	}
-
-	ret = (void *)*mem_start;
-	*mem_start += needed;
-
-	return ret;
-}
-
-#define dt_push_token(token, mem_start, mem_end) \
-	do { *((u32 *)make_room(mem_start, mem_end, 4, 4)) = token; } while(0)
-
-static unsigned long __init dt_find_string(char *str)
-{
-	unsigned long offset = reloc_offset();
-	char *s, *os;
-
-	s = os = (char *)RELOC(dt_string_start);
-	s += 4;
-	while (s <  (char *)RELOC(dt_string_end)) {
-		if (strcmp(s, str) == 0)
-			return s - os;
-		s += strlen(s) + 1;
-	}
-	return 0;
-}
-
-/*
- * The Open Firmware 1275 specification states properties must be 31 bytes or
- * less, however not all firmwares obey this. Make it 64 bytes to be safe.
- */
-#define MAX_PROPERTY_NAME 64
-
-static void __init scan_dt_build_strings(phandle node,
-					 unsigned long *mem_start,
-					 unsigned long *mem_end)
-{
-	unsigned long offset = reloc_offset();
-	char *prev_name, *namep, *sstart;
-	unsigned long soff;
-	phandle child;
-
-	sstart =  (char *)RELOC(dt_string_start);
-
-	/* get and store all property names */
-	prev_name = RELOC("");
-	for (;;) {
-		/* 64 is max len of name including nul. */
-		namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1);
-		if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) {
-			/* No more nodes: unwind alloc */
-			*mem_start = (unsigned long)namep;
-			break;
-		}
-
- 		/* skip "name" */
- 		if (strcmp(namep, RELOC("name")) == 0) {
- 			*mem_start = (unsigned long)namep;
- 			prev_name = RELOC("name");
- 			continue;
- 		}
-		/* get/create string entry */
-		soff = dt_find_string(namep);
-		if (soff != 0) {
-			*mem_start = (unsigned long)namep;
-			namep = sstart + soff;
-		} else {
-			/* Trim off some if we can */
-			*mem_start = (unsigned long)namep + strlen(namep) + 1;
-			RELOC(dt_string_end) = *mem_start;
-		}
-		prev_name = namep;
-	}
-
-	/* do all our children */
-	child = call_prom("child", 1, 1, node);
-	while (child != 0) {
-		scan_dt_build_strings(child, mem_start, mem_end);
-		child = call_prom("peer", 1, 1, child);
-	}
-}
-
-static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
-					unsigned long *mem_end)
-{
-	phandle child;
-	char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
-	unsigned long soff;
-	unsigned char *valp;
-	unsigned long offset = reloc_offset();
-	static char pname[MAX_PROPERTY_NAME];
-	int l;
-
-	dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
-
-	/* get the node's full name */
-	namep = (char *)*mem_start;
-	l = call_prom("package-to-path", 3, 1, node,
-		      namep, *mem_end - *mem_start);
-	if (l >= 0) {
-		/* Didn't fit?  Get more room. */
-		if ((l+1) > (*mem_end - *mem_start)) {
-			namep = make_room(mem_start, mem_end, l+1, 1);
-			call_prom("package-to-path", 3, 1, node, namep, l);
-		}
-		namep[l] = '\0';
-
-		/* Fixup an Apple bug where they have bogus \0 chars in the
-		 * middle of the path in some properties
-		 */
-		for (p = namep, ep = namep + l; p < ep; p++)
-			if (*p == '\0') {
-				memmove(p, p+1, ep - p);
-				ep--; l--; p--;
-			}
-
-		/* now try to extract the unit name in that mess */
-		for (p = namep, lp = NULL; *p; p++)
-			if (*p == '/')
-				lp = p + 1;
-		if (lp != NULL)
-			memmove(namep, lp, strlen(lp) + 1);
-		*mem_start = _ALIGN(((unsigned long) namep) +
-				    strlen(namep) + 1, 4);
-	}
-
-	/* get it again for debugging */
-	path = RELOC(prom_scratch);
-	memset(path, 0, PROM_SCRATCH_SIZE);
-	call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
-
-	/* get and store all properties */
-	prev_name = RELOC("");
-	sstart = (char *)RELOC(dt_string_start);
-	for (;;) {
-		if (call_prom("nextprop", 3, 1, node, prev_name,
-			      RELOC(pname)) != 1)
-			break;
-
- 		/* skip "name" */
- 		if (strcmp(RELOC(pname), RELOC("name")) == 0) {
- 			prev_name = RELOC("name");
- 			continue;
- 		}
-
-		/* find string offset */
-		soff = dt_find_string(RELOC(pname));
-		if (soff == 0) {
-			prom_printf("WARNING: Can't find string index for"
-				    " <%s>, node %s\n", RELOC(pname), path);
-			break;
-		}
-		prev_name = sstart + soff;
-
-		/* get length */
-		l = call_prom("getproplen", 2, 1, node, RELOC(pname));
-
-		/* sanity checks */
-		if (l == PROM_ERROR)
-			continue;
-		if (l > MAX_PROPERTY_LENGTH) {
-			prom_printf("WARNING: ignoring large property ");
-			/* It seems OF doesn't null-terminate the path :-( */
-			prom_printf("[%s] ", path);
-			prom_printf("%s length 0x%x\n", RELOC(pname), l);
-			continue;
-		}
-
-		/* push property head */
-		dt_push_token(OF_DT_PROP, mem_start, mem_end);
-		dt_push_token(l, mem_start, mem_end);
-		dt_push_token(soff, mem_start, mem_end);
-
-		/* push property content */
-		valp = make_room(mem_start, mem_end, l, 4);
-		call_prom("getprop", 4, 1, node, RELOC(pname), valp, l);
-		*mem_start = _ALIGN(*mem_start, 4);
-	}
-
-	/* Add a "linux,phandle" property. */
-	soff = dt_find_string(RELOC("linux,phandle"));
-	if (soff == 0)
-		prom_printf("WARNING: Can't find string index for"
-			    " <linux-phandle> node %s\n", path);
-	else {
-		dt_push_token(OF_DT_PROP, mem_start, mem_end);
-		dt_push_token(4, mem_start, mem_end);
-		dt_push_token(soff, mem_start, mem_end);
-		valp = make_room(mem_start, mem_end, 4, 4);
-		*(u32 *)valp = node;
-	}
-
-	/* do all our children */
-	child = call_prom("child", 1, 1, node);
-	while (child != 0) {
-		scan_dt_build_struct(child, mem_start, mem_end);
-		child = call_prom("peer", 1, 1, child);
-	}
-
-	dt_push_token(OF_DT_END_NODE, mem_start, mem_end);
-}
-
-static void __init flatten_device_tree(void)
-{
-	phandle root;
-	unsigned long offset = reloc_offset();
-	unsigned long mem_start, mem_end, room;
-	struct boot_param_header *hdr;
-	struct prom_t *_prom = PTRRELOC(&prom);
-	char *namep;
-	u64 *rsvmap;
-
-	/*
-	 * Check how much room we have between alloc top & bottom (+/- a
-	 * few pages), crop to 4Mb, as this is our "chuck" size
-	 */
-	room = RELOC(alloc_top) - RELOC(alloc_bottom) - 0x4000;
-	if (room > DEVTREE_CHUNK_SIZE)
-		room = DEVTREE_CHUNK_SIZE;
-	prom_debug("starting device tree allocs at %x\n", RELOC(alloc_bottom));
-
-	/* Now try to claim that */
-	mem_start = (unsigned long)alloc_up(room, PAGE_SIZE);
-	if (mem_start == 0)
-		prom_panic("Can't allocate initial device-tree chunk\n");
-	mem_end = RELOC(alloc_top);
-
-	/* Get root of tree */
-	root = call_prom("peer", 1, 1, (phandle)0);
-	if (root == (phandle)0)
-		prom_panic ("couldn't get device tree root\n");
-
-	/* Build header and make room for mem rsv map */ 
-	mem_start = _ALIGN(mem_start, 4);
-	hdr = make_room(&mem_start, &mem_end,
-			sizeof(struct boot_param_header), 4);
-	RELOC(dt_header_start) = (unsigned long)hdr;
-	rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8);
-
-	/* Start of strings */
-	mem_start = PAGE_ALIGN(mem_start);
-	RELOC(dt_string_start) = mem_start;
-	mem_start += 4; /* hole */
-
-	/* Add "linux,phandle" in there, we'll need it */
-	namep = make_room(&mem_start, &mem_end, 16, 1);
-	strcpy(namep, RELOC("linux,phandle"));
-	mem_start = (unsigned long)namep + strlen(namep) + 1;
-
-	/* Build string array */
-	prom_printf("Building dt strings...\n"); 
-	scan_dt_build_strings(root, &mem_start, &mem_end);
-	RELOC(dt_string_end) = mem_start;
-
-	/* Build structure */
-	mem_start = PAGE_ALIGN(mem_start);
-	RELOC(dt_struct_start) = mem_start;
-	prom_printf("Building dt structure...\n"); 
-	scan_dt_build_struct(root, &mem_start, &mem_end);
-	dt_push_token(OF_DT_END, &mem_start, &mem_end);
-	RELOC(dt_struct_end) = PAGE_ALIGN(mem_start);
-
-	/* Finish header */
-	hdr->boot_cpuid_phys = _prom->cpu;
-	hdr->magic = OF_DT_HEADER;
-	hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start);
-	hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start);
-	hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start);
-	hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start);
-	hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start);
-	hdr->version = OF_DT_VERSION;
-	/* Version 16 is not backward compatible */
-	hdr->last_comp_version = 0x10;
-
-	/* Reserve the whole thing and copy the reserve map in, we
-	 * also bump mem_reserve_cnt to cause further reservations to
-	 * fail since it's too late.
-	 */
-	reserve_mem(RELOC(dt_header_start), hdr->totalsize);
-	memcpy(rsvmap, RELOC(mem_reserve_map), sizeof(mem_reserve_map));
-
-#ifdef DEBUG_PROM
-	{
-		int i;
-		prom_printf("reserved memory map:\n");
-		for (i = 0; i < RELOC(mem_reserve_cnt); i++)
-			prom_printf("  %x - %x\n", RELOC(mem_reserve_map)[i].base,
-				    RELOC(mem_reserve_map)[i].size);
-	}
-#endif
-	RELOC(mem_reserve_cnt) = MEM_RESERVE_MAP_SIZE;
-
-	prom_printf("Device tree strings 0x%x -> 0x%x\n",
-		    RELOC(dt_string_start), RELOC(dt_string_end)); 
-	prom_printf("Device tree struct  0x%x -> 0x%x\n",
-		    RELOC(dt_struct_start), RELOC(dt_struct_end));
-
-}
-
-
-static void __init fixup_device_tree(void)
-{
-	unsigned long offset = reloc_offset();
-	phandle u3, i2c, mpic;
-	u32 u3_rev;
-	u32 interrupts[2];
-	u32 parent;
-
-	/* Some G5s have a missing interrupt definition, fix it up here */
-	u3 = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000"));
-	if (!PHANDLE_VALID(u3))
-		return;
-	i2c = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/i2c@f8001000"));
-	if (!PHANDLE_VALID(i2c))
-		return;
-	mpic = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/mpic@f8040000"));
-	if (!PHANDLE_VALID(mpic))
-		return;
-
-	/* check if proper rev of u3 */
-	if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev))
-	    == PROM_ERROR)
-		return;
-	if (u3_rev < 0x35 || u3_rev > 0x39)
-		return;
-	/* does it need fixup ? */
-	if (prom_getproplen(i2c, "interrupts") > 0)
-		return;
-
-	prom_printf("fixing up bogus interrupts for u3 i2c...\n");
-
-	/* interrupt on this revision of u3 is number 0 and level */
-	interrupts[0] = 0;
-	interrupts[1] = 1;
-	prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts));
-	parent = (u32)mpic;
-	prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent));
-}
-
-
-static void __init prom_find_boot_cpu(void)
-{
-	unsigned long offset = reloc_offset();
-       	struct prom_t *_prom = PTRRELOC(&prom);
-	u32 getprop_rval;
-	ihandle prom_cpu;
-	phandle cpu_pkg;
-
-	if (prom_getprop(_prom->chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0)
-		prom_panic("cannot find boot cpu");
-
-	cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu);
-
-	prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval));
-	_prom->cpu = getprop_rval;
-
-	prom_debug("Booting CPU hw index = 0x%x\n", _prom->cpu);
-}
-
-static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
-	unsigned long offset = reloc_offset();
-       	struct prom_t *_prom = PTRRELOC(&prom);
-
-	if ( r3 && r4 && r4 != 0xdeadbeef) {
-		u64 val;
-
-		RELOC(prom_initrd_start) = (r3 >= KERNELBASE) ? __pa(r3) : r3;
-		RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4;
-
-		val = (u64)RELOC(prom_initrd_start);
-		prom_setprop(_prom->chosen, "linux,initrd-start", &val, sizeof(val));
-		val = (u64)RELOC(prom_initrd_end);
-		prom_setprop(_prom->chosen, "linux,initrd-end", &val, sizeof(val));
-
-		reserve_mem(RELOC(prom_initrd_start),
-			    RELOC(prom_initrd_end) - RELOC(prom_initrd_start));
-
-		prom_debug("initrd_start=0x%x\n", RELOC(prom_initrd_start));
-		prom_debug("initrd_end=0x%x\n", RELOC(prom_initrd_end));
-	}
-#endif /* CONFIG_BLK_DEV_INITRD */
-}
-
-/*
- * We enter here early on, when the Open Firmware prom is still
- * handling exceptions and the MMU hash table for us.
- */
-
-unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp,
-			       unsigned long r6, unsigned long r7)
-{	
-	unsigned long offset = reloc_offset();
-       	struct prom_t *_prom = PTRRELOC(&prom);
-	unsigned long phys = KERNELBASE - offset;
-	u32 getprop_rval;
-	
-	/*
-	 * First zero the BSS
-	 */
-	memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
-
-	/*
-	 * Init interface to Open Firmware, get some node references,
-	 * like /chosen
-	 */
-	prom_init_client_services(pp);
-
-	/*
-	 * Init prom stdout device
-	 */
-	prom_init_stdout();
-	prom_debug("klimit=0x%x\n", RELOC(klimit));
-	prom_debug("offset=0x%x\n", offset);
-
-	/*
-	 * Check for an initrd
-	 */
-	prom_check_initrd(r3, r4);
-
-	/*
-	 * Get default machine type. At this point, we do not differenciate
-	 * between pSeries SMP and pSeries LPAR
-	 */
-	RELOC(of_platform) = prom_find_machine_type();
-	getprop_rval = RELOC(of_platform);
-	prom_setprop(_prom->chosen, "linux,platform",
-		     &getprop_rval, sizeof(getprop_rval));
-
-	/*
-	 * On pSeries, inform the firmware about our capabilities
-	 */
-	if (RELOC(of_platform) == PLATFORM_PSERIES ||
-	    RELOC(of_platform) == PLATFORM_PSERIES_LPAR)
-		prom_send_capabilities();
-
-	/*
-	 * On pSeries and Cell, copy the CPU hold code
-	 */
-       	if (RELOC(of_platform) & (PLATFORM_PSERIES | PLATFORM_CELL))
-       		copy_and_flush(0, KERNELBASE - offset, 0x100, 0);
-
-	/*
-	 * Get memory cells format
-	 */
-	getprop_rval = 1;
-	prom_getprop(_prom->root, "#size-cells",
-		     &getprop_rval, sizeof(getprop_rval));
-	_prom->root_size_cells = getprop_rval;
-	getprop_rval = 2;
-	prom_getprop(_prom->root, "#address-cells",
-		     &getprop_rval, sizeof(getprop_rval));
-	_prom->root_addr_cells = getprop_rval;
-
-	/*
-	 * Do early parsing of command line
-	 */
-	early_cmdline_parse();
-
-	/*
-	 * Initialize memory management within prom_init
-	 */
-	prom_init_mem();
-
-	/*
-	 * Determine which cpu is actually running right _now_
-	 */
-	prom_find_boot_cpu();
-
-	/* 
-	 * Initialize display devices
-	 */
-	prom_check_displays();
-
-	/*
-	 * Initialize IOMMU (TCE tables) on pSeries. Do that before anything else
-	 * that uses the allocator, we need to make sure we get the top of memory
-	 * available for us here...
-	 */
-	if (RELOC(of_platform) == PLATFORM_PSERIES)
-		prom_initialize_tce_table();
-
-	/*
-	 * On non-powermacs, try to instantiate RTAS and puts all CPUs
-	 * in spin-loops. PowerMacs don't have a working RTAS and use
-	 * a different way to spin CPUs
-	 */
-	if (RELOC(of_platform) != PLATFORM_POWERMAC) {
-		prom_instantiate_rtas();
-		prom_hold_cpus();
-	}
-
-	/*
-	 * Fill in some infos for use by the kernel later on
-	 */
-	if (RELOC(ppc64_iommu_off))
-		prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0);
-
-	if (RELOC(iommu_force_on))
-		prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0);
-
-	if (RELOC(prom_memory_limit))
-		prom_setprop(_prom->chosen, "linux,memory-limit",
-			PTRRELOC(&prom_memory_limit), sizeof(RELOC(prom_memory_limit)));
-
-	if (RELOC(prom_tce_alloc_start)) {
-		prom_setprop(_prom->chosen, "linux,tce-alloc-start",
-			PTRRELOC(&prom_tce_alloc_start), sizeof(RELOC(prom_tce_alloc_start)));
-		prom_setprop(_prom->chosen, "linux,tce-alloc-end",
-			PTRRELOC(&prom_tce_alloc_end), sizeof(RELOC(prom_tce_alloc_end)));
-	}
-
-	/*
-	 * Fixup any known bugs in the device-tree
-	 */
-	fixup_device_tree();
-
-	/*
-	 * Now finally create the flattened device-tree
-	 */
-       	prom_printf("copying OF device tree ...\n");
-       	flatten_device_tree();
-
-	/* in case stdin is USB and still active on IBM machines... */
-	prom_close_stdin();
-
-	/*
-	 * Call OF "quiesce" method to shut down pending DMA's from
-	 * devices etc...
-	 */
-	prom_printf("Calling quiesce ...\n");
-	call_prom("quiesce", 0, 0);
-
-	/*
-	 * And finally, call the kernel passing it the flattened device
-	 * tree and NULL as r5, thus triggering the new entry point which
-	 * is common to us and kexec
-	 */
-	prom_printf("returning from prom_init\n");
-	prom_debug("->dt_header_start=0x%x\n", RELOC(dt_header_start));
-	prom_debug("->phys=0x%x\n", phys);
-
-	__start(RELOC(dt_header_start), phys, 0);
-
-	return 0;
-}
-
diff --git a/arch/ppc64/kernel/semaphore.c b/arch/ppc64/kernel/semaphore.c
deleted file mode 100644
index a1c1db573e9c..000000000000
--- a/arch/ppc64/kernel/semaphore.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * 
- *
- * PowerPC-specific semaphore code.
- *
- * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * April 2001 - Reworked by Paul Mackerras <paulus@samba.org>
- * to eliminate the SMP races in the old version between the updates
- * of `count' and `waking'.  Now we use negative `count' values to
- * indicate that some process(es) are waiting for the semaphore.
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/module.h>
-
-#include <asm/atomic.h>
-#include <asm/semaphore.h>
-#include <asm/errno.h>
-
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- *	old_count = sem->count;
- *	tmp = MAX(old_count, 0) + incr;
- *	sem->count = tmp;
- *	return old_count;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
-	int old_count, tmp;
-
-	__asm__ __volatile__("\n"
-"1:	lwarx	%0,0,%3\n"
-"	srawi	%1,%0,31\n"
-"	andc	%1,%0,%1\n"
-"	add	%1,%1,%4\n"
-"	stwcx.	%1,0,%3\n"
-"	bne	1b"
-	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
-	: "r" (&sem->count), "r" (incr), "m" (sem->count)
-	: "cc");
-
-	return old_count;
-}
-
-void __up(struct semaphore *sem)
-{
-	/*
-	 * Note that we incremented count in up() before we came here,
-	 * but that was ineffective since the result was <= 0, and
-	 * any negative value of count is equivalent to 0.
-	 * This ends up setting count to 1, unless count is now > 0
-	 * (i.e. because some other cpu has called up() in the meantime),
-	 * in which case we just increment count.
-	 */
-	__sem_update_count(sem, 1);
-	wake_up(&sem->wait);
-}
-EXPORT_SYMBOL(__up);
-
-/*
- * Note that when we come in to __down or __down_interruptible,
- * we have already decremented count, but that decrement was
- * ineffective since the result was < 0, and any negative value
- * of count is equivalent to 0.
- * Thus it is only when we decrement count from some value > 0
- * that we have actually got the semaphore.
- */
-void __sched __down(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	/*
-	 * Try to get the semaphore.  If the count is > 0, then we've
-	 * got the semaphore; we decrement count and exit the loop.
-	 * If the count is 0 or negative, we set it to -1, indicating
-	 * that we are asleep, and then sleep.
-	 */
-	while (__sem_update_count(sem, -1) <= 0) {
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	__set_task_state(tsk, TASK_RUNNING);
-
-	/*
-	 * If there are any more sleepers, wake one of them up so
-	 * that it can either get the semaphore, or set count to -1
-	 * indicating that there are still processes sleeping.
-	 */
-	wake_up(&sem->wait);
-}
-EXPORT_SYMBOL(__down);
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	__set_task_state(tsk, TASK_INTERRUPTIBLE);
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	while (__sem_update_count(sem, -1) <= 0) {
-		if (signal_pending(current)) {
-			/*
-			 * A signal is pending - give up trying.
-			 * Set sem->count to 0 if it is negative,
-			 * since we are no longer sleeping.
-			 */
-			__sem_update_count(sem, 0);
-			retval = -EINTR;
-			break;
-		}
-		schedule();
-		set_task_state(tsk, TASK_INTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	__set_task_state(tsk, TASK_RUNNING);
-
-	wake_up(&sem->wait);
-	return retval;
-}
-EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c
deleted file mode 100644
index 1bbacac44988..000000000000
--- a/arch/ppc64/kernel/vdso.c
+++ /dev/null
@@ -1,625 +0,0 @@
-/*
- *  linux/arch/ppc64/kernel/vdso.c
- *
- *    Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
- *			 <benh@kernel.crashing.org>
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/elf.h>
-#include <linux/security.h>
-#include <linux/bootmem.h>
-
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-#include <asm/machdep.h>
-#include <asm/cputable.h>
-#include <asm/sections.h>
-#include <asm/systemcfg.h>
-#include <asm/vdso.h>
-
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(fmt...) printk(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-
-/*
- * The vDSOs themselves are here
- */
-extern char vdso64_start, vdso64_end;
-extern char vdso32_start, vdso32_end;
-
-static void *vdso64_kbase = &vdso64_start;
-static void *vdso32_kbase = &vdso32_start;
-
-unsigned int vdso64_pages;
-unsigned int vdso32_pages;
-
-/* Signal trampolines user addresses */
-
-unsigned long vdso64_rt_sigtramp;
-unsigned long vdso32_sigtramp;
-unsigned long vdso32_rt_sigtramp;
-
-/* Format of the patch table */
-struct vdso_patch_def
-{
-	u32		pvr_mask, pvr_value;
-	const char	*gen_name;
-	const char	*fix_name;
-};
-
-/* Table of functions to patch based on the CPU type/revision
- *
- * TODO: Improve by adding whole lists for each entry
- */
-static struct vdso_patch_def vdso_patches[] = {
-	{
-		0xffff0000, 0x003a0000,		/* POWER5 */
-		"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
-	},
-	{
-		0xffff0000, 0x003b0000,		/* POWER5 */
-		"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
-	},
-};
-
-/*
- * Some infos carried around for each of them during parsing at
- * boot time.
- */
-struct lib32_elfinfo
-{
-	Elf32_Ehdr	*hdr;		/* ptr to ELF */
-	Elf32_Sym	*dynsym;	/* ptr to .dynsym section */
-	unsigned long	dynsymsize;	/* size of .dynsym section */
-	char		*dynstr;	/* ptr to .dynstr section */
-	unsigned long	text;		/* offset of .text section in .so */
-};
-
-struct lib64_elfinfo
-{
-	Elf64_Ehdr	*hdr;
-	Elf64_Sym	*dynsym;
-	unsigned long	dynsymsize;
-	char		*dynstr;
-	unsigned long	text;
-};
-
-
-#ifdef __DEBUG
-static void dump_one_vdso_page(struct page *pg, struct page *upg)
-{
-	printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
-	       page_count(pg),
-	       pg->flags);
-	if (upg/* && pg != upg*/) {
-		printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) << PAGE_SHIFT),
-		       page_count(upg),
-		       upg->flags);
-	}
-	printk("\n");
-}
-
-static void dump_vdso_pages(struct vm_area_struct * vma)
-{
-	int i;
-
-	if (!vma || test_thread_flag(TIF_32BIT)) {
-		printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
-		for (i=0; i<vdso32_pages; i++) {
-			struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
-			struct page *upg = (vma && vma->vm_mm) ?
-				follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0)
-				: NULL;
-			dump_one_vdso_page(pg, upg);
-		}
-	}
-	if (!vma || !test_thread_flag(TIF_32BIT)) {
-		printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
-		for (i=0; i<vdso64_pages; i++) {
-			struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
-			struct page *upg = (vma && vma->vm_mm) ?
-				follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0)
-				: NULL;
-			dump_one_vdso_page(pg, upg);
-		}
-	}
-}
-#endif /* DEBUG */
-
-/*
- * Keep a dummy vma_close for now, it will prevent VMA merging.
- */
-static void vdso_vma_close(struct vm_area_struct * vma)
-{
-}
-
-/*
- * Our nopage() function, maps in the actual vDSO kernel pages, they will
- * be mapped read-only by do_no_page(), and eventually COW'ed, either
- * right away for an initial write access, or by do_wp_page().
- */
-static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
-				     unsigned long address, int *type)
-{
-	unsigned long offset = address - vma->vm_start;
-	struct page *pg;
-	void *vbase = test_thread_flag(TIF_32BIT) ? vdso32_kbase : vdso64_kbase;
-
-	DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
-	    current->comm, address, offset);
-
-	if (address < vma->vm_start || address > vma->vm_end)
-		return NOPAGE_SIGBUS;
-
-	/*
-	 * Last page is systemcfg.
-	 */
-	if ((vma->vm_end - address) <= PAGE_SIZE)
-		pg = virt_to_page(_systemcfg);
-	else
-		pg = virt_to_page(vbase + offset);
-
-	get_page(pg);
-	DBG(" ->page count: %d\n", page_count(pg));
-
-	return pg;
-}
-
-static struct vm_operations_struct vdso_vmops = {
-	.close	= vdso_vma_close,
-	.nopage	= vdso_vma_nopage,
-};
-
-/*
- * This is called from binfmt_elf, we create the special vma for the
- * vDSO and insert it into the mm struct tree
- */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
-{
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	unsigned long vdso_pages;
-	unsigned long vdso_base;
-
-	if (test_thread_flag(TIF_32BIT)) {
-		vdso_pages = vdso32_pages;
-		vdso_base = VDSO32_MBASE;
-	} else {
-		vdso_pages = vdso64_pages;
-		vdso_base = VDSO64_MBASE;
-	}
-
-	current->thread.vdso_base = 0;
-
-	/* vDSO has a problem and was disabled, just don't "enable" it for the
-	 * process
-	 */
-	if (vdso_pages == 0)
-		return 0;
-
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (vma == NULL)
-		return -ENOMEM;
-
-	memset(vma, 0, sizeof(*vma));
-
-	/*
-	 * pick a base address for the vDSO in process space. We try to put it
-	 * at vdso_base which is the "natural" base for it, but we might fail
-	 * and end up putting it elsewhere.
-	 */
-	vdso_base = get_unmapped_area(NULL, vdso_base,
-				      vdso_pages << PAGE_SHIFT, 0, 0);
-	if (vdso_base & ~PAGE_MASK) {
-		kmem_cache_free(vm_area_cachep, vma);
-		return (int)vdso_base;
-	}
-
-	current->thread.vdso_base = vdso_base;
-
-	vma->vm_mm = mm;
-	vma->vm_start = current->thread.vdso_base;
-
-	/*
-	 * the VMA size is one page more than the vDSO since systemcfg
-	 * is mapped in the last one
-	 */
-	vma->vm_end = vma->vm_start + ((vdso_pages + 1) << PAGE_SHIFT);
-
-	/*
-	 * our vma flags don't have VM_WRITE so by default, the process isn't allowed
-	 * to write those pages.
-	 * gdb can break that with ptrace interface, and thus trigger COW on those
-	 * pages but it's then your responsibility to never do that on the "data" page
-	 * of the vDSO or you'll stop getting kernel updates and your nice userland
-	 * gettimeofday will be totally dead. It's fine to use that for setting
-	 * breakpoints in the vDSO code pages though
-	 */
-	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_RESERVED;
-	vma->vm_flags |= mm->def_flags;
-	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
-	vma->vm_ops = &vdso_vmops;
-
-	down_write(&mm->mmap_sem);
-	if (insert_vm_struct(mm, vma)) {
-		up_write(&mm->mmap_sem);
-		kmem_cache_free(vm_area_cachep, vma);
-		return -ENOMEM;
-	}
-	mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-	up_write(&mm->mmap_sem);
-
-	return 0;
-}
-
-static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
-				  unsigned long *size)
-{
-	Elf32_Shdr *sechdrs;
-	unsigned int i;
-	char *secnames;
-
-	/* Grab section headers and strings so we can tell who is who */
-	sechdrs = (void *)ehdr + ehdr->e_shoff;
-	secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
-
-	/* Find the section they want */
-	for (i = 1; i < ehdr->e_shnum; i++) {
-		if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
-			if (size)
-				*size = sechdrs[i].sh_size;
-			return (void *)ehdr + sechdrs[i].sh_offset;
-		}
-	}
-	*size = 0;
-	return NULL;
-}
-
-static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
-				  unsigned long *size)
-{
-	Elf64_Shdr *sechdrs;
-	unsigned int i;
-	char *secnames;
-
-	/* Grab section headers and strings so we can tell who is who */
-	sechdrs = (void *)ehdr + ehdr->e_shoff;
-	secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
-
-	/* Find the section they want */
-	for (i = 1; i < ehdr->e_shnum; i++) {
-		if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
-			if (size)
-				*size = sechdrs[i].sh_size;
-			return (void *)ehdr + sechdrs[i].sh_offset;
-		}
-	}
-	if (size)
-		*size = 0;
-	return NULL;
-}
-
-static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, const char *symname)
-{
-	unsigned int i;
-	char name[32], *c;
-
-	for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
-		if (lib->dynsym[i].st_name == 0)
-			continue;
-		strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, 32);
-		c = strchr(name, '@');
-		if (c)
-			*c = 0;
-		if (strcmp(symname, name) == 0)
-			return &lib->dynsym[i];
-	}
-	return NULL;
-}
-
-static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, const char *symname)
-{
-	unsigned int i;
-	char name[32], *c;
-
-	for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
-		if (lib->dynsym[i].st_name == 0)
-			continue;
-		strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, 32);
-		c = strchr(name, '@');
-		if (c)
-			*c = 0;
-		if (strcmp(symname, name) == 0)
-			return &lib->dynsym[i];
-	}
-	return NULL;
-}
-
-/* Note that we assume the section is .text and the symbol is relative to
- * the library base
- */
-static unsigned long __init find_function32(struct lib32_elfinfo *lib, const char *symname)
-{
-	Elf32_Sym *sym = find_symbol32(lib, symname);
-
-	if (sym == NULL) {
-		printk(KERN_WARNING "vDSO32: function %s not found !\n", symname);
-		return 0;
-	}
-	return sym->st_value - VDSO32_LBASE;
-}
-
-/* Note that we assume the section is .text and the symbol is relative to
- * the library base
- */
-static unsigned long __init find_function64(struct lib64_elfinfo *lib, const char *symname)
-{
-	Elf64_Sym *sym = find_symbol64(lib, symname);
-
-	if (sym == NULL) {
-		printk(KERN_WARNING "vDSO64: function %s not found !\n", symname);
-		return 0;
-	}
-#ifdef VDS64_HAS_DESCRIPTORS
-	return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) - VDSO64_LBASE;
-#else
-	return sym->st_value - VDSO64_LBASE;
-#endif
-}
-
-
-static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
-					struct lib64_elfinfo *v64)
-{
-	void *sect;
-
-	/*
-	 * Locate symbol tables & text section
-	 */
-
-	v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
-	v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
-	if (v32->dynsym == NULL || v32->dynstr == NULL) {
-		printk(KERN_ERR "vDSO32: a required symbol section was not found\n");
-		return -1;
-	}
-	sect = find_section32(v32->hdr, ".text", NULL);
-	if (sect == NULL) {
-		printk(KERN_ERR "vDSO32: the .text section was not found\n");
-		return -1;
-	}
-	v32->text = sect - vdso32_kbase;
-
-	v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
-	v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
-	if (v64->dynsym == NULL || v64->dynstr == NULL) {
-		printk(KERN_ERR "vDSO64: a required symbol section was not found\n");
-		return -1;
-	}
-	sect = find_section64(v64->hdr, ".text", NULL);
-	if (sect == NULL) {
-		printk(KERN_ERR "vDSO64: the .text section was not found\n");
-		return -1;
-	}
-	v64->text = sect - vdso64_kbase;
-
-	return 0;
-}
-
-static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
-					  struct lib64_elfinfo *v64)
-{
-	/*
-	 * Find signal trampolines
-	 */
-
-	vdso64_rt_sigtramp	= find_function64(v64, "__kernel_sigtramp_rt64");
-	vdso32_sigtramp		= find_function32(v32, "__kernel_sigtramp32");
-	vdso32_rt_sigtramp	= find_function32(v32, "__kernel_sigtramp_rt32");
-}
-
-static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
-				       struct lib64_elfinfo *v64)
-{
-	Elf32_Sym *sym32;
-	Elf64_Sym *sym64;
-
-	sym32 = find_symbol32(v32, "__kernel_datapage_offset");
-	if (sym32 == NULL) {
-		printk(KERN_ERR "vDSO32: Can't find symbol __kernel_datapage_offset !\n");
-		return -1;
-	}
-	*((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
-		(vdso32_pages << PAGE_SHIFT) - (sym32->st_value - VDSO32_LBASE);
-
-       	sym64 = find_symbol64(v64, "__kernel_datapage_offset");
-	if (sym64 == NULL) {
-		printk(KERN_ERR "vDSO64: Can't find symbol __kernel_datapage_offset !\n");
-		return -1;
-	}
-	*((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
-		(vdso64_pages << PAGE_SHIFT) - (sym64->st_value - VDSO64_LBASE);
-
-	return 0;
-}
-
-static int vdso_do_func_patch32(struct lib32_elfinfo *v32,
-				struct lib64_elfinfo *v64,
-				const char *orig, const char *fix)
-{
-	Elf32_Sym *sym32_gen, *sym32_fix;
-
-	sym32_gen = find_symbol32(v32, orig);
-	if (sym32_gen == NULL) {
-		printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
-		return -1;
-	}
-	sym32_fix = find_symbol32(v32, fix);
-	if (sym32_fix == NULL) {
-		printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
-		return -1;
-	}
-	sym32_gen->st_value = sym32_fix->st_value;
-	sym32_gen->st_size = sym32_fix->st_size;
-	sym32_gen->st_info = sym32_fix->st_info;
-	sym32_gen->st_other = sym32_fix->st_other;
-	sym32_gen->st_shndx = sym32_fix->st_shndx;
-
-	return 0;
-}
-
-static int vdso_do_func_patch64(struct lib32_elfinfo *v32,
-				struct lib64_elfinfo *v64,
-				const char *orig, const char *fix)
-{
-	Elf64_Sym *sym64_gen, *sym64_fix;
-
-	sym64_gen = find_symbol64(v64, orig);
-	if (sym64_gen == NULL) {
-		printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
-		return -1;
-	}
-	sym64_fix = find_symbol64(v64, fix);
-	if (sym64_fix == NULL) {
-		printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
-		return -1;
-	}
-	sym64_gen->st_value = sym64_fix->st_value;
-	sym64_gen->st_size = sym64_fix->st_size;
-	sym64_gen->st_info = sym64_fix->st_info;
-	sym64_gen->st_other = sym64_fix->st_other;
-	sym64_gen->st_shndx = sym64_fix->st_shndx;
-
-	return 0;
-}
-
-static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
-				       struct lib64_elfinfo *v64)
-{
-	u32 pvr;
-	int i;
-
-	pvr = mfspr(SPRN_PVR);
-	for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
-		struct vdso_patch_def *patch = &vdso_patches[i];
-		int match = (pvr & patch->pvr_mask) == patch->pvr_value;
-
-		DBG("patch %d (mask: %x, pvr: %x) : %s\n",
-		    i, patch->pvr_mask, patch->pvr_value, match ? "match" : "skip");
-
-		if (!match)
-			continue;
-
-		DBG("replacing %s with %s...\n", patch->gen_name, patch->fix_name);
-
-		/*
-		 * Patch the 32 bits and 64 bits symbols. Note that we do not patch
-		 * the "." symbol on 64 bits. It would be easy to do, but doesn't
-		 * seem to be necessary, patching the OPD symbol is enough.
-		 */
-		vdso_do_func_patch32(v32, v64, patch->gen_name, patch->fix_name);
-		vdso_do_func_patch64(v32, v64, patch->gen_name, patch->fix_name);
-	}
-
-	return 0;
-}
-
-
-static __init int vdso_setup(void)
-{
-	struct lib32_elfinfo	v32;
-	struct lib64_elfinfo	v64;
-
-	v32.hdr = vdso32_kbase;
-	v64.hdr = vdso64_kbase;
-
-	if (vdso_do_find_sections(&v32, &v64))
-		return -1;
-
-	if (vdso_fixup_datapage(&v32, &v64))
-		return -1;
-
-	if (vdso_fixup_alt_funcs(&v32, &v64))
-		return -1;
-
-	vdso_setup_trampolines(&v32, &v64);
-
-	return 0;
-}
-
-void __init vdso_init(void)
-{
-	int i;
-
-	vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
-	vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
-
-	DBG("vdso64_kbase: %p, 0x%x pages, vdso32_kbase: %p, 0x%x pages\n",
-	       vdso64_kbase, vdso64_pages, vdso32_kbase, vdso32_pages);
-
-	/*
-	 * Initialize the vDSO images in memory, that is do necessary
-	 * fixups of vDSO symbols, locate trampolines, etc...
-	 */
-	if (vdso_setup()) {
-		printk(KERN_ERR "vDSO setup failure, not enabled !\n");
-		/* XXX should free pages here ? */
-		vdso64_pages = vdso32_pages = 0;
-		return;
-	}
-
-	/* Make sure pages are in the correct state */
-	for (i = 0; i < vdso64_pages; i++) {
-		struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
-		ClearPageReserved(pg);
-		get_page(pg);
-	}
-	for (i = 0; i < vdso32_pages; i++) {
-		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
-		ClearPageReserved(pg);
-		get_page(pg);
-	}
-
-	get_page(virt_to_page(_systemcfg));
-}
-
-int in_gate_area_no_task(unsigned long addr)
-{
-	return 0;
-}
-
-int in_gate_area(struct task_struct *task, unsigned long addr)
-{
-	return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
-{
-	return NULL;
-}
-
diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S
deleted file mode 100644
index 022f220e772f..000000000000
--- a/arch/ppc64/kernel/vmlinux.lds.S
+++ /dev/null
@@ -1,151 +0,0 @@
-#include <asm/page.h>
-#include <asm-generic/vmlinux.lds.h>
-
-OUTPUT_ARCH(powerpc:common64)
-jiffies = jiffies_64;
-SECTIONS
-{
-  /* Sections to be discarded. */
-  /DISCARD/ : {
-	*(.exitcall.exit)
-	}
-
-
-  /* Read-only sections, merged into text segment: */
-  .text : {
-	*(.text .text.*)
-	SCHED_TEXT
-	LOCK_TEXT
-	KPROBES_TEXT
-	*(.fixup)
-	. = ALIGN(PAGE_SIZE);
-	_etext = .;
-	}
-
-  __ex_table : {
-	__start___ex_table = .;
-	*(__ex_table)
-	__stop___ex_table = .;
-	}
-
-  __bug_table : {
-	__start___bug_table = .;
-	*(__bug_table)
-	__stop___bug_table = .;
-	}
-
-  __ftr_fixup : {
-	__start___ftr_fixup = .;
-	*(__ftr_fixup)
-	__stop___ftr_fixup = .;
-	}
-
-  RODATA
-
-
-  /* will be freed after init */
-  . = ALIGN(PAGE_SIZE);
-  __init_begin = .;
-
-  .init.text : {
-	_sinittext = .;
-	*(.init.text)
-	_einittext = .;
-	}
-
-  .init.data : {
-	*(.init.data)
-	}
-
-  . = ALIGN(16);
-  .init.setup : {
-	__setup_start = .;
-	*(.init.setup)
-	__setup_end = .;
-	}
-
-  .initcall.init : {
-	__initcall_start = .;
-	*(.initcall1.init)
-	*(.initcall2.init)
-	*(.initcall3.init)
-	*(.initcall4.init)
-	*(.initcall5.init)
-	*(.initcall6.init)
-	*(.initcall7.init)
-	__initcall_end = .;
-	}
-
-  .con_initcall.init : {
-	__con_initcall_start = .;
-	*(.con_initcall.init)
-	__con_initcall_end = .;
-	}
-
-  SECURITY_INIT
-
-  . = ALIGN(PAGE_SIZE);
-  .init.ramfs : {
-	__initramfs_start = .;
-	*(.init.ramfs)
-	__initramfs_end = .;
-	}
-
-  .data.percpu : {
-	__per_cpu_start = .;
-	*(.data.percpu)
-	__per_cpu_end = .;
-	}
-
-  . = ALIGN(PAGE_SIZE);
-  . = ALIGN(16384);
-  __init_end = .;
-  /* freed after init ends here */
-
-
-  /* Read/write sections */
-  . = ALIGN(PAGE_SIZE);
-  . = ALIGN(16384);
-  _sdata = .;
-  /* The initial task and kernel stack */
-  .data.init_task : {
-	*(.data.init_task)
-	}
-
-  . = ALIGN(PAGE_SIZE);
-  .data.page_aligned : {
-	*(.data.page_aligned)
-	}
-
-  .data.cacheline_aligned : {
-	*(.data.cacheline_aligned)
-	}
-
-  .data : {
-	*(.data .data.rel* .toc1)
-	*(.branch_lt)
-	}
-
-  .opd : {
-	*(.opd)
-	}
-
-  .got : {
-	__toc_start = .;
-	*(.got)
-	*(.toc)
-	. = ALIGN(PAGE_SIZE);
-	_edata = .;
-	}
-
-
-  . = ALIGN(PAGE_SIZE);
-  .bss : {
-	__bss_start = .;
-	*(.bss)
-	__bss_stop = .;
-	}
-
-  . = ALIGN(PAGE_SIZE);
-  _end = . ;
-}
diff --git a/arch/ppc64/xmon/privinst.h b/arch/ppc64/xmon/privinst.h
deleted file mode 100644
index 02eb40dac0b3..000000000000
--- a/arch/ppc64/xmon/privinst.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 1996 Paul Mackerras.
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#define GETREG(reg)		\
-    static inline unsigned long get_ ## reg (void)	\
-	{ unsigned long ret; asm volatile ("mf" #reg " %0" : "=r" (ret) :); return ret; }
-
-#define SETREG(reg)		\
-    static inline void set_ ## reg (unsigned long val)	\
-	{ asm volatile ("mt" #reg " %0" : : "r" (val)); }
-
-GETREG(msr)
-SETREG(msrd)
-GETREG(cr)
-
-#define GSETSPR(n, name)	\
-    static inline long get_ ## name (void) \
-	{ long ret; asm volatile ("mfspr %0," #n : "=r" (ret) : ); return ret; } \
-    static inline void set_ ## name (long val) \
-	{ asm volatile ("mtspr " #n ",%0" : : "r" (val)); }
-
-GSETSPR(0, mq)
-GSETSPR(1, xer)
-GSETSPR(4, rtcu)
-GSETSPR(5, rtcl)
-GSETSPR(8, lr)
-GSETSPR(9, ctr)
-GSETSPR(18, dsisr)
-GSETSPR(19, dar)
-GSETSPR(22, dec)
-GSETSPR(25, sdr1)
-GSETSPR(26, srr0)
-GSETSPR(27, srr1)
-GSETSPR(272, sprg0)
-GSETSPR(273, sprg1)
-GSETSPR(274, sprg2)
-GSETSPR(275, sprg3)
-GSETSPR(282, ear)
-GSETSPR(287, pvr)
-GSETSPR(1008, hid0)
-GSETSPR(1009, hid1)
-GSETSPR(1010, iabr)
-GSETSPR(1023, pir)
-
-static inline void store_inst(void *p)
-{
-	asm volatile ("dcbst 0,%0; sync; icbi 0,%0; isync" : : "r" (p));
-}
-
-static inline void cflush(void *p)
-{
-	asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p));
-}
-
-static inline void cinval(void *p)
-{
-	asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p));
-}
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h
deleted file mode 100644
index 3efc3288f7e9..000000000000
--- a/include/asm-ppc64/page.h
+++ /dev/null
@@ -1,328 +0,0 @@
-#ifndef _PPC64_PAGE_H
-#define _PPC64_PAGE_H
-
-/*
- * Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/config.h>
-#include <asm/asm-compat.h>
-
-/*
- * We support either 4k or 64k software page size. When using 64k pages
- * however, wether we are really supporting 64k pages in HW or not is
- * irrelevant to those definitions. We always define HW_PAGE_SHIFT to 12
- * as use of 64k pages remains a linux kernel specific, every notion of
- * page number shared with the firmware, TCEs, iommu, etc... still assumes
- * a page size of 4096.
- */
-#ifdef CONFIG_PPC_64K_PAGES
-#define PAGE_SHIFT		16
-#else
-#define PAGE_SHIFT		12
-#endif
-
-#define PAGE_SIZE		(ASM_CONST(1) << PAGE_SHIFT)
-#define PAGE_MASK		(~(PAGE_SIZE-1))
-
-/* HW_PAGE_SHIFT is always 4k pages */
-#define HW_PAGE_SHIFT		12
-#define HW_PAGE_SIZE		(ASM_CONST(1) << HW_PAGE_SHIFT)
-#define HW_PAGE_MASK		(~(HW_PAGE_SIZE-1))
-
-/* PAGE_FACTOR is the number of bits factor between PAGE_SHIFT and
- * HW_PAGE_SHIFT, that is 4k pages
- */
-#define PAGE_FACTOR		(PAGE_SHIFT - HW_PAGE_SHIFT)
-
-/* Segment size */
-#define SID_SHIFT       	28
-#define SID_MASK        	0xfffffffffUL
-#define ESID_MASK		0xfffffffff0000000UL
-#define GET_ESID(x)     	(((x) >> SID_SHIFT) & SID_MASK)
-
-/* Large pages size */
-
-#ifndef __ASSEMBLY__
-extern unsigned int HPAGE_SHIFT;
-#define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
-#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
-#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
-#endif /* __ASSEMBLY__ */
-
-#ifdef CONFIG_HUGETLB_PAGE
-
-
-#define HTLB_AREA_SHIFT		40
-#define HTLB_AREA_SIZE		(1UL << HTLB_AREA_SHIFT)
-#define GET_HTLB_AREA(x)	((x) >> HTLB_AREA_SHIFT)
-
-#define LOW_ESID_MASK(addr, len)    (((1U << (GET_ESID(addr+len-1)+1)) \
-	   	                      - (1U << GET_ESID(addr))) & 0xffff)
-#define HTLB_AREA_MASK(addr, len)   (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
-	   	                      - (1U << GET_HTLB_AREA(addr))) & 0xffff)
-
-#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
-#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
-#define ARCH_HAS_SETCLEAR_HUGE_PTE
-
-#define touches_hugepage_low_range(mm, addr, len) \
-	(LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas)
-#define touches_hugepage_high_range(mm, addr, len) \
-	(HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas)
-
-#define __within_hugepage_low_range(addr, len, segmask) \
-	((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask))
-#define within_hugepage_low_range(addr, len) \
-	__within_hugepage_low_range((addr), (len), \
-				    current->mm->context.low_htlb_areas)
-#define __within_hugepage_high_range(addr, len, zonemask) \
-	((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask))
-#define within_hugepage_high_range(addr, len) \
-	__within_hugepage_high_range((addr), (len), \
-				    current->mm->context.high_htlb_areas)
-
-#define is_hugepage_only_range(mm, addr, len) \
-	(touches_hugepage_high_range((mm), (addr), (len)) || \
-	  touches_hugepage_low_range((mm), (addr), (len)))
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-
-#define in_hugepage_area(context, addr) \
-	(cpu_has_feature(CPU_FTR_16M_PAGE) && \
-	 ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \
-	   ( ((addr) < 0x100000000L) && \
-	     ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) )
-
-#else /* !CONFIG_HUGETLB_PAGE */
-
-#define in_hugepage_area(mm, addr)	0
-
-#endif /* !CONFIG_HUGETLB_PAGE */
-
-/* align addr on a size boundary - adjust address up/down if needed */
-#define _ALIGN_UP(addr,size)	(((addr)+((size)-1))&(~((size)-1)))
-#define _ALIGN_DOWN(addr,size)	((addr)&(~((size)-1)))
-
-/* align addr on a size boundary - adjust address up if needed */
-#define _ALIGN(addr,size)     _ALIGN_UP(addr,size)
-
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)	_ALIGN(addr, PAGE_SIZE)
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-#include <asm/cache.h>
-
-#undef STRICT_MM_TYPECHECKS
-
-#define REGION_SIZE   4UL
-#define REGION_SHIFT  60UL
-#define REGION_MASK   (((1UL<<REGION_SIZE)-1UL)<<REGION_SHIFT)
-
-static __inline__ void clear_page(void *addr)
-{
-	unsigned long lines, line_size;
-
-	line_size = ppc64_caches.dline_size;
-	lines = ppc64_caches.dlines_per_page;
-
-	__asm__ __volatile__(
-	"mtctr  	%1	# clear_page\n\
-1:      dcbz  	0,%0\n\
-	add	%0,%0,%3\n\
-	bdnz+	1b"
-        : "=r" (addr)
-        : "r" (lines), "0" (addr), "r" (line_size)
-	: "ctr", "memory");
-}
-
-extern void copy_4K_page(void *to, void *from);
-
-#ifdef CONFIG_PPC_64K_PAGES
-static inline void copy_page(void *to, void *from)
-{
-	unsigned int i;
-	for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) {
-		copy_4K_page(to, from);
-		to += 4096;
-		from += 4096;
-	}
-}
-#else /* CONFIG_PPC_64K_PAGES */
-static inline void copy_page(void *to, void *from)
-{
-	copy_4K_page(to, from);
-}
-#endif /* CONFIG_PPC_64K_PAGES */
-
-struct page;
-extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
-extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *p);
-
-#ifdef STRICT_MM_TYPECHECKS
-/*
- * These are used to make use of C type-checking.  
- * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b.
- */
-
-/* PTE level */
-typedef struct { unsigned long pte; } pte_t;
-#define pte_val(x)	((x).pte)
-#define __pte(x)	((pte_t) { (x) })
-
-/* 64k pages additionally define a bigger "real PTE" type that gathers
- * the "second half" part of the PTE for pseudo 64k pages
- */
-#ifdef CONFIG_PPC_64K_PAGES
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef struct { pte_t pte; } real_pte_t;
-#endif
-
-/* PMD level */
-typedef struct { unsigned long pmd; } pmd_t;
-#define pmd_val(x)	((x).pmd)
-#define __pmd(x)	((pmd_t) { (x) })
-
-/* PUD level exusts only on 4k pages */
-#ifndef CONFIG_PPC_64K_PAGES
-typedef struct { unsigned long pud; } pud_t;
-#define pud_val(x)	((x).pud)
-#define __pud(x)	((pud_t) { (x) })
-#endif
-
-/* PGD level */
-typedef struct { unsigned long pgd; } pgd_t;
-#define pgd_val(x)	((x).pgd)
-#define __pgd(x)	((pgd_t) { (x) })
-
-/* Page protection bits */
-typedef struct { unsigned long pgprot; } pgprot_t;
-#define pgprot_val(x)	((x).pgprot)
-#define __pgprot(x)	((pgprot_t) { (x) })
-
-#else
-
-/*
- * .. while these make it easier on the compiler
- */
-
-typedef unsigned long pte_t;
-#define pte_val(x)	(x)
-#define __pte(x)	(x)
-
-#ifdef CONFIG_PPC_64K_PAGES
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef unsigned long real_pte_t;
-#endif
-
-
-typedef unsigned long pmd_t;
-#define pmd_val(x)	(x)
-#define __pmd(x)	(x)
-
-#ifndef CONFIG_PPC_64K_PAGES
-typedef unsigned long pud_t;
-#define pud_val(x)	(x)
-#define __pud(x)	(x)
-#endif
-
-typedef unsigned long pgd_t;
-#define pgd_val(x)	(x)
-#define pgprot_val(x)	(x)
-
-typedef unsigned long pgprot_t;
-#define __pgd(x)	(x)
-#define __pgprot(x)	(x)
-
-#endif
-
-#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
-
-extern int page_is_ram(unsigned long pfn);
-
-extern u64 ppc64_pft_size;		/* Log 2 of page table size */
-
-/* We do define AT_SYSINFO_EHDR but don't use the gate mecanism */
-#define __HAVE_ARCH_GATE_AREA		1
-
-#endif /* __ASSEMBLY__ */
-
-#ifdef MODULE
-#define __page_aligned __attribute__((__aligned__(PAGE_SIZE)))
-#else
-#define __page_aligned \
-	__attribute__((__aligned__(PAGE_SIZE), \
-		__section__(".data.page_aligned")))
-#endif
-
-
-/* This must match the -Ttext linker address            */
-/* Note: tophys & tovirt make assumptions about how     */
-/*       KERNELBASE is defined for performance reasons. */
-/*       When KERNELBASE moves, those macros may have   */
-/*             to change!                               */
-#define PAGE_OFFSET     ASM_CONST(0xC000000000000000)
-#define KERNELBASE      PAGE_OFFSET
-#define VMALLOCBASE     ASM_CONST(0xD000000000000000)
-
-#define VMALLOC_REGION_ID  (VMALLOCBASE >> REGION_SHIFT)
-#define KERNEL_REGION_ID   (KERNELBASE >> REGION_SHIFT)
-#define USER_REGION_ID     (0UL)
-#define REGION_ID(ea)	   (((unsigned long)(ea)) >> REGION_SHIFT)
-
-#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE))
-
-#ifdef CONFIG_FLATMEM
-#define pfn_to_page(pfn)	(mem_map + (pfn))
-#define page_to_pfn(page)	((unsigned long)((page) - mem_map))
-#define pfn_valid(pfn)		((pfn) < max_mapnr)
-#endif
-
-#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
-
-#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-
-/*
- * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
- * and needs to be executable.  This means the whole heap ends
- * up being executable.
- */
-#define VM_DATA_DEFAULT_FLAGS32	(VM_READ | VM_WRITE | VM_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#define VM_DATA_DEFAULT_FLAGS64	(VM_READ | VM_WRITE | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#define VM_DATA_DEFAULT_FLAGS \
-	(test_thread_flag(TIF_32BIT) ? \
-	 VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
-
-/*
- * This is the default if a program doesn't have a PT_GNU_STACK
- * program header entry. The PPC64 ELF ABI has a non executable stack
- * stack by default, so in the absense of a PT_GNU_STACK program header
- * we turn execute permission off.
- */
-#define VM_STACK_DEFAULT_FLAGS32	(VM_READ | VM_WRITE | VM_EXEC | \
-					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#define VM_STACK_DEFAULT_FLAGS64	(VM_READ | VM_WRITE | \
-					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#define VM_STACK_DEFAULT_FLAGS \
-	(test_thread_flag(TIF_32BIT) ? \
-	 VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
-
-#endif /* __KERNEL__ */
-
-#include <asm-generic/page.h>
-
-#endif /* _PPC64_PAGE_H */
-- 
cgit v1.2.3-71-gd317


From 493f25ef4087395891c99fcfe2c72e62e293e89f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 14 Nov 2005 17:32:50 +1100
Subject: powerpc: Fix 32-bit compile: PPC_MEMSTART was undeclared

This defines PPC_MEMSTART as 0 because it is still used in a couple
of places in the 32-bit code.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/asm-powerpc/page_32.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-powerpc/page_32.h b/include/asm-powerpc/page_32.h
index 35221300a2ce..7259cfd85da9 100644
--- a/include/asm-powerpc/page_32.h
+++ b/include/asm-powerpc/page_32.h
@@ -3,6 +3,8 @@
 
 #define VM_DATA_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS32
 
+#define PPC_MEMSTART	0
+
 #ifndef __ASSEMBLY__
 /*
  * The basic type of a PTE - 64 bits for those CPUs with > 32 bit
-- 
cgit v1.2.3-71-gd317


From 2c13b7cee045af689b36349c2bc6a9ed6e3d73fa Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@pobox.com>
Date: Mon, 14 Nov 2005 14:14:16 -0500
Subject: [libata] minor fixes, new helpers

- in ata_dev_identify(), don't assume that all devices are either
  ATA or ATAPI.  In the future, this code will see port multipliers
  and other devices.
- make a debugging printk less verbose
- add new helper ata_qc_reinit()
- add new helper BPRINTK() and port flag ATA_FLAG_DEBUGMSG, for
  fine-grained debugging use.
---
 drivers/scsi/libata-core.c | 11 +++--------
 include/linux/libata.h     | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 1ccaf467d516..3855bfa8e8d4 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1263,7 +1263,7 @@ retry:
 	}
 
 	/* ATAPI-specific feature tests */
-	else {
+	else if (dev->class == ATA_DEV_ATAPI) {
 		if (ata_id_is_ata(dev->id))		/* sanity check */
 			goto err_out_nosup;
 
@@ -2399,7 +2399,7 @@ static void ata_sg_clean(struct ata_queued_cmd *qc)
 	if (qc->flags & ATA_QCFLAG_SINGLE)
 		assert(qc->n_elem == 1);
 
-	DPRINTK("unmapping %u sg elements\n", qc->n_elem);
+	VPRINTK("unmapping %u sg elements\n", qc->n_elem);
 
 	/* if we padded the buffer out to 32-bit bound, and data
 	 * xfer direction is from-device, we must copy from the
@@ -3432,16 +3432,11 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
 
 	qc = ata_qc_new(ap);
 	if (qc) {
-		qc->__sg = NULL;
-		qc->flags = 0;
 		qc->scsicmd = NULL;
 		qc->ap = ap;
 		qc->dev = dev;
-		qc->cursect = qc->cursg = qc->cursg_ofs = 0;
-		qc->nsect = 0;
-		qc->nbytes = qc->curbytes = 0;
 
-		ata_tf_init(ap, &qc->tf, dev->devno);
+		ata_qc_reinit(qc);
 	}
 
 	return qc;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index ad5996183ec2..f2dbb684ce9e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -59,6 +59,8 @@
 #define VPRINTK(fmt, args...)
 #endif	/* ATA_DEBUG */
 
+#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args)
+
 #ifdef ATA_NDEBUG
 #define assert(expr)
 #else
@@ -119,6 +121,7 @@ enum {
 	ATA_FLAG_PIO_DMA	= (1 << 8), /* PIO cmds via DMA */
 	ATA_FLAG_NOINTR		= (1 << 9), /* FIXME: Remove this once
 					     * proper HSM is in place. */
+	ATA_FLAG_DEBUGMSG	= (1 << 10),
 
 	ATA_QCFLAG_ACTIVE	= (1 << 1), /* cmd not yet ack'd to scsi lyer */
 	ATA_QCFLAG_SG		= (1 << 3), /* have s/g table? */
@@ -659,6 +662,17 @@ static inline void ata_tf_init(struct ata_port *ap, struct ata_taskfile *tf, uns
 		tf->device = ATA_DEVICE_OBS | ATA_DEV1;
 }
 
+static inline void ata_qc_reinit(struct ata_queued_cmd *qc)
+{
+	qc->__sg = NULL;
+	qc->flags = 0;
+	qc->cursect = qc->cursg = qc->cursg_ofs = 0;
+	qc->nsect = 0;
+	qc->nbytes = qc->curbytes = 0;
+
+	ata_tf_init(qc->ap, &qc->tf, qc->dev->devno);
+}
+
 
 /**
  *	ata_irq_on - Enable interrupts on a port.
-- 
cgit v1.2.3-71-gd317


From c0400c4f5a08cfd1c657f7f616fcf1dfbd76a4d7 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 14 Nov 2005 15:21:41 -0800
Subject: [NETFILTER] nfnetlink: skip size check if size not specified (== 0)

Skip sizecheck if the size of the attribute wasn't specified, ie. zero.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 72975fa8795d..8be2f840a557 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -154,11 +154,14 @@ extern void nfattr_parse(struct nfattr *tb[], int maxattr,
 
 #define nfattr_bad_size(tb, max, cta_min)				\
 ({	int __i, __res = 0;						\
- 	for (__i=0; __i<max; __i++) 					\
+ 	for (__i=0; __i<max; __i++) {					\
+ 		if (!cta_min[__i])					\
+ 			continue;					\
  		if (tb[__i] && NFA_PAYLOAD(tb[__i]) < cta_min[__i]){	\
  			__res = 1;					\
  			break;						\
  		}							\
+ 	}								\
  	__res;								\
 })
 
-- 
cgit v1.2.3-71-gd317


From 37d2e7a20d745035b600f1a6be56cbb9c7259419 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@netfilter.org>
Date: Mon, 14 Nov 2005 15:24:59 -0800
Subject: [NETFILTER] nfnetlink: unconditionally require CAP_NET_ADMIN

This patch unconditionally requires CAP_NET_ADMIN for all nfnetlink
messages.  It also removes the per-message cap_required field, since all
existing subsystems use CAP_NET_ADMIN for all their messages anyway.

Patrick McHardy owes me a beer if we ever need to re-introduce this.

Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink.h       |  1 -
 net/ipv4/netfilter/ip_conntrack_netlink.c | 21 +++++++--------------
 net/netfilter/nfnetlink.c                 | 28 ++++++++++++----------------
 net/netfilter/nfnetlink_log.c             |  6 ++----
 net/netfilter/nfnetlink_queue.c           |  9 +++------
 5 files changed, 24 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 8be2f840a557..934a2479f160 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -112,7 +112,6 @@ struct nfnl_callback
 {
 	int (*call)(struct sock *nl, struct sk_buff *skb, 
 		struct nlmsghdr *nlh, struct nfattr *cda[], int *errp);
-	kernel_cap_t cap_required; /* capabilities required for this msg */
 	u_int16_t attr_count;	/* number of nfattr's */
 };
 
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index f5e5e3158670..de9f4464438d 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -1543,29 +1543,22 @@ static struct notifier_block ctnl_notifier_exp = {
 
 static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
 	[IPCTNL_MSG_CT_NEW]		= { .call = ctnetlink_new_conntrack,
-					    .attr_count = CTA_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_MAX, },
 	[IPCTNL_MSG_CT_GET] 		= { .call = ctnetlink_get_conntrack,
-					    .attr_count = CTA_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_MAX, },
 	[IPCTNL_MSG_CT_DELETE]  	= { .call = ctnetlink_del_conntrack,
-					    .attr_count = CTA_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_MAX, },
 	[IPCTNL_MSG_CT_GET_CTRZERO] 	= { .call = ctnetlink_get_conntrack,
-					    .attr_count = CTA_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_MAX, },
 };
 
 static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
 	[IPCTNL_MSG_EXP_GET]		= { .call = ctnetlink_get_expect,
-					    .attr_count = CTA_EXPECT_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_EXPECT_MAX, },
 	[IPCTNL_MSG_EXP_NEW]		= { .call = ctnetlink_new_expect,
-					    .attr_count = CTA_EXPECT_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_EXPECT_MAX, },
 	[IPCTNL_MSG_EXP_DELETE]		= { .call = ctnetlink_del_expect,
-					    .attr_count = CTA_EXPECT_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_EXPECT_MAX, },
 };
 
 static struct nfnetlink_subsystem ctnl_subsys = {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 83f4c53030fc..a60c59b97631 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -223,6 +223,12 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
 		 NFNL_SUBSYS_ID(nlh->nlmsg_type),
 		 NFNL_MSG_TYPE(nlh->nlmsg_type));
 
+	if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) {
+		DEBUGP("missing CAP_NET_ADMIN\n");
+		*errp = -EPERM;
+		return -1;
+	}
+
 	/* Only requests are handled by kernel now. */
 	if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
 		DEBUGP("received non-request message\n");
@@ -240,15 +246,12 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
 	ss = nfnetlink_get_subsys(type);
 	if (!ss) {
 #ifdef CONFIG_KMOD
-		if (cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) {
-			/* don't call nfnl_shunlock, since it would reenter
-			 * with further packet processing */
-			up(&nfnl_sem);
-			request_module("nfnetlink-subsys-%d",
-					NFNL_SUBSYS_ID(type));
-			nfnl_shlock();
-			ss = nfnetlink_get_subsys(type);
-		}
+		/* don't call nfnl_shunlock, since it would reenter
+		 * with further packet processing */
+		up(&nfnl_sem);
+		request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
+		nfnl_shlock();
+		ss = nfnetlink_get_subsys(type);
 		if (!ss)
 #endif
 			goto err_inval;
@@ -260,13 +263,6 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
 		goto err_inval;
 	}
 
-	if (nc->cap_required && 
-	    !cap_raised(NETLINK_CB(skb).eff_cap, nc->cap_required)) {
-		DEBUGP("permission denied for type %d\n", type);
-		*errp = -EPERM;
-		return -1;
-	}
-
 	{
 		u_int16_t attr_count = 
 			ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d194676f3655..cba63729313d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -862,11 +862,9 @@ out_put:
 
 static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = {
 	[NFULNL_MSG_PACKET]	= { .call = nfulnl_recv_unsupp,
-				    .attr_count = NFULA_MAX,
-				    .cap_required = CAP_NET_ADMIN, },
+				    .attr_count = NFULA_MAX, },
 	[NFULNL_MSG_CONFIG]	= { .call = nfulnl_recv_config,
-				    .attr_count = NFULA_CFG_MAX,
-				    .cap_required = CAP_NET_ADMIN },
+				    .attr_count = NFULA_CFG_MAX, },
 };
 
 static struct nfnetlink_subsystem nfulnl_subsys = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f065a6c94953..f28460b61e47 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -931,14 +931,11 @@ out_put:
 
 static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
 	[NFQNL_MSG_PACKET]	= { .call = nfqnl_recv_unsupp,
-				    .attr_count = NFQA_MAX,
-				    .cap_required = CAP_NET_ADMIN },
+				    .attr_count = NFQA_MAX, },
 	[NFQNL_MSG_VERDICT]	= { .call = nfqnl_recv_verdict,
-				    .attr_count = NFQA_MAX,
-				    .cap_required = CAP_NET_ADMIN },
+				    .attr_count = NFQA_MAX, },
 	[NFQNL_MSG_CONFIG]	= { .call = nfqnl_recv_config,
-				    .attr_count = NFQA_CFG_MAX,
-				    .cap_required = CAP_NET_ADMIN },
+				    .attr_count = NFQA_CFG_MAX, },
 };
 
 static struct nfnetlink_subsystem nfqnl_subsys = {
-- 
cgit v1.2.3-71-gd317


From a272e24cc8751d125f9582befed0213a2a2b270f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 14 Nov 2005 21:55:48 +1100
Subject: powerpc: Remove an extraneous and incorrect declaration of
 pmac_nvram_init.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/asm-powerpc/nvram.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-powerpc/nvram.h b/include/asm-powerpc/nvram.h
index 1858244ced32..24bd8c2388ea 100644
--- a/include/asm-powerpc/nvram.h
+++ b/include/asm-powerpc/nvram.h
@@ -68,7 +68,6 @@ extern int nvram_clear_error_log(void);
 extern struct nvram_partition *nvram_find_partition(int sig, const char *name);
 
 extern int pSeries_nvram_init(void);
-extern int pmac_nvram_init(void);
 extern int mmio_nvram_init(void);
 
 /* PowerMac specific nvram stuffs */
-- 
cgit v1.2.3-71-gd317


From a2f1b424900715ed9d1699c3bb88a434a2b42bc0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:53 +0100
Subject: [PATCH] x86_64: Add 4GB DMA32 zone

Add a new 4GB GFP_DMA32 zone between the GFP_DMA and GFP_NORMAL zones.

As a bit of historical background: when the x86-64 port
was originally designed we had some discussion if we should
use a 16MB DMA zone like i386 or a 4GB DMA zone like IA64 or
both. Both was ruled out at this point because it was in early
2.4 when VM is still quite shakey and had bad troubles even
dealing with one DMA zone.  We settled on the 16MB DMA zone mainly
because we worried about older soundcards and the floppy.

But this has always caused problems since then because
device drivers had trouble getting enough DMA able memory. These days
the VM works much better and the wide use of NUMA has proven
it can deal with many zones successfully.

So this patch adds both zones.

This helps drivers who need a lot of memory below 4GB because
their hardware is not accessing more (graphic drivers - proprietary
and free ones, video frame buffer drivers, sound drivers etc.).
Previously they could only use IOMMU+16MB GFP_DMA, which
was not enough memory.

Another common problem is that hardware who has full memory
addressing for >4GB misses it for some control structures in memory
(like transmit rings or other metadata).  They tended to allocate memory
in the 16MB GFP_DMA or the IOMMU/swiotlb then using pci_alloc_consistent,
but that can tie up a lot of precious 16MB GFPDMA/IOMMU/swiotlb memory
(even on AMD systems the IOMMU tends to be quite small) especially if you have
many devices.  With the new zone pci_alloc_consistent can just put
this stuff into memory below 4GB which works better.

One argument was still if the zone should be 4GB or 2GB. The main
motivation for 2GB would be an unnamed not so unpopular hardware
raid controller (mostly found in older machines from a particular four letter
company) who has a strange 2GB restriction in firmware. But
that one works ok with swiotlb/IOMMU anyways, so it doesn't really
need GFP_DMA32. I chose 4GB to be compatible with IA64 and because
it seems to be the most common restriction.

The new zone is so far added only for x86-64.

For other architectures who don't set up this
new zone nothing changes. Architectures can set a compatibility
define in Kconfig CONFIG_DMA_IS_DMA32 that will define GFP_DMA32
as GFP_DMA. Otherwise it's a nop because on 32bit architectures
it's normally not needed because GFP_NORMAL (=0) is DMA able
enough.

One problem is still that GFP_DMA means different things on different
architectures. e.g. some drivers used to have #ifdef ia64  use GFP_DMA
(trusting it to be 4GB) #elif __x86_64__ (use other hacks like
the swiotlb because 16MB is not enough) ... . This was quite
ugly and is now obsolete.

These should be now converted to use GFP_DMA32 unconditionally. I haven't done
this yet. Or best only use pci_alloc_consistent/dma_alloc_coherent
which will use GFP_DMA32 transparently.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/mm/init.c      | 65 ++++++++++++++++++++++++++++++----------------
 arch/x86_64/mm/numa.c      | 25 ++++--------------
 include/asm-x86_64/dma.h   | 11 ++++++--
 include/asm-x86_64/proto.h |  2 ++
 include/linux/gfp.h        | 11 ++++++++
 include/linux/mmzone.h     | 16 +++++++-----
 mm/page_alloc.c            | 15 ++++++++---
 7 files changed, 90 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index e60a1a848de8..a1ad4cc423a7 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -318,32 +318,51 @@ void zap_low_mappings(void)
 	flush_tlb_all();
 }
 
+/* Compute zone sizes for the DMA and DMA32 zones in a node. */
+__init void
+size_zones(unsigned long *z, unsigned long *h,
+	   unsigned long start_pfn, unsigned long end_pfn)
+{
+ 	int i;
+ 	unsigned long w;
+
+ 	for (i = 0; i < MAX_NR_ZONES; i++)
+ 		z[i] = 0;
+
+ 	if (start_pfn < MAX_DMA_PFN)
+ 		z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
+ 	if (start_pfn < MAX_DMA32_PFN) {
+ 		unsigned long dma32_pfn = MAX_DMA32_PFN;
+ 		if (dma32_pfn > end_pfn)
+ 			dma32_pfn = end_pfn;
+ 		z[ZONE_DMA32] = dma32_pfn - start_pfn;
+ 	}
+ 	z[ZONE_NORMAL] = end_pfn - start_pfn;
+
+ 	/* Remove lower zones from higher ones. */
+ 	w = 0;
+ 	for (i = 0; i < MAX_NR_ZONES; i++) {
+ 		if (z[i])
+ 			z[i] -= w;
+ 	        w += z[i];
+	}
+
+	/* Compute holes */
+	w = 0;
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		unsigned long s = w;
+		w += z[i];
+		h[i] = e820_hole_size(s, w);
+	}
+}
+
 #ifndef CONFIG_NUMA
 void __init paging_init(void)
 {
-	{
-		unsigned long zones_size[MAX_NR_ZONES];
-		unsigned long holes[MAX_NR_ZONES];
-		unsigned int max_dma;
-
-		memset(zones_size, 0, sizeof(zones_size));
-		memset(holes, 0, sizeof(holes));
-
-		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-		if (end_pfn < max_dma) {
-			zones_size[ZONE_DMA] = end_pfn;
-			holes[ZONE_DMA] = e820_hole_size(0, end_pfn);
-		} else {
-			zones_size[ZONE_DMA] = max_dma;
-			holes[ZONE_DMA] = e820_hole_size(0, max_dma);
-			zones_size[ZONE_NORMAL] = end_pfn - max_dma;
-			holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn);
-		}
-		free_area_init_node(0, NODE_DATA(0), zones_size,
-                        __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
-	}
-	return;
+	unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
+	size_zones(zones, holes, 0, end_pfn);
+	free_area_init_node(0, NODE_DATA(0), zones,
+			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
 }
 #endif
 
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 214803821001..18e86e2eac2d 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -132,29 +132,14 @@ void __init setup_node_zones(int nodeid)
 	unsigned long start_pfn, end_pfn; 
 	unsigned long zones[MAX_NR_ZONES];
 	unsigned long holes[MAX_NR_ZONES];
-	unsigned long dma_end_pfn;
 
-	memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); 
-	memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES);
+ 	start_pfn = node_start_pfn(nodeid);
+ 	end_pfn = node_end_pfn(nodeid);
 
-	start_pfn = node_start_pfn(nodeid);
-	end_pfn = node_end_pfn(nodeid);
+	Dprintk(KERN_INFO "setting up node %d %lx-%lx\n",
+		nodeid, start_pfn, end_pfn);
 
-	Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
-	
-	/* All nodes > 0 have a zero length zone DMA */ 
-	dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; 
-	if (start_pfn < dma_end_pfn) { 
-		zones[ZONE_DMA] = dma_end_pfn - start_pfn;
-		holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn);
-		zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; 
-		holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn);
-
-	} else { 
-		zones[ZONE_NORMAL] = end_pfn - start_pfn; 
-		holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn);
-	} 
-    
+	size_zones(zones, holes, start_pfn, end_pfn);
 	free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
 			    start_pfn, holes);
 } 
diff --git a/include/asm-x86_64/dma.h b/include/asm-x86_64/dma.h
index 16fa3a064d0c..6f2a817b6a7c 100644
--- a/include/asm-x86_64/dma.h
+++ b/include/asm-x86_64/dma.h
@@ -72,8 +72,15 @@
 
 #define MAX_DMA_CHANNELS	8
 
-/* The maximum address that we can perform a DMA transfer to on this platform */
-#define MAX_DMA_ADDRESS      (PAGE_OFFSET+0x1000000)
+
+/* 16MB ISA DMA zone */
+#define MAX_DMA_PFN   ((16*1024*1024) >> PAGE_SHIFT)
+
+/* 4GB broken PCI/AGP hardware bus master zone */
+#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT)
+
+/* Compat define for old dma zone */
+#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
 
 /* 8237 DMA controllers */
 #define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index dbb37b0adb43..c251152a0658 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -22,6 +22,8 @@ extern void mtrr_bp_init(void);
 #define mtrr_bp_init() do {} while (0)
 #endif
 extern void init_memory_mapping(unsigned long start, unsigned long end);
+extern void size_zones(unsigned long *z, unsigned long *h,
+			unsigned long start_pfn, unsigned long end_pfn);
 
 extern void system_call(void); 
 extern int kernel_syscall(void);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c3779432a723..4351e6bb5a79 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -14,6 +14,13 @@ struct vm_area_struct;
 /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
 #define __GFP_DMA	((__force gfp_t)0x01u)
 #define __GFP_HIGHMEM	((__force gfp_t)0x02u)
+#ifdef CONFIG_DMA_IS_DMA32
+#define __GFP_DMA32	((__force gfp_t)0x01)	/* ZONE_DMA is ZONE_DMA32 */
+#elif BITS_PER_LONG < 64
+#define __GFP_DMA32	((__force gfp_t)0x00)	/* ZONE_NORMAL is ZONE_DMA32 */
+#else
+#define __GFP_DMA32	((__force gfp_t)0x04)	/* Has own ZONE_DMA32 */
+#endif
 
 /*
  * Action modifiers - doesn't change the zoning
@@ -64,6 +71,10 @@ struct vm_area_struct;
 
 #define GFP_DMA		__GFP_DMA
 
+/* 4GB DMA on some platforms */
+#define GFP_DMA32	__GFP_DMA32
+
+
 #define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK))
 
 /*
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f5fa3082fd6a..da7a829f8561 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -71,10 +71,11 @@ struct per_cpu_pageset {
 #endif
 
 #define ZONE_DMA		0
-#define ZONE_NORMAL		1
-#define ZONE_HIGHMEM		2
+#define ZONE_DMA32		1
+#define ZONE_NORMAL		2
+#define ZONE_HIGHMEM		3
 
-#define MAX_NR_ZONES		3	/* Sync this with ZONES_SHIFT */
+#define MAX_NR_ZONES		4	/* Sync this with ZONES_SHIFT */
 #define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
 
 
@@ -108,9 +109,10 @@ struct per_cpu_pageset {
 
 /*
  * On machines where it is needed (eg PCs) we divide physical memory
- * into multiple physical zones. On a PC we have 3 zones:
+ * into multiple physical zones. On a PC we have 4 zones:
  *
  * ZONE_DMA	  < 16 MB	ISA DMA capable memory
+ * ZONE_DMA32	     0 MB 	Empty
  * ZONE_NORMAL	16-896 MB	direct mapped by the kernel
  * ZONE_HIGHMEM	 > 896 MB	only page cache and user processes
  */
@@ -455,10 +457,10 @@ extern struct pglist_data contig_page_data;
 
 #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)
 /*
- * with 32 bit page->flags field, we reserve 8 bits for node/zone info.
- * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes.
+ * with 32 bit page->flags field, we reserve 9 bits for node/zone info.
+ * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes.
  */
-#define FLAGS_RESERVED		8
+#define FLAGS_RESERVED		9
 
 #elif BITS_PER_LONG == 64
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2dbdd98426fd..9b43511dbefd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -60,8 +60,11 @@ long nr_swap_pages;
  *	NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
  *	HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
  *	HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
+ *
+ * TBD: should special case ZONE_DMA32 machines here - in those we normally
+ * don't need any ZONE_NORMAL reservation
  */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
 
 EXPORT_SYMBOL(totalram_pages);
 EXPORT_SYMBOL(nr_swap_pages);
@@ -73,7 +76,7 @@ EXPORT_SYMBOL(nr_swap_pages);
 struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
 EXPORT_SYMBOL(zone_table);
 
-static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
+static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
 
 unsigned long __initdata nr_kernel_pages;
@@ -1442,6 +1445,10 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli
 		zone = pgdat->node_zones + ZONE_NORMAL;
 		if (zone->present_pages)
 			zonelist->zones[j++] = zone;
+	case ZONE_DMA32:
+		zone = pgdat->node_zones + ZONE_DMA32;
+		if (zone->present_pages)
+			zonelist->zones[j++] = zone;
 	case ZONE_DMA:
 		zone = pgdat->node_zones + ZONE_DMA;
 		if (zone->present_pages)
@@ -1456,6 +1463,8 @@ static inline int highest_zone(int zone_bits)
 	int res = ZONE_NORMAL;
 	if (zone_bits & (__force int)__GFP_HIGHMEM)
 		res = ZONE_HIGHMEM;
+	if (zone_bits & (__force int)__GFP_DMA32)
+		res = ZONE_DMA32;
 	if (zone_bits & (__force int)__GFP_DMA)
 		res = ZONE_DMA;
 	return res;
@@ -1976,7 +1985,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 		if (zholes_size)
 			realsize -= zholes_size[j];
 
-		if (j == ZONE_DMA || j == ZONE_NORMAL)
+		if (j < ZONE_HIGHMEM)
 			nr_kernel_pages += realsize;
 		nr_all_pages += realsize;
 
-- 
cgit v1.2.3-71-gd317


From 979edfadbae2286eec5b46143c00e81bca96498e Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Sat, 5 Nov 2005 17:25:53 +0100
Subject: [PATCH] x86_64: Adjust, correct, and complete the HPET definitions
 for x86-64.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/hpet.h | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86_64/hpet.h b/include/asm-x86_64/hpet.h
index a3877f570998..c20c28f5c7a0 100644
--- a/include/asm-x86_64/hpet.h
+++ b/include/asm-x86_64/hpet.h
@@ -14,18 +14,18 @@
 #define HPET_CFG	0x010
 #define HPET_STATUS	0x020
 #define HPET_COUNTER	0x0f0
-#define HPET_T0_CFG	0x100
-#define HPET_T0_CMP	0x108
-#define HPET_T0_ROUTE	0x110
-#define HPET_T1_CFG	0x120
-#define HPET_T1_CMP	0x128
-#define HPET_T1_ROUTE	0x130
-#define HPET_T2_CFG	0x140
-#define HPET_T2_CMP	0x148
-#define HPET_T2_ROUTE	0x150
+#define HPET_Tn_OFFSET	0x20
+#define HPET_Tn_CFG(n)	 (0x100 + (n) * HPET_Tn_OFFSET)
+#define HPET_Tn_ROUTE(n) (0x104 + (n) * HPET_Tn_OFFSET)
+#define HPET_Tn_CMP(n)	 (0x108 + (n) * HPET_Tn_OFFSET)
+#define HPET_T0_CFG	HPET_Tn_CFG(0)
+#define HPET_T0_CMP	HPET_Tn_CMP(0)
+#define HPET_T1_CFG	HPET_Tn_CFG(1)
+#define HPET_T1_CMP	HPET_Tn_CMP(1)
 
 #define HPET_ID_VENDOR	0xffff0000
 #define HPET_ID_LEGSUP	0x00008000
+#define HPET_ID_64BIT	0x00002000
 #define HPET_ID_NUMBER	0x00001f00
 #define HPET_ID_REV	0x000000ff
 #define	HPET_ID_NUMBER_SHIFT	8
@@ -38,11 +38,18 @@
 #define	HPET_LEGACY_8254	2
 #define	HPET_LEGACY_RTC		8
 
-#define HPET_TN_ENABLE		0x004
-#define HPET_TN_PERIODIC	0x008
-#define HPET_TN_PERIODIC_CAP	0x010
-#define HPET_TN_SETVAL		0x040
-#define HPET_TN_32BIT		0x100
+#define HPET_TN_LEVEL		0x0002
+#define HPET_TN_ENABLE		0x0004
+#define HPET_TN_PERIODIC	0x0008
+#define HPET_TN_PERIODIC_CAP	0x0010
+#define HPET_TN_64BIT_CAP	0x0020
+#define HPET_TN_SETVAL		0x0040
+#define HPET_TN_32BIT		0x0100
+#define HPET_TN_ROUTE		0x3e00
+#define HPET_TN_FSB		0x4000
+#define HPET_TN_FSB_CAP		0x8000
+
+#define HPET_TN_ROUTE_SHIFT	9
 
 extern int is_hpet_enabled(void);
 extern int hpet_rtc_timer_init(void);
-- 
cgit v1.2.3-71-gd317


From 89b831ef8bf5cfbb357dbc0a2e07700d7f20eec5 Mon Sep 17 00:00:00 2001
From: Jacob Shin <jacob.shin@amd.com>
Date: Sat, 5 Nov 2005 17:25:53 +0100
Subject: [PATCH] x86_64: Support for AMD specific MCE Threshold.

MC4_MISC - DRAM Errors Threshold Register realized under AMD K8 Rev F.
This register is used to count correctable and uncorrectable ECC errors that occur during DRAM read operations.
The user may interface through sysfs files in order to change the threshold configuration.

bank%d/error_count - reads current error count, write to clear.
bank%d/interrupt_enable - set/clear interrupt enable.
bank%d/threshold_limit - read/write the threshold limit.

APIC vector 0xF9 in hw_irq.h.
5 software defined bank ids in mce.h.
new apic.c function to setup threshold apic lvt.
defaults to interrupt off, count enabled, and threshold limit max.
sysfs interface created on /sys/devices/system/threshold.

AK: added some ifdefs to make it compile on UP

Signed-off-by: Jacob Shin <jacob.shin@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/Kconfig          |   8 +
 arch/x86_64/kernel/Makefile  |   1 +
 arch/x86_64/kernel/apic.c    |  10 +
 arch/x86_64/kernel/entry.S   |   3 +
 arch/x86_64/kernel/i8259.c   |   2 +
 arch/x86_64/kernel/mce.c     |   3 +
 arch/x86_64/kernel/mce_amd.c | 538 +++++++++++++++++++++++++++++++++++++++++++
 arch/x86_64/kernel/traps.c   |   4 +
 include/asm-x86_64/apic.h    |   2 +
 include/asm-x86_64/hw_irq.h  |   2 +-
 include/asm-x86_64/mce.h     |  10 +
 11 files changed, 582 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86_64/kernel/mce_amd.c

(limited to 'include')

diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 21afa69a086d..504dc52e8bfa 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -374,6 +374,14 @@ config X86_MCE_INTEL
 	   Additional support for intel specific MCE features such as
 	   the thermal monitor.
 
+config X86_MCE_AMD
+	bool "AMD MCE features"
+	depends on X86_MCE && X86_LOCAL_APIC
+	default y
+	help
+	   Additional support for AMD specific MCE features such as
+	   the DRAM Error Threshold.
+
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if EMBEDDED
 	default "0x100000"
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 14328cab5d3a..fe4cbd1c4b2f 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -11,6 +11,7 @@ obj-y	:= process.o signal.o entry.o traps.o irq.o \
 
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
 obj-$(CONFIG_MTRR)		+= ../../i386/kernel/cpu/mtrr/
 obj-$(CONFIG_ACPI)		+= acpi/
 obj-$(CONFIG_X86_MSR)		+= msr.o
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index b6e7715d877f..18691ce4c759 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -833,6 +833,16 @@ int setup_profiling_timer(unsigned int multiplier)
 	return 0;
 }
 
+#ifdef CONFIG_X86_MCE_AMD
+void setup_threshold_lvt(unsigned long lvt_off)
+{
+	unsigned int v = 0;
+	unsigned long reg = (lvt_off << 4) + 0x500;
+	v |= THRESHOLD_APIC_VECTOR;
+	apic_write(reg, v);
+}
+#endif /* CONFIG_X86_MCE_AMD */
+
 #undef APIC_DIVISOR
 
 /*
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 7937971d1853..9ff42041bb6b 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -612,6 +612,9 @@ retint_kernel:
 ENTRY(thermal_interrupt)
 	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
 
+ENTRY(threshold_interrupt)
+	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+
 #ifdef CONFIG_SMP	
 ENTRY(reschedule_interrupt)
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index c6c9791d77c1..5de30035e54b 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -492,6 +492,7 @@ void invalidate_interrupt5(void);
 void invalidate_interrupt6(void);
 void invalidate_interrupt7(void);
 void thermal_interrupt(void);
+void threshold_interrupt(void);
 void i8254_timer_resume(void);
 
 static void setup_timer_hardware(void)
@@ -580,6 +581,7 @@ void __init init_IRQ(void)
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 #endif	
 	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 69541db5ff2c..cf8a76f0f47e 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -356,6 +356,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
 		break;
+	case X86_VENDOR_AMD:
+		mce_amd_feature_init(c);
+		break;
 	default:
 		break;
 	}
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
new file mode 100644
index 000000000000..1f76175ace02
--- /dev/null
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -0,0 +1,538 @@
+/*
+ *  (c) 2005 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ *
+ *  Written by Jacob Shin - AMD, Inc.
+ *
+ *  Support : jacob.shin@amd.com
+ *
+ *  MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
+ *  MC4_MISC0 exists per physical processor.
+ *
+ */
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kobject.h>
+#include <linux/notifier.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/sysdev.h>
+#include <linux/sysfs.h>
+#include <asm/apic.h>
+#include <asm/mce.h>
+#include <asm/msr.h>
+#include <asm/percpu.h>
+
+#define PFX "mce_threshold: "
+#define VERSION "version 1.00.9"
+#define NR_BANKS 5
+#define THRESHOLD_MAX 0xFFF
+#define INT_TYPE_APIC 0x00020000
+#define MASK_VALID_HI 0x80000000
+#define MASK_LVTOFF_HI 0x00F00000
+#define MASK_COUNT_EN_HI 0x00080000
+#define MASK_INT_TYPE_HI 0x00060000
+#define MASK_OVERFLOW_HI 0x00010000
+#define MASK_ERR_COUNT_HI 0x00000FFF
+#define MASK_OVERFLOW 0x0001000000000000L
+
+struct threshold_bank {
+	unsigned int cpu;
+	u8 bank;
+	u8 interrupt_enable;
+	u16 threshold_limit;
+	struct kobject kobj;
+};
+
+static struct threshold_bank threshold_defaults = {
+	.interrupt_enable = 0,
+	.threshold_limit = THRESHOLD_MAX,
+};
+
+#ifdef CONFIG_SMP
+static unsigned char shared_bank[NR_BANKS] = {
+	0, 0, 0, 0, 1
+};
+#endif
+
+static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
+
+/*
+ * CPU Initialization
+ */
+
+/* must be called with correct cpu affinity */
+static void threshold_restart_bank(struct threshold_bank *b,
+				   int reset, u16 old_limit)
+{
+	u32 mci_misc_hi, mci_misc_lo;
+
+	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+
+	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+		reset = 1;	/* limit cannot be lower than err count */
+
+	if (reset) {		/* reset err count and overflow bit */
+		mci_misc_hi =
+		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
+		    (THRESHOLD_MAX - b->threshold_limit);
+	} else if (old_limit) {	/* change limit w/o reset */
+		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
+		    (old_limit - b->threshold_limit);
+		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
+		    (new_count & THRESHOLD_MAX);
+	}
+
+	b->interrupt_enable ?
+	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
+	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
+
+	mci_misc_hi |= MASK_COUNT_EN_HI;
+	wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+}
+
+void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
+{
+	int bank;
+	u32 mci_misc_lo, mci_misc_hi;
+	unsigned int cpu = smp_processor_id();
+
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);
+
+		/* !valid, !counter present, bios locked */
+		if (!(mci_misc_hi & MASK_VALID_HI) ||
+		    !(mci_misc_hi & MASK_VALID_HI >> 1) ||
+		    (mci_misc_hi & MASK_VALID_HI >> 2))
+			continue;
+
+		per_cpu(bank_map, cpu) |= (1 << bank);
+
+#ifdef CONFIG_SMP
+		if (shared_bank[bank] && cpu_core_id[cpu])
+			continue;
+#endif
+
+		setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
+		threshold_defaults.cpu = cpu;
+		threshold_defaults.bank = bank;
+		threshold_restart_bank(&threshold_defaults, 0, 0);
+	}
+}
+
+/*
+ * APIC Interrupt Handler
+ */
+
+/*
+ * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
+ * the interrupt goes off when error_count reaches threshold_limit.
+ * the handler will simply log mcelog w/ software defined bank number.
+ */
+asmlinkage void mce_threshold_interrupt(void)
+{
+	int bank;
+	struct mce m;
+
+	ack_APIC_irq();
+	irq_enter();
+
+	memset(&m, 0, sizeof(m));
+	rdtscll(m.tsc);
+	m.cpu = smp_processor_id();
+
+	/* assume first bank caused it */
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		m.bank = MCE_THRESHOLD_BASE + bank;
+		rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);
+
+		if (m.misc & MASK_OVERFLOW) {
+			mce_log(&m);
+			goto out;
+		}
+	}
+      out:
+	irq_exit();
+}
+
+/*
+ * Sysfs Interface
+ */
+
+static struct sysdev_class threshold_sysclass = {
+	set_kset_name("threshold"),
+};
+
+static DEFINE_PER_CPU(struct sys_device, device_threshold);
+
+struct threshold_attr {
+        struct attribute attr;
+        ssize_t(*show) (struct threshold_bank *, char *);
+        ssize_t(*store) (struct threshold_bank *, const char *, size_t count);
+};
+
+static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
+
+static cpumask_t affinity_set(unsigned int cpu)
+{
+	cpumask_t oldmask = current->cpus_allowed;
+	cpumask_t newmask = CPU_MASK_NONE;
+	cpu_set(cpu, newmask);
+	set_cpus_allowed(current, newmask);
+	return oldmask;
+}
+
+static void affinity_restore(cpumask_t oldmask)
+{
+	set_cpus_allowed(current, oldmask);
+}
+
+#define SHOW_FIELDS(name) \
+        static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
+        { \
+                return sprintf(buf, "%lx\n", (unsigned long) b->name); \
+        }
+SHOW_FIELDS(interrupt_enable)
+SHOW_FIELDS(threshold_limit)
+
+static ssize_t store_interrupt_enable(struct threshold_bank *b,
+				      const char *buf, size_t count)
+{
+	char *end;
+	cpumask_t oldmask;
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	b->interrupt_enable = !!new;
+
+	oldmask = affinity_set(b->cpu);
+	threshold_restart_bank(b, 0, 0);
+	affinity_restore(oldmask);
+
+	return end - buf;
+}
+
+static ssize_t store_threshold_limit(struct threshold_bank *b,
+				     const char *buf, size_t count)
+{
+	char *end;
+	cpumask_t oldmask;
+	u16 old;
+	unsigned long new = simple_strtoul(buf, &end, 0);
+	if (end == buf)
+		return -EINVAL;
+	if (new > THRESHOLD_MAX)
+		new = THRESHOLD_MAX;
+	if (new < 1)
+		new = 1;
+	old = b->threshold_limit;
+	b->threshold_limit = new;
+
+	oldmask = affinity_set(b->cpu);
+	threshold_restart_bank(b, 0, old);
+	affinity_restore(oldmask);
+
+	return end - buf;
+}
+
+static ssize_t show_error_count(struct threshold_bank *b, char *buf)
+{
+	u32 high, low;
+	cpumask_t oldmask;
+	oldmask = affinity_set(b->cpu);
+	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */
+	affinity_restore(oldmask);
+	return sprintf(buf, "%x\n",
+		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+}
+
+static ssize_t store_error_count(struct threshold_bank *b,
+				 const char *buf, size_t count)
+{
+	cpumask_t oldmask;
+	oldmask = affinity_set(b->cpu);
+	threshold_restart_bank(b, 1, 0);
+	affinity_restore(oldmask);
+	return 1;
+}
+
+#define THRESHOLD_ATTR(_name,_mode,_show,_store) {            \
+        .attr = {.name = __stringify(_name), .mode = _mode }, \
+        .show = _show,                                        \
+        .store = _store,                                      \
+};
+
+#define ATTR_FIELDS(name) \
+        static struct threshold_attr name = \
+        THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
+
+ATTR_FIELDS(interrupt_enable);
+ATTR_FIELDS(threshold_limit);
+ATTR_FIELDS(error_count);
+
+static struct attribute *default_attrs[] = {
+	&interrupt_enable.attr,
+	&threshold_limit.attr,
+	&error_count.attr,
+	NULL
+};
+
+#define to_bank(k) container_of(k,struct threshold_bank,kobj)
+#define to_attr(a) container_of(a,struct threshold_attr,attr)
+
+static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct threshold_bank *b = to_bank(kobj);
+	struct threshold_attr *a = to_attr(attr);
+	ssize_t ret;
+	ret = a->show ? a->show(b, buf) : -EIO;
+	return ret;
+}
+
+static ssize_t store(struct kobject *kobj, struct attribute *attr,
+		     const char *buf, size_t count)
+{
+	struct threshold_bank *b = to_bank(kobj);
+	struct threshold_attr *a = to_attr(attr);
+	ssize_t ret;
+	ret = a->store ? a->store(b, buf, count) : -EIO;
+	return ret;
+}
+
+static struct sysfs_ops threshold_ops = {
+	.show = show,
+	.store = store,
+};
+
+static struct kobj_type threshold_ktype = {
+	.sysfs_ops = &threshold_ops,
+	.default_attrs = default_attrs,
+};
+
+/* symlinks sibling shared banks to first core.  first core owns dir/files. */
+static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
+{
+	int err = 0;
+	struct threshold_bank *b = 0;
+
+#ifdef CONFIG_SMP
+	if (cpu_core_id[cpu] && shared_bank[bank]) {	/* symlink */
+		char name[16];
+		unsigned lcpu = first_cpu(cpu_core_map[cpu]);
+		if (cpu_core_id[lcpu])
+			goto out;	/* first core not up yet */
+
+		b = per_cpu(threshold_banks, lcpu)[bank];
+		if (!b)
+			goto out;
+		sprintf(name, "bank%i", bank);
+		err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
+					&b->kobj, name);
+		if (err)
+			goto out;
+		per_cpu(threshold_banks, cpu)[bank] = b;
+		goto out;
+	}
+#endif
+
+	b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
+	if (!b) {
+		err = -ENOMEM;
+		goto out;
+	}
+	memset(b, 0, sizeof(struct threshold_bank));
+
+	b->cpu = cpu;
+	b->bank = bank;
+	b->interrupt_enable = 0;
+	b->threshold_limit = THRESHOLD_MAX;
+	kobject_set_name(&b->kobj, "bank%i", bank);
+	b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
+	b->kobj.ktype = &threshold_ktype;
+
+	err = kobject_register(&b->kobj);
+	if (err) {
+		kfree(b);
+		goto out;
+	}
+	per_cpu(threshold_banks, cpu)[bank] = b;
+      out:
+	return err;
+}
+
+/* create dir/files for all valid threshold banks */
+static __cpuinit int threshold_create_device(unsigned int cpu)
+{
+	int bank;
+	int err = 0;
+
+	per_cpu(device_threshold, cpu).id = cpu;
+	per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
+	err = sysdev_register(&per_cpu(device_threshold, cpu));
+	if (err)
+		goto out;
+
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		if (!(per_cpu(bank_map, cpu) & 1 << bank))
+			continue;
+		err = threshold_create_bank(cpu, bank);
+		if (err)
+			goto out;
+	}
+      out:
+	return err;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * let's be hotplug friendly.
+ * in case of multiple core processors, the first core always takes ownership
+ *   of shared sysfs dir/files, and rest of the cores will be symlinked to it.
+ */
+
+/* cpu hotplug call removes all symlinks before first core dies */
+static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
+{
+	struct threshold_bank *b;
+	char name[16];
+
+	b = per_cpu(threshold_banks, cpu)[bank];
+	if (!b)
+		return;
+	if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
+		sprintf(name, "bank%i", bank);
+		sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
+		per_cpu(threshold_banks, cpu)[bank] = 0;
+	} else {
+		kobject_unregister(&b->kobj);
+		kfree(per_cpu(threshold_banks, cpu)[bank]);
+	}
+}
+
+static __cpuinit void threshold_remove_device(unsigned int cpu)
+{
+	int bank;
+
+	for (bank = 0; bank < NR_BANKS; ++bank) {
+		if (!(per_cpu(bank_map, cpu) & 1 << bank))
+			continue;
+		threshold_remove_bank(cpu, bank);
+	}
+	sysdev_unregister(&per_cpu(device_threshold, cpu));
+}
+
+/* link all existing siblings when first core comes up */
+static __cpuinit int threshold_create_symlinks(unsigned int cpu)
+{
+	int bank, err = 0;
+	unsigned int lcpu = 0;
+
+	if (cpu_core_id[cpu])
+		return 0;
+	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
+		if (lcpu == cpu)
+			continue;
+		for (bank = 0; bank < NR_BANKS; ++bank) {
+			if (!(per_cpu(bank_map, cpu) & 1 << bank))
+				continue;
+			if (!shared_bank[bank])
+				continue;
+			err = threshold_create_bank(lcpu, bank);
+		}
+	}
+	return err;
+}
+
+/* remove all symlinks before first core dies. */
+static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
+{
+	int bank;
+	unsigned int lcpu = 0;
+	if (cpu_core_id[cpu])
+		return;
+	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
+		if (lcpu == cpu)
+			continue;
+		for (bank = 0; bank < NR_BANKS; ++bank) {
+			if (!(per_cpu(bank_map, cpu) & 1 << bank))
+				continue;
+			if (!shared_bank[bank])
+				continue;
+			threshold_remove_bank(lcpu, bank);
+		}
+	}
+}
+#else /* !CONFIG_HOTPLUG_CPU */
+static __cpuinit void threshold_create_symlinks(unsigned int cpu)
+{
+}
+static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
+{
+}
+static void threshold_remove_device(unsigned int cpu)
+{
+}
+#endif
+
+/* get notified when a cpu comes on/off */
+static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb,
+					    unsigned long action, void *hcpu)
+{
+	/* cpu was unsigned int to begin with */
+	unsigned int cpu = (unsigned long)hcpu;
+
+	if (cpu >= NR_CPUS)
+		goto out;
+
+	switch (action) {
+	case CPU_ONLINE:
+		threshold_create_device(cpu);
+		threshold_create_symlinks(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		threshold_remove_symlinks(cpu);
+		break;
+	case CPU_DOWN_FAILED:
+		threshold_create_symlinks(cpu);
+		break;
+	case CPU_DEAD:
+		threshold_remove_device(cpu);
+		break;
+	default:
+		break;
+	}
+      out:
+	return NOTIFY_OK;
+}
+
+static struct notifier_block threshold_cpu_notifier = {
+	.notifier_call = threshold_cpu_callback,
+};
+
+static __init int threshold_init_device(void)
+{
+	int err;
+	int lcpu = 0;
+
+	err = sysdev_class_register(&threshold_sysclass);
+	if (err)
+		goto out;
+
+	/* to hit CPUs online before the notifier is up */
+	for_each_online_cpu(lcpu) {
+		err = threshold_create_device(lcpu);
+		if (err)
+			goto out;
+	}
+	register_cpu_notifier(&threshold_cpu_notifier);
+
+      out:
+	return err;
+}
+
+device_initcall(threshold_init_device);
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index b5e09e6b5536..4a836384dd0f 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -888,6 +888,10 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 {
 }
 
+asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+{
+}
+
 /*
  *  'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h
index 6c5d5ca8383a..5647b7de1749 100644
--- a/include/asm-x86_64/apic.h
+++ b/include/asm-x86_64/apic.h
@@ -111,6 +111,8 @@ extern unsigned int nmi_watchdog;
 
 extern int disable_timer_pin_1;
 
+extern void setup_threshold_lvt(unsigned long lvt_off);
+
 #endif /* CONFIG_X86_LOCAL_APIC */
 
 extern unsigned boot_cpu_id;
diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h
index dc97668ea0f9..c14a8c7267a6 100644
--- a/include/asm-x86_64/hw_irq.h
+++ b/include/asm-x86_64/hw_irq.h
@@ -55,7 +55,7 @@ struct hw_interrupt_type;
 #define CALL_FUNCTION_VECTOR	0xfc
 #define KDB_VECTOR		0xfb	/* reserved for KDB */
 #define THERMAL_APIC_VECTOR	0xfa
-/* 0xf9 free */
+#define THRESHOLD_APIC_VECTOR   0xf9
 #define INVALIDATE_TLB_VECTOR_END	0xf8
 #define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f8 used for TLB flush */
 
diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h
index 869249db6795..5d298b799a9f 100644
--- a/include/asm-x86_64/mce.h
+++ b/include/asm-x86_64/mce.h
@@ -67,6 +67,8 @@ struct mce_log {
 /* Software defined banks */
 #define MCE_EXTENDED_BANK	128
 #define MCE_THERMAL_BANK	MCE_EXTENDED_BANK + 0
+#define MCE_THRESHOLD_BASE      MCE_EXTENDED_BANK + 1 /* MCE_AMD */
+#define MCE_THRESHOLD_DRAM_ECC  MCE_THRESHOLD_BASE + 4
 
 void mce_log(struct mce *m);
 #ifdef CONFIG_X86_MCE_INTEL
@@ -77,4 +79,12 @@ static inline void mce_intel_feature_init(struct cpuinfo_x86 *c)
 }
 #endif
 
+#ifdef CONFIG_X86_MCE_AMD
+void mce_amd_feature_init(struct cpuinfo_x86 *c);
+#else
+static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)
+{
+}
+#endif
+
 #endif
-- 
cgit v1.2.3-71-gd317


From 6004e1b7effcbb385a6b7c790e4b8008682cf679 Mon Sep 17 00:00:00 2001
From: James Cleverdon <jamesclv@us.ibm.com>
Date: Sat, 5 Nov 2005 17:25:53 +0100
Subject: [PATCH] i386/x86-64: Share interrupt vectors when there is a large
 number of interrupt sources

Here's a patch that builds on Natalie Protasevich's IRQ compression
patch and tries to work for MPS boots as well as ACPI.  It is meant for
a 4-node IBM x460 NUMA box, which was dying because it had interrupt
pins with GSI numbers > NR_IRQS and thus overflowed irq_desc.

The problem is that this system has 270 GSIs (which are 1:1 mapped with
I/O APIC RTEs) and an 8-node box would have 540.  This is much bigger
than NR_IRQS (224 for both i386 and x86_64).  Also, there aren't enough
vectors to go around.  There are about 190 usable vectors, not counting
the reserved ones and the unused vectors at 0x20 to 0x2F.  So, my patch
attempts to compress the GSI range and share vectors by sharing IRQs.

Cc: "Protasevich, Natalie" <Natalie.Protasevich@unisys.com>

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/acpi/boot.c | 15 ++++-----
 arch/x86_64/kernel/io_apic.c | 80 ++++++++++++++++++++++++++++++++++++++++----
 arch/x86_64/kernel/mpparse.c |  2 +-
 include/asm-x86_64/desc.h    |  3 ++
 include/asm-x86_64/mpspec.h  |  3 +-
 5 files changed, 86 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index b66c13c0cc0f..82754bb80e20 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -39,17 +39,14 @@
 
 #ifdef	CONFIG_X86_64
 
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-}
 extern void __init clustered_apic_check(void);
-static inline int ioapic_setup_disabled(void)
-{
-	return 0;
-}
 
+extern int gsi_irq_sharing(int gsi);
 #include <asm/proto.h>
 
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
+
+
 #else				/* X86 */
 
 #ifdef	CONFIG_X86_LOCAL_APIC
@@ -57,6 +54,8 @@ static inline int ioapic_setup_disabled(void)
 #include <mach_mpparse.h>
 #endif				/* CONFIG_X86_LOCAL_APIC */
 
+static inline int gsi_irq_sharing(int gsi) { return gsi; }
+
 #endif				/* X86 */
 
 #define BAD_MADT_ENTRY(entry, end) (					    \
@@ -459,7 +458,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
 		*irq = IO_APIC_VECTOR(gsi);
 	else
 #endif
-		*irq = gsi;
+		*irq = gsi_irq_sharing(gsi);
 	return 0;
 }
 
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c8eee20cd519..97154ab058b4 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -57,7 +57,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
 /*
@@ -85,6 +85,7 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
 	int pin;							\
 	struct irq_pin_list *entry = irq_2_pin + irq;			\
 									\
+	BUG_ON(irq >= NR_IRQS);						\
 	for (;;) {							\
 		unsigned int reg;					\
 		pin = entry->pin;					\
@@ -127,6 +128,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 }
 #endif
 
+static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
+
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -137,6 +140,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	static int first_free_entry = NR_IRQS;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
+	BUG_ON(irq >= NR_IRQS);
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 
@@ -144,7 +148,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		entry->next = first_free_entry;
 		entry = irq_2_pin + entry->next;
 		if (++first_free_entry >= PIN_MAP_SIZE)
-			panic("io_apic.c: whoops");
+			panic("io_apic.c: ran out of irq_2_pin entries!");
 	}
 	entry->apic = apic;
 	entry->pin = pin;
@@ -420,6 +424,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 				best_guess = irq;
 		}
 	}
+	BUG_ON(best_guess >= NR_IRQS);
 	return best_guess;
 }
 
@@ -610,6 +615,64 @@ static inline int irq_trigger(int idx)
 	return MPBIOS_trigger(idx);
 }
 
+static int next_irq = 16;
+
+/*
+ * gsi_irq_sharing -- Name overload!  "irq" can be either a legacy IRQ
+ * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
+ * from ACPI, which can reach 800 in large boxen.
+ *
+ * Compact the sparse GSI space into a sequential IRQ series and reuse
+ * vectors if possible.
+ */
+int gsi_irq_sharing(int gsi)
+{
+	int i, tries, vector;
+
+	BUG_ON(gsi >= NR_IRQ_VECTORS);
+
+	if (platform_legacy_irq(gsi))
+		return gsi;
+
+	if (gsi_2_irq[gsi] != 0xFF)
+		return (int)gsi_2_irq[gsi];
+
+	tries = NR_IRQS;
+  try_again:
+	vector = assign_irq_vector(gsi);
+
+	/*
+	 * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+	 * use of vector and if found, return that IRQ.  However, we never want
+	 * to share legacy IRQs, which usually have a different trigger mode
+	 * than PCI.
+	 */
+	for (i = 0; i < NR_IRQS; i++)
+		if (IO_APIC_VECTOR(i) == vector)
+			break;
+	if (platform_legacy_irq(i)) {
+		if (--tries >= 0) {
+			IO_APIC_VECTOR(i) = 0;
+			goto try_again;
+		}
+		panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
+	}
+	if (i < NR_IRQS) {
+		gsi_2_irq[gsi] = i;
+		printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+				gsi, vector, i);
+		return i;
+	}
+
+	i = next_irq++;
+	BUG_ON(i >= NR_IRQS);
+	gsi_2_irq[gsi] = i;
+	IO_APIC_VECTOR(i) = vector;
+	printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+			gsi, vector, i);
+	return i;
+}
+
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
@@ -639,6 +702,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			while (i < apic)
 				irq += nr_ioapic_registers[i++];
 			irq += pin;
+			irq = gsi_irq_sharing(irq);
 			break;
 		}
 		default:
@@ -648,6 +712,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			break;
 		}
 	}
+	BUG_ON(irq >= NR_IRQS);
 
 	/*
 	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -663,6 +728,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			}
 		}
 	}
+	BUG_ON(irq >= NR_IRQS);
 	return irq;
 }
 
@@ -690,8 +756,8 @@ int assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
 
-	BUG_ON(irq >= NR_IRQ_VECTORS);
-	if (IO_APIC_VECTOR(irq) > 0)
+	BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
 		return IO_APIC_VECTOR(irq);
 next:
 	current_vector += 8;
@@ -699,9 +765,8 @@ next:
 		goto next;
 
 	if (current_vector >= FIRST_SYSTEM_VECTOR) {
-		offset++;
-		if (!(offset%8))
-			return -ENOSPC;
+		/* If we run out of vectors on large boxen, must share them. */
+		offset = (offset + 1) % 8;
 		current_vector = FIRST_DEVICE_VECTOR + offset;
 	}
 
@@ -1917,6 +1982,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 	entry.polarity = active_high_low;
 	entry.mask = 1;					 /* Disabled (masked) */
 
+	irq = gsi_irq_sharing(irq);
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index f16d38d09daf..8f6958e79455 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -218,7 +218,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
 			m->mpc_irqtype, m->mpc_irqflag & 3,
 			(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
 			m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
-	if (++mp_irq_entries == MAX_IRQ_SOURCES)
+	if (++mp_irq_entries >= MAX_IRQ_SOURCES)
 		panic("Max # of irq sources exceeded!!\n");
 }
 
diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h
index 68ac3c62fe3d..1a3d380f9d5d 100644
--- a/include/asm-x86_64/desc.h
+++ b/include/asm-x86_64/desc.h
@@ -98,16 +98,19 @@ static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsig
 
 static inline void set_intr_gate(int nr, void *func) 
 { 
+	BUG_ON((unsigned)nr > 0xFF);
 	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); 
 } 
 
 static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) 
 { 
+	BUG_ON((unsigned)nr > 0xFF);
 	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); 
 } 
 
 static inline void set_system_gate(int nr, void *func) 
 { 
+	BUG_ON((unsigned)nr > 0xFF);
 	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); 
 } 
 
diff --git a/include/asm-x86_64/mpspec.h b/include/asm-x86_64/mpspec.h
index f267e10c023d..6b375384f5c2 100644
--- a/include/asm-x86_64/mpspec.h
+++ b/include/asm-x86_64/mpspec.h
@@ -157,7 +157,8 @@ struct mpc_config_lintsrc
  */
 
 #define MAX_MP_BUSSES 256
-#define MAX_IRQ_SOURCES 256
+/* Each PCI slot may be a combo card with its own bus.  4 IRQ pins per slot. */
+#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
 enum mp_bustype {
 	MP_BUS_ISA = 1,
 	MP_BUS_EISA,
-- 
cgit v1.2.3-71-gd317


From 1dff7f3db5f045ccbfeca5bb00b0958a78501557 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:53 +0100
Subject: [PATCH] x86_64: Fix up outdated pfn_to_page comment

pfn_to_page really requires pfn_valid to be true now, no question.
Some people stumbled over it, but it was misleading and wrong.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/mmzone.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h
index b40c661f111e..8d48dc7e43a7 100644
--- a/include/asm-x86_64/mmzone.h
+++ b/include/asm-x86_64/mmzone.h
@@ -41,9 +41,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
 #define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT)
 #define kvaddr_to_nid(kaddr)	phys_to_nid(__pa(kaddr))
 
-/* AK: this currently doesn't deal with invalid addresses. We'll see 
-   if the 2.5 kernel doesn't pass them
-   (2.4 used to). */
+/* Requires pfn_valid(pfn) to be true */
 #define pfn_to_page(pfn) ({ \
 	int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); 	\
 	((pfn) - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map;	\
-- 
cgit v1.2.3-71-gd317


From 07808b74e7dab1aa385e698795875337d72daf7d Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:53 +0100
Subject: [PATCH] x86_64: Remove obsolete ARCH_HAS_ATOMIC_UNSIGNED and
 page_flags_t

Has been introduced for x86-64 at some point to save memory
in struct page, but has been obsolete for some time. Just
remove it.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     | 10 ++--------
 include/linux/mmzone.h |  2 +-
 mm/filemap.c           |  2 +-
 mm/page_alloc.c        |  2 +-
 4 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5c1fb0a2e806..23fad4dae23c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -206,12 +206,6 @@ struct vm_operations_struct {
 struct mmu_gather;
 struct inode;
 
-#ifdef ARCH_HAS_ATOMIC_UNSIGNED
-typedef unsigned page_flags_t;
-#else
-typedef unsigned long page_flags_t;
-#endif
-
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -219,7 +213,7 @@ typedef unsigned long page_flags_t;
  * a page.
  */
 struct page {
-	page_flags_t flags;		/* Atomic flags, some possibly
+	unsigned long flags;		/* Atomic flags, some possibly
 					 * updated asynchronously */
 	atomic_t _count;		/* Usage count, see below. */
 	atomic_t _mapcount;		/* Count of ptes mapped in mms,
@@ -435,7 +429,7 @@ static inline void put_page(struct page *page)
 #endif
 
 /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
-#define SECTIONS_PGOFF		((sizeof(page_flags_t)*8) - SECTIONS_WIDTH)
+#define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index da7a829f8561..57fc99c67c31 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -455,7 +455,7 @@ extern struct pglist_data contig_page_data;
 #include <asm/sparsemem.h>
 #endif
 
-#if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)
+#if BITS_PER_LONG == 32
 /*
  * with 32 bit page->flags field, we reserve 9 bits for node/zone info.
  * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes.
diff --git a/mm/filemap.c b/mm/filemap.c
index 5d6e4c2000dc..33a28bfde158 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -134,7 +134,7 @@ static int sync_page(void *word)
 	struct address_space *mapping;
 	struct page *page;
 
-	page = container_of((page_flags_t *)word, struct page, flags);
+	page = container_of((unsigned long *)word, struct page, flags);
 
 	/*
 	 * page_mapping() is being called without PG_locked held.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e8810a7e0264..259a71bacca4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -128,7 +128,7 @@ static void bad_page(const char *function, struct page *page)
 	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
 		function, current->comm, page);
 	printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-		(int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,
+		(int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
 		page->mapping, page_mapcount(page), page_count(page));
 	printk(KERN_EMERG "Backtrace:\n");
 	dump_stack();
-- 
cgit v1.2.3-71-gd317


From 69d81fcde7797342417591ba7affb372b9c86eae Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:53 +0100
Subject: [PATCH] x86_64: Speed up numa_node_id by putting it directly into the
 PDA

Not go from the CPU number to an mapping array.
Mode number is often used now in fast paths.

This also adds a generic numa_node_id to all the topology includes

Suggested by Eric Dumazet

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/setup.c    |  4 ++--
 arch/x86_64/mm/numa.c         | 10 ++++++++--
 arch/x86_64/mm/srat.c         |  2 +-
 include/asm-x86_64/numa.h     |  2 ++
 include/asm-x86_64/pda.h      |  1 +
 include/asm-x86_64/topology.h |  2 ++
 include/linux/mmzone.h        |  2 ++
 7 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index f27731ac95c5..99cfa751949a 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -823,7 +823,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
  		if (!node_online(node))
  			node = nearby_node(apicid);
  	}
-  	cpu_to_node[cpu] = node;
+	numa_set_node(cpu, node);
 
   	printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
   			cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
@@ -975,7 +975,7 @@ static void srat_detect_node(void)
 	node = apicid_to_node[hard_smp_processor_id()];
 	if (node == NUMA_NO_NODE)
 		node = 0;
-	cpu_to_node[cpu] = node;
+	numa_set_node(cpu, node);
 
 	if (acpi_numa > 0)
 		printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 18e86e2eac2d..4bf64583ba3b 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -156,7 +156,7 @@ void __init numa_init_array(void)
 	for (i = 0; i < NR_CPUS; i++) {
 		if (cpu_to_node[i] != NUMA_NO_NODE)
 			continue;
-		cpu_to_node[i] = rr;
+ 		numa_set_node(i, rr);
 		rr = next_node(rr, node_online_map);
 		if (rr == MAX_NUMNODES)
 			rr = first_node(node_online_map);
@@ -242,7 +242,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	nodes_clear(node_online_map);
 	node_set_online(0);
 	for (i = 0; i < NR_CPUS; i++)
-		cpu_to_node[i] = 0;
+		numa_set_node(i, 0);
 	node_to_cpumask[0] = cpumask_of_cpu(0);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
@@ -252,6 +252,12 @@ __cpuinit void numa_add_cpu(int cpu)
 	set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
 } 
 
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	cpu_pda[cpu].nodenumber = node;
+	cpu_to_node[cpu] = node;
+}
+
 unsigned long __init numa_free_all_bootmem(void) 
 { 
 	int i;
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 4b2e844c15a7..c7aa08a58041 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -203,7 +203,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		if (cpu_to_node[i] == NUMA_NO_NODE)
 			continue;
 		if (!node_isset(cpu_to_node[i], nodes_parsed))
-			cpu_to_node[i] = NUMA_NO_NODE; 
+			numa_set_node(i, NUMA_NO_NODE);
 	}
 	numa_init_array();
 	return 0;
diff --git a/include/asm-x86_64/numa.h b/include/asm-x86_64/numa.h
index bcf55c3f7f7f..d51e56fdc3da 100644
--- a/include/asm-x86_64/numa.h
+++ b/include/asm-x86_64/numa.h
@@ -17,6 +17,8 @@ extern void numa_add_cpu(int cpu);
 extern void numa_init_array(void);
 extern int numa_off;
 
+extern void numa_set_node(int cpu, int node);
+
 extern unsigned char apicid_to_node[256];
 
 #define NUMA_NO_NODE 0xff
diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h
index bbf89aa8a1af..8733ccfa442e 100644
--- a/include/asm-x86_64/pda.h
+++ b/include/asm-x86_64/pda.h
@@ -15,6 +15,7 @@ struct x8664_pda {
         int irqcount;		    /* Irq nesting counter. Starts with -1 */  	
 	int cpunumber;		    /* Logical CPU number */
 	char *irqstackptr;	/* top of irqstack */
+	int nodenumber;		    /* number of current node */
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* number of NMI on this CPUs */
 	struct mm_struct *active_mm;
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 1c603cd7e4d0..d39ebd5263ed 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -28,6 +28,8 @@ extern int __node_distance(int, int);
 #define pcibus_to_node(bus)		((long)(bus->sysdata))	
 #define pcibus_to_cpumask(bus)		node_to_cpumask(pcibus_to_node(bus));
 
+#define numa_node_id()			read_pda(nodenumber)
+
 /* sched_domains SD_NODE_INIT for x86_64 machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
 	.span			= CPU_MASK_NONE,	\
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 57fc99c67c31..f3cffc354dea 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -435,7 +435,9 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
+#ifndef numa_node_id
 #define numa_node_id()		(cpu_to_node(raw_smp_processor_id()))
+#endif
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 
-- 
cgit v1.2.3-71-gd317


From f6c2e3330d3fdd5474bc3756da46fca889a30e33 Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Sat, 5 Nov 2005 17:25:53 +0100
Subject: [PATCH] x86_64: Unmap NULL during early bootup

We should zap the low mappings, as soon as possible, so that we can catch
kernel bugs more effectively. Previously early boot had NULL mapped
and didn't trap on NULL references.

This patch introduces boot_level4_pgt, which will always have low identity
addresses mapped.  Druing boot, all the processors will use this as their
level4 pgt.  On BP, we will switch to init_level4_pgt as soon as we enter C
code and zap the low mappings as soon as we are done with the usage of
identity low mapped addresses.  On AP's we will zap the low mappings as
soon as we jump to C code.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/acpi/boot.c |  2 +-
 arch/x86_64/kernel/head.S    | 37 +++++++++++++++++++++++--------------
 arch/x86_64/kernel/head64.c  |  8 ++++++++
 arch/x86_64/kernel/mpparse.c |  2 +-
 arch/x86_64/kernel/setup.c   |  2 ++
 arch/x86_64/kernel/setup64.c |  2 +-
 arch/x86_64/kernel/smpboot.c |  3 ---
 arch/x86_64/mm/init.c        | 28 +++++++++++++++++-----------
 include/asm-x86_64/pgtable.h |  1 +
 include/asm-x86_64/proto.h   |  2 ++
 include/asm-x86_64/smp.h     |  1 -
 11 files changed, 56 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 82754bb80e20..f36677241ecd 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -542,7 +542,7 @@ acpi_scan_rsdp(unsigned long start, unsigned long length)
 	 * RSDP signature.
 	 */
 	for (offset = 0; offset < length; offset += 16) {
-		if (strncmp((char *)(start + offset), "RSD PTR ", sig_len))
+		if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len))
 			continue;
 		return (start + offset);
 	}
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index b92e5f45ed46..15290968e49d 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <linux/threads.h>
+#include <linux/init.h>
 #include <asm/desc.h>
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -70,7 +71,7 @@ startup_32:
 	movl	%eax, %cr4
 
 	/* Setup early boot stage 4 level pagetables */
-	movl	$(init_level4_pgt - __START_KERNEL_map), %eax
+	movl	$(boot_level4_pgt - __START_KERNEL_map), %eax
 	movl	%eax, %cr3
 
 	/* Setup EFER (Extended Feature Enable Register) */
@@ -113,7 +114,7 @@ startup_64:
 	movq	%rax, %cr4
 
 	/* Setup early boot stage 4 level pagetables. */
-	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+	movq	$(boot_level4_pgt - __START_KERNEL_map), %rax
 	movq	%rax, %cr3
 
 	/* Check if nx is implemented */
@@ -240,20 +241,10 @@ ljumpvector:
 ENTRY(stext)
 ENTRY(_stext)
 
-	/*
-	 * This default setting generates an ident mapping at address 0x100000
-	 * and a mapping for the kernel that precisely maps virtual address
-	 * 0xffffffff80000000 to physical address 0x000000. (always using
-	 * 2Mbyte large pages provided by PAE mode)
-	 */
 .org 0x1000
 ENTRY(init_level4_pgt)
-	.quad	0x0000000000002007 + __PHYSICAL_START	/* -> level3_ident_pgt */
-	.fill	255,8,0
-	.quad	0x000000000000a007 + __PHYSICAL_START
-	.fill	254,8,0
-	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
+	/* This gets initialized in x86_64_start_kernel */
+	.fill	512,8,0
 
 .org 0x2000
 ENTRY(level3_ident_pgt)
@@ -350,6 +341,24 @@ ENTRY(wakeup_level4_pgt)
 	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
 #endif
 
+#ifndef CONFIG_HOTPLUG_CPU
+	__INITDATA
+#endif
+	/*
+	 * This default setting generates an ident mapping at address 0x100000
+	 * and a mapping for the kernel that precisely maps virtual address
+	 * 0xffffffff80000000 to physical address 0x000000. (always using
+	 * 2Mbyte large pages provided by PAE mode)
+	 */
+	.align PAGE_SIZE
+ENTRY(boot_level4_pgt)
+	.quad	0x0000000000002007 + __PHYSICAL_START	/* -> level3_ident_pgt */
+	.fill	255,8,0
+	.quad	0x000000000000a007 + __PHYSICAL_START
+	.fill	254,8,0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
+
 	.data
 
 	.align 16
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index cf6ab147a2a5..e0f12d49db4c 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -19,6 +19,7 @@
 #include <asm/bootsetup.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
+#include <asm/pgtable.h>
 
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 
    yet. */
@@ -86,6 +87,13 @@ void __init x86_64_start_kernel(char * real_mode_data)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
 	clear_bss();
+
+	/*
+	 * switch to init_level4_pgt from boot_level4_pgt
+	 */
+	memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
+	asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+
 	pda_init(0);
 	copy_bootdata(real_mode_data);
 #ifdef CONFIG_SMP
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 8f6958e79455..4a581d1cefbd 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -549,7 +549,7 @@ void __init get_smp_config (void)
 		 * Read the physical hardware table.  Anything here will
 		 * override the defaults.
 		 */
-		if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) {
+		if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) {
 			smp_found_config = 0;
 			printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
 			printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 99cfa751949a..f76f3743f5d7 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -571,6 +571,8 @@ void __init setup_arch(char **cmdline_p)
 
 	init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
 
+	zap_low_mappings(0);
+
 #ifdef CONFIG_ACPI
 	/*
 	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 79190891fbc5..06dc354375c3 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -141,7 +141,6 @@ void pda_init(int cpu)
 			panic("cannot allocate irqstack for cpu %d", cpu); 
 	}
 
-	asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
 
 	pda->irqstackptr += IRQSTACKSIZE-64;
 } 
@@ -197,6 +196,7 @@ void __cpuinit cpu_init (void)
 	/* CPU 0 is initialised in head64.c */
 	if (cpu != 0) {
 		pda_init(cpu);
+		zap_low_mappings(cpu);
 	} else 
 		estacks = boot_exception_stacks; 
 
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 658a81b33f3b..3393fc08823b 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -1063,9 +1063,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
  */
 void __init smp_cpus_done(unsigned int max_cpus)
 {
-#ifndef CONFIG_HOTPLUG_CPU
-	zap_low_mappings();
-#endif
 	smp_cleanup_boot();
 
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 2b1d6c382396..be483a1d7b54 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -312,12 +312,19 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
 
 extern struct x8664_pda cpu_pda[NR_CPUS];
 
-/* Assumes all CPUs still execute in init_mm */
-void zap_low_mappings(void)
+void __cpuinit zap_low_mappings(int cpu)
 {
-	pgd_t *pgd = pgd_offset_k(0UL);
-	pgd_clear(pgd);
-	flush_tlb_all();
+	if (cpu == 0) {
+		pgd_t *pgd = pgd_offset_k(0UL);
+		pgd_clear(pgd);
+	} else {
+		/*
+		 * For AP's, zap the low identity mappings by changing the cr3
+		 * to init_level4_pgt and doing local flush tlb all
+		 */
+		asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+	}
+	__flush_tlb_all();
 }
 
 /* Compute zone sizes for the DMA and DMA32 zones in a node. */
@@ -474,14 +481,13 @@ void __init mem_init(void)
 		datasize >> 10,
 		initsize >> 10);
 
+#ifdef CONFIG_SMP
 	/*
-	 * Subtle. SMP is doing its boot stuff late (because it has to
-	 * fork idle threads) - but it also needs low mappings for the
-	 * protected-mode entry to work. We zap these entries only after
-	 * the WP-bit has been tested.
+	 * Sync boot_level4_pgt mappings with the init_level4_pgt
+	 * except for the low identity mappings which are already zapped
+	 * in init_level4_pgt. This sync-up is essential for AP's bringup
 	 */
-#ifndef CONFIG_SMP
-	zap_low_mappings();
+	memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
 #endif
 }
 
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 7a07196a7202..a204efb553dc 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -16,6 +16,7 @@ extern pud_t level3_physmem_pgt[512];
 extern pud_t level3_ident_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
 extern pgd_t init_level4_pgt[];
+extern pgd_t boot_level4_pgt[];
 extern unsigned long __supported_pte_mask;
 
 #define swapper_pg_dir init_level4_pgt
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index c251152a0658..34501086afef 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -11,6 +11,8 @@ struct pt_regs;
 extern void start_kernel(void);
 extern void pda_init(int); 
 
+extern void zap_low_mappings(int cpu);
+
 extern void early_idt_handler(void);
 
 extern void mcheck_init(struct cpuinfo_x86 *c);
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index c57ce4071342..592161e979e5 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -47,7 +47,6 @@ extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
-extern void zap_low_mappings(void);
 void smp_stop_cpu(void);
 extern int smp_call_function_single(int cpuid, void (*func) (void *info),
 				void *info, int retry, int wait);
-- 
cgit v1.2.3-71-gd317


From 6b75aeedde1e8a8513393d3c1367bf81bc5b0c67 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:53 +0100
Subject: [PATCH] x86_64: Don't apply __PHYSICAL_MASK to page frame numbers

It is for physical addresses, not for PFNs.

Pointed out by Tejun Heo.

Cc: htejun@gmail.com

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/page.h    | 2 +-
 include/asm-x86_64/pgtable.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
index e5ab4d231f2c..06e489f32472 100644
--- a/include/asm-x86_64/page.h
+++ b/include/asm-x86_64/page.h
@@ -11,7 +11,7 @@
 #define PAGE_SIZE	(1UL << PAGE_SHIFT)
 #endif
 #define PAGE_MASK	(~(PAGE_SIZE-1))
-#define PHYSICAL_PAGE_MASK	(~(PAGE_SIZE-1) & (__PHYSICAL_MASK << PAGE_SHIFT))
+#define PHYSICAL_PAGE_MASK	(~(PAGE_SIZE-1) & __PHYSICAL_MASK)
 
 #define THREAD_ORDER 1 
 #ifdef __ASSEMBLY__
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index a204efb553dc..f8e87a57f3a7 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -246,7 +246,7 @@ static inline unsigned long pud_bad(pud_t pud)
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))	/* FIXME: is this
 						   right? */
 #define pte_page(x)	pfn_to_page(pte_pfn(x))
-#define pte_pfn(x)  ((pte_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
+#define pte_pfn(x)  ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
@@ -353,7 +353,7 @@ static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
 #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 #define	pmd_bad(x)	((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE )
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
-#define pmd_pfn(x)  ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
+#define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
 #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
 #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
-- 
cgit v1.2.3-71-gd317


From 2bc0414ee04fd8bb798760801f5d7476dff44241 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:53 +0100
Subject: [PATCH] x86_64: Only use asm/sections.h to declare section symbols

Adding __initdata_* to asm-generic/sections.h
Replaces a lot of open coded externs in arch/x86_64/*
I had to change __bss_end to __bss_stop to match the other architectures.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/e820.c        |  3 +--
 arch/x86_64/kernel/head64.c      |  6 ++----
 arch/x86_64/kernel/setup.c       |  2 +-
 arch/x86_64/kernel/vmlinux.lds.S |  2 +-
 arch/x86_64/mm/init.c            | 10 ++--------
 include/asm-generic/sections.h   |  1 +
 6 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index ab3f87aaff70..17579a1a174b 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -23,8 +23,7 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/bootsetup.h>
-
-extern char _end[];
+#include <asm/sections.h>
 
 /* 
  * PFN of last memory page.
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index e0f12d49db4c..b675c5add01e 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -20,14 +20,14 @@
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
+#include <asm/sections.h>
 
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 
    yet. */
 static void __init clear_bss(void)
 {
-	extern char __bss_start[], __bss_end[];
 	memset(__bss_start, 0,
-	       (unsigned long) __bss_end - (unsigned long) __bss_start);
+	       (unsigned long) __bss_stop - (unsigned long) __bss_start);
 }
 
 #define NEW_CL_POINTER		0x228	/* Relative to real mode data */
@@ -76,8 +76,6 @@ static void __init setup_boot_cpu_data(void)
 	boot_cpu_data.x86_mask = eax & 0xf;
 }
 
-extern char _end[];
-
 void __init x86_64_start_kernel(char * real_mode_data)
 {
 	char *s;
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index f76f3743f5d7..2ad3556dda52 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -60,6 +60,7 @@
 #include <asm/setup.h>
 #include <asm/mach_apic.h>
 #include <asm/numa.h>
+#include <asm/sections.h>
 
 /*
  * Machine setup..
@@ -103,7 +104,6 @@ struct edid_info edid_info;
 struct e820map e820;
 
 extern int root_mountflags;
-extern char _text, _etext, _edata, _end;
 
 char command_line[COMMAND_LINE_SIZE];
 
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 6dd642cad2ef..58b19215b4b3 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -50,7 +50,7 @@ SECTIONS
 	*(.bss.page_aligned)	
 	*(.bss)
 	}
-  __bss_end = .;
+  __bss_stop = .;
 
   . = ALIGN(PAGE_SIZE);
   . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index be483a1d7b54..489e18df1fe9 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -36,6 +36,7 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/smp.h>
+#include <asm/sections.h>
 
 #ifndef Dprintk
 #define Dprintk(x...)
@@ -45,8 +46,6 @@
 extern int swiotlb;
 #endif
 
-extern char _stext[];
-
 static unsigned long dma_reserve __initdata;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -88,9 +87,6 @@ void show_mem(void)
 
 /* References to section boundaries */
 
-extern char _text, _etext, _edata, __bss_start, _end[];
-extern char __init_begin, __init_end;
-
 int after_bootmem;
 
 static void *spp_getpage(void)
@@ -491,8 +487,6 @@ void __init mem_init(void)
 #endif
 }
 
-extern char __initdata_begin[], __initdata_end[];
-
 void free_initmem(void)
 {
 	unsigned long addr;
@@ -506,7 +500,7 @@ void free_initmem(void)
 		totalram_pages++;
 	}
 	memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
-	printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
+	printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 886dbd116899..0b49f9e070f1 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -13,5 +13,6 @@ extern char _eextratext[] __attribute__((weak));
 extern char _end[];
 extern char __per_cpu_start[], __per_cpu_end[];
 extern char __kprobes_text_start[], __kprobes_text_end[];
+extern char __initdata_begin[], __initdata_end[];
 
 #endif /* _ASM_GENERIC_SECTIONS_H_ */
-- 
cgit v1.2.3-71-gd317


From 485832a5d928facd82f1525270d9f048da2063a1 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86_64: Use int operations in spinlocks to support more than
 128 CPUs spinning.

Pointed out by Eric Dumazet

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/spinlock.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h
index 69636831ad2f..fe484a699cc3 100644
--- a/include/asm-x86_64/spinlock.h
+++ b/include/asm-x86_64/spinlock.h
@@ -18,22 +18,22 @@
  */
 
 #define __raw_spin_is_locked(x) \
-		(*(volatile signed char *)(&(x)->slock) <= 0)
+		(*(volatile signed int *)(&(x)->slock) <= 0)
 
 #define __raw_spin_lock_string \
 	"\n1:\t" \
-	"lock ; decb %0\n\t" \
+	"lock ; decl %0\n\t" \
 	"js 2f\n" \
 	LOCK_SECTION_START("") \
 	"2:\t" \
 	"rep;nop\n\t" \
-	"cmpb $0,%0\n\t" \
+	"cmpl $0,%0\n\t" \
 	"jle 2b\n\t" \
 	"jmp 1b\n" \
 	LOCK_SECTION_END
 
 #define __raw_spin_unlock_string \
-	"movb $1,%0" \
+	"movl $1,%0" \
 		:"=m" (lock->slock) : : "memory"
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
@@ -47,10 +47,10 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	char oldval;
+	int oldval;
 
 	__asm__ __volatile__(
-		"xchgb %b0,%1"
+		"xchgl %0,%1"
 		:"=q" (oldval), "=m" (lock->slock)
 		:"0" (0) : "memory");
 
-- 
cgit v1.2.3-71-gd317


From 420f8f68c9c5148dddf946bebdbc7eacde2172cb Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86_64: New heuristics to find out hotpluggable CPUs.

With a NR_CPUS==128 kernel with CPU hotplug enabled we would waste 4MB
on per CPU data of all possible CPUs.  The reason was that HOTPLUG
always set up possible map to NR_CPUS cpus and then we need to allocate
that much (each per CPU data is roughly ~32k now)

The underlying problem is that ACPI didn't tell us how many hotplug CPUs
the platform supports.  So the old code just assumed all, which would
lead to this memory wastage.

This implements some new heuristics:

 - If the BIOS specified disabled CPUs in the ACPI/mptables assume they
   can be enabled later (this is bending the ACPI specification a bit,
   but seems like a obvious extension)
 - The user can overwrite it with a new additionals_cpus=NUM option
 - Otherwise use half of the available CPUs or 2, whatever is more.

Cc: ashok.raj@intel.com
Cc: len.brown@intel.com

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/x86_64/boot-options.txt |  3 +++
 arch/x86_64/kernel/mpparse.c          |  8 +++++--
 arch/x86_64/kernel/smpboot.c          | 39 ++++++++++++++++++++++++++++++++---
 include/asm-x86_64/smp.h              |  2 ++
 4 files changed, 47 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index ffe1c062088b..a83139692cdf 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -122,6 +122,9 @@ SMP
 
   cpumask=MASK   only use cpus with bits set in mask
 
+  additional_cpus=NUM Allow NUM more CPUs for hotplug
+		 (defaults are specified by the BIOS or half the available CPUs)
+
 NUMA
 
   numa=off	Only set up a single NUMA node spanning all memory.
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 4a581d1cefbd..1d61f10a92c6 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -65,7 +65,9 @@ unsigned long mp_lapic_addr = 0;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_id = -1U;
 /* Internal processor count */
-static unsigned int num_processors = 0;
+unsigned int num_processors __initdata = 0;
+
+unsigned disabled_cpus __initdata;
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
@@ -109,8 +111,10 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 	int ver, cpu;
 	static int found_bsp=0;
 
-	if (!(m->mpc_cpuflag & CPU_ENABLED))
+	if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+		disabled_cpus++;
 		return;
+	}
 
 	printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
 		m->mpc_apicid,
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 3393fc08823b..f74319a80659 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -880,6 +880,9 @@ static __init void disable_smp(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+
+int additional_cpus __initdata = -1;
+
 /*
  * cpu_possible_map should be static, it cannot change as cpu's
  * are onlined, or offlined. The reason is per-cpu data-structures
@@ -888,14 +891,38 @@ static __init void disable_smp(void)
  * cpu_present_map on the other hand can change dynamically.
  * In case when cpu_hotplug is not compiled, then we resort to current
  * behaviour, which is cpu_possible == cpu_present.
- * If cpu-hotplug is supported, then we need to preallocate for all
- * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
  * - Ashok Raj
+ *
+ * Three ways to find out the number of additional hotplug CPUs:
+ * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
+ * - otherwise use half of the available CPUs or 2, whatever is more.
+ * - The user can overwrite it with additional_cpus=NUM
+ * We do this because additional CPUs waste a lot of memory.
+ * -AK
  */
 __init void prefill_possible_map(void)
 {
 	int i;
-	for (i = 0; i < NR_CPUS; i++)
+	int possible;
+
+ 	if (additional_cpus == -1) {
+ 		if (disabled_cpus > 0) {
+ 			additional_cpus = disabled_cpus;
+ 		} else {
+ 			additional_cpus = num_processors / 2;
+ 			if (additional_cpus == 0)
+ 				additional_cpus = 2;
+ 		}
+ 	}
+	possible = num_processors + additional_cpus;
+	if (possible > NR_CPUS) 
+		possible = NR_CPUS;
+
+	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
+		possible,
+	        max_t(int, possible - num_processors, 0));
+
+	for (i = 0; i < possible; i++)
 		cpu_set(i, cpu_possible_map);
 }
 #endif
@@ -1151,6 +1178,12 @@ void __cpu_die(unsigned int cpu)
  	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }
 
+static __init int setup_additional_cpus(char *s)
+{
+	return get_option(&s, &additional_cpus);
+}
+__setup("additional_cpus=", setup_additional_cpus);
+
 #else /* ... !CONFIG_HOTPLUG_CPU */
 
 int __cpu_disable(void)
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 592161e979e5..cf8f969f9020 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -81,6 +81,8 @@ extern int safe_smp_processor_id(void);
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
 extern void prefill_possible_map(void);
+extern unsigned num_processors;
+extern unsigned disabled_cpus;
 
 #endif /* !ASSEMBLY */
 
-- 
cgit v1.2.3-71-gd317


From ea0be473a1f0ee89024a24d8ea4b05fbf6efcee3 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86_64: Allow modular build of ia32 aout loader

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/Kconfig              | 2 +-
 arch/x86_64/ia32/ia32_aout.c     | 3 ---
 arch/x86_64/ia32/ia32_binfmt.c   | 4 +++-
 arch/x86_64/kernel/x8664_ksyms.c | 3 +++
 include/asm-x86_64/ia32.h        | 5 +++++
 5 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 504dc52e8bfa..fd10bf82f8d4 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -510,7 +510,7 @@ config IA32_EMULATION
 	  left.
 
 config IA32_AOUT
-       bool "IA32 a.out support"
+       tristate "IA32 a.out support"
        depends on IA32_EMULATION
        help
          Support old a.out binaries in the 32bit emulation.
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index 93c60f4aa47a..3bf58af98936 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -36,9 +36,6 @@
 #undef WARN_OLD
 #undef CORE_DUMP /* probably broken */
 
-extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
-				unsigned long stack_top, int exec_stack);
-
 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
 
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index d9161e395978..830feb272eca 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -335,7 +335,8 @@ static void elf32_init(struct pt_regs *regs)
 	me->thread.es = __USER_DS;
 }
 
-int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack)
+int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
+			 int executable_stack)
 {
 	unsigned long stack_base;
 	struct vm_area_struct *mpnt;
@@ -389,6 +390,7 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int exec
 	
 	return 0;
 }
+EXPORT_SYMBOL(ia32_setup_arg_pages);
 
 static unsigned long
 elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index fd99ddd009bc..4a54221e10bc 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -203,3 +203,6 @@ EXPORT_SYMBOL(flush_tlb_page);
 #endif
 
 EXPORT_SYMBOL(cpu_khz);
+
+EXPORT_SYMBOL(load_gs_index);
+
diff --git a/include/asm-x86_64/ia32.h b/include/asm-x86_64/ia32.h
index 6efa00fe4e7b..c7bc9c0525ba 100644
--- a/include/asm-x86_64/ia32.h
+++ b/include/asm-x86_64/ia32.h
@@ -165,6 +165,11 @@ struct siginfo_t;
 int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info);
 int do_set_thread_area(struct thread_struct *t, struct user_desc __user *info);
 int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs);
+
+struct linux_binprm;
+extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
+				unsigned long stack_top, int exec_stack);
+
 #endif
 
 #endif /* !CONFIG_IA32_SUPPORT */
-- 
cgit v1.2.3-71-gd317


From a88cde13bae3fffd6ecc812bdd02c91eafb6073e Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86_64: Formatting fixes for arch/x86_64/kernel/process.c

No functional changes.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/process.c | 47 +++++++++++++++++++++++++-------------------
 include/asm-x86_64/msr.h     |  2 +-
 2 files changed, 28 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index b5a89c0bdf59..bc44c92ca198 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -148,7 +148,8 @@ void cpu_idle_wait(void)
 	do {
 		ssleep(1);
 		for_each_online_cpu(cpu) {
-			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+			if (cpu_isset(cpu, map) &&
+					!per_cpu(cpu_idle_state, cpu))
 				cpu_clear(cpu, map);
 		}
 		cpus_and(map, map, cpu_online_map);
@@ -278,7 +279,8 @@ void __show_regs(struct pt_regs * regs)
 		system_utsname.version);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
 	printk_address(regs->rip); 
-	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
+	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
+		regs->eflags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->rax, regs->rbx, regs->rcx);
 	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
@@ -430,15 +432,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
 	struct pt_regs * childregs;
 	struct task_struct *me = current;
 
-	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
-
+	childregs = ((struct pt_regs *)
+			(THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
 	*childregs = *regs;
 
 	childregs->rax = 0;
 	childregs->rsp = rsp;
-	if (rsp == ~0UL) {
+	if (rsp == ~0UL)
 		childregs->rsp = (unsigned long)childregs;
-	}
 
 	p->thread.rsp = (unsigned long) childregs;
 	p->thread.rsp0 = (unsigned long) (childregs+1);
@@ -460,7 +461,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
 			p->thread.io_bitmap_max = 0;
 			return -ENOMEM;
 		}
-		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
+		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+				IO_BITMAP_BYTES);
 	} 
 
 	/*
@@ -497,7 +499,8 @@ out:
  * - fold all the options into a flag word and test it with a single test.
  * - could test fs/gs bitsliced
  */
-struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+struct task_struct *
+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
@@ -568,7 +571,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *
 	prev->userrsp = read_pda(oldrsp); 
 	write_pda(oldrsp, next->userrsp); 
 	write_pda(pcurrent, next_p); 
-	write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
+	write_pda(kernelstack,
+	    (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
 
 	/*
 	 * Now maybe reload the debug registers
@@ -649,7 +653,9 @@ asmlinkage long sys_fork(struct pt_regs *regs)
 	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
 }
 
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+asmlinkage long
+sys_clone(unsigned long clone_flags, unsigned long newsp,
+	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
 {
 	if (!newsp)
 		newsp = regs->rsp;
@@ -685,7 +691,8 @@ unsigned long get_wchan(struct task_struct *p)
 		return 0;
 	fp = *(u64 *)(p->thread.rsp);
 	do { 
-		if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
+		if (fp < (unsigned long)stack ||
+		    fp > (unsigned long)stack+THREAD_SIZE)
 			return 0; 
 		rip = *(u64 *)(fp+8); 
 		if (!in_sched_functions(rip))
@@ -720,8 +727,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 			task->thread.gsindex = 0;
 			task->thread.gs = addr;
 			if (doit) {
-		load_gs_index(0);
-		ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 
+				load_gs_index(0);
+				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
 			} 
 		}
 		put_cpu();
@@ -738,7 +745,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 			set_32bit_tls(task, FS_TLS, addr);
 			if (doit) { 
 				load_TLS(&task->thread, cpu); 
-				asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
+				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
 			}
 			task->thread.fsindex = FS_TLS_SEL;
 			task->thread.fs = 0;
@@ -748,8 +755,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 			if (doit) {
 				/* set the selector to 0 to not confuse
 				   __switch_to */
-		asm volatile("movl %0,%%fs" :: "r" (0));
-		ret = checking_wrmsrl(MSR_FS_BASE, addr); 
+				asm volatile("movl %0,%%fs" :: "r" (0));
+				ret = checking_wrmsrl(MSR_FS_BASE, addr);
 			}
 		}
 		put_cpu();
@@ -758,9 +765,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		unsigned long base; 
 		if (task->thread.fsindex == FS_TLS_SEL)
 			base = read_32bit_tls(task, FS_TLS);
-		else if (doit) {
+		else if (doit)
 			rdmsrl(MSR_FS_BASE, base);
-		} else
+		else
 			base = task->thread.fs;
 		ret = put_user(base, (unsigned long __user *)addr); 
 		break; 
@@ -769,9 +776,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		unsigned long base;
 		if (task->thread.gsindex == GS_TLS_SEL)
 			base = read_32bit_tls(task, GS_TLS);
-		else if (doit) {
+		else if (doit)
 			rdmsrl(MSR_KERNEL_GS_BASE, base);
-		} else
+		else
 			base = task->thread.gs;
 		ret = put_user(base, (unsigned long __user *)addr); 
 		break;
diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h
index 5a7fe3c6c3d8..24dc39651bc4 100644
--- a/include/asm-x86_64/msr.h
+++ b/include/asm-x86_64/msr.h
@@ -19,7 +19,7 @@
 			    : "=a" (a__), "=d" (b__) \
 			    : "c" (msr)); \
        val = a__ | (b__<<32); \
-} while(0); 
+} while(0)
 
 #define wrmsr(msr,val1,val2) \
      __asm__ __volatile__("wrmsr" \
-- 
cgit v1.2.3-71-gd317


From e90f22edf432512219cc2952f5811961abbd164f Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86_64: Fix NUMA node lookup debug code which had bitrotted

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/mmzone.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h
index 8d48dc7e43a7..69baaa8a3ce0 100644
--- a/include/asm-x86_64/mmzone.h
+++ b/include/asm-x86_64/mmzone.h
@@ -17,16 +17,15 @@
 /* Simple perfect hash to map physical addresses to node numbers */
 extern int memnode_shift; 
 extern u8  memnodemap[NODEMAPSIZE]; 
-extern int maxnode;
 
 extern struct pglist_data *node_data[];
 
 static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) 
 { 
-	int nid; 
+	unsigned nid; 
 	VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE);
 	nid = memnodemap[addr >> memnode_shift]; 
-	VIRTUAL_BUG_ON(nid > maxnode); 
+	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); 
 	return nid; 
 } 
 
-- 
cgit v1.2.3-71-gd317


From 94605eff572b727aaad9b4b29bc358b919096503 Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86-64/i386: Intel HT, Multi core detection fixes

Fields obtained through cpuid vector 0x1(ebx[16:23]) and
vector 0x4(eax[14:25], eax[26:31]) indicate the maximum values and might not
always be the same as what is available and what OS sees.  So make sure
"siblings" and "cpu cores" values in /proc/cpuinfo reflect the values as seen
by OS instead of what cpuid instruction says. This will also fix the buggy BIOS
cases (for example where cpuid on a single core cpu says there are "2" siblings,
even when HT is disabled in the BIOS.
http://bugzilla.kernel.org/show_bug.cgi?id=4359)

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/cpu/amd.c             | 12 +++---
 arch/i386/kernel/cpu/common.c          | 36 +++++++----------
 arch/i386/kernel/cpu/intel.c           |  2 +-
 arch/i386/kernel/cpu/intel_cacheinfo.c |  2 +-
 arch/i386/kernel/cpu/proc.c            |  7 ++--
 arch/i386/kernel/smpboot.c             | 73 ++++++++++++++++++++++++----------
 arch/x86_64/kernel/setup.c             | 69 ++++++++++++++------------------
 arch/x86_64/kernel/smpboot.c           | 69 +++++++++++++++++++++++++-------
 include/asm-i386/processor.h           |  4 +-
 include/asm-x86_64/processor.h         |  4 +-
 include/linux/bitops.h                 | 10 +++++
 11 files changed, 178 insertions(+), 110 deletions(-)

(limited to 'include')

diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 53a1681cd964..e344ef88cfcd 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -206,9 +206,9 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 	display_cacheinfo(c);
 
 	if (cpuid_eax(0x80000000) >= 0x80000008) {
-		c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-		if (c->x86_num_cores & (c->x86_num_cores - 1))
-			c->x86_num_cores = 1;
+		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+		if (c->x86_max_cores & (c->x86_max_cores - 1))
+			c->x86_max_cores = 1;
 	}
 
 #ifdef CONFIG_X86_HT
@@ -217,15 +217,15 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 	 * distingush the cores.  Assumes number of cores is a power
 	 * of two.
 	 */
-	if (c->x86_num_cores > 1) {
+	if (c->x86_max_cores > 1) {
 		int cpu = smp_processor_id();
 		unsigned bits = 0;
-		while ((1 << bits) < c->x86_num_cores)
+		while ((1 << bits) < c->x86_max_cores)
 			bits++;
 		cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
 		phys_proc_id[cpu] >>= bits;
 		printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
-		       cpu, c->x86_num_cores, cpu_core_id[cpu]);
+		       cpu, c->x86_max_cores, cpu_core_id[cpu]);
 	}
 #endif
 }
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 35a67dab4a94..4e9c2e99b0a5 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -335,7 +335,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
 	c->x86_vendor_id[0] = '\0'; /* Unset */
 	c->x86_model_id[0] = '\0';  /* Unset */
-	c->x86_num_cores = 1;
+	c->x86_max_cores = 1;
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
 	if (!have_cpuid_p()) {
@@ -446,52 +446,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 void __devinit detect_ht(struct cpuinfo_x86 *c)
 {
 	u32 	eax, ebx, ecx, edx;
-	int 	index_msb, tmp;
+	int 	index_msb, core_bits;
 	int 	cpu = smp_processor_id();
 
+	cpuid(1, &eax, &ebx, &ecx, &edx);
+
+	c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+
 	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		return;
 
-	cpuid(1, &eax, &ebx, &ecx, &edx);
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 
 	if (smp_num_siblings == 1) {
 		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
 	} else if (smp_num_siblings > 1 ) {
-		index_msb = 31;
 
 		if (smp_num_siblings > NR_CPUS) {
 			printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
 			smp_num_siblings = 1;
 			return;
 		}
-		tmp = smp_num_siblings;
-		while ((tmp & 0x80000000 ) == 0) {
-			tmp <<=1 ;
-			index_msb--;
-		}
-		if (smp_num_siblings & (smp_num_siblings - 1))
-			index_msb++;
+
+		index_msb = get_count_order(smp_num_siblings);
 		phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
 
 		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
 		       phys_proc_id[cpu]);
 
-		smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
 
-		tmp = smp_num_siblings;
-		index_msb = 31;
-		while ((tmp & 0x80000000) == 0) {
-			tmp <<=1 ;
-			index_msb--;
-		}
+		index_msb = get_count_order(smp_num_siblings) ;
 
-		if (smp_num_siblings & (smp_num_siblings - 1))
-			index_msb++;
+		core_bits = get_count_order(c->x86_max_cores);
 
-		cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+		cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+					       ((1 << core_bits) - 1);
 
-		if (c->x86_num_cores > 1)
+		if (c->x86_max_cores > 1)
 			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
 			       cpu_core_id[cpu]);
 	}
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 43601de0f633..8d603ba28126 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -157,7 +157,7 @@ static void __devinit init_intel(struct cpuinfo_x86 *c)
 	if ( p )
 		strcpy(c->x86_model_id, p);
 	
-	c->x86_num_cores = num_cpu_cores(c);
+	c->x86_max_cores = num_cpu_cores(c);
 
 	detect_ht(c);
 
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 4dc42a189ae5..e66d14099564 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -307,7 +307,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 #ifdef CONFIG_X86_HT
 	else if (num_threads_sharing == smp_num_siblings)
 		this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
-	else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings))
+	else if (num_threads_sharing == (c->x86_max_cores * smp_num_siblings))
 		this_leaf->shared_cpu_map = cpu_core_map[cpu];
 	else
 		printk(KERN_DEBUG "Number of CPUs sharing cache didn't match "
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 41b871ecf4b3..e7921315ae9d 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -94,12 +94,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	if (c->x86_cache_size >= 0)
 		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
 #ifdef CONFIG_X86_HT
-	if (c->x86_num_cores * smp_num_siblings > 1) {
+	if (c->x86_max_cores * smp_num_siblings > 1) {
 		seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]);
-		seq_printf(m, "siblings\t: %d\n",
-				c->x86_num_cores * smp_num_siblings);
+		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n]));
 		seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]);
-		seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
 	}
 #endif
 	
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 01b618e73ecd..0a9c64655236 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -74,9 +74,11 @@ EXPORT_SYMBOL(phys_proc_id);
 int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 EXPORT_SYMBOL(cpu_core_id);
 
+/* representing HT siblings of each logical CPU */
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_sibling_map);
 
+/* representing HT and core siblings of each logical CPU */
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
@@ -444,35 +446,60 @@ static void __devinit smp_callin(void)
 
 static int cpucount;
 
+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
+
 static inline void
 set_cpu_sibling_map(int cpu)
 {
 	int i;
+	struct cpuinfo_x86 *c = cpu_data;
+
+	cpu_set(cpu, cpu_sibling_setup_map);
 
 	if (smp_num_siblings > 1) {
-		for (i = 0; i < NR_CPUS; i++) {
-			if (!cpu_isset(i, cpu_callout_map))
-				continue;
-			if (cpu_core_id[cpu] == cpu_core_id[i]) {
+		for_each_cpu_mask(i, cpu_sibling_setup_map) {
+			if (phys_proc_id[cpu] == phys_proc_id[i] &&
+			    cpu_core_id[cpu] == cpu_core_id[i]) {
 				cpu_set(i, cpu_sibling_map[cpu]);
 				cpu_set(cpu, cpu_sibling_map[i]);
+				cpu_set(i, cpu_core_map[cpu]);
+				cpu_set(cpu, cpu_core_map[i]);
 			}
 		}
 	} else {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 	}
 
-	if (current_cpu_data.x86_num_cores > 1) {
-		for (i = 0; i < NR_CPUS; i++) {
-			if (!cpu_isset(i, cpu_callout_map))
-				continue;
-			if (phys_proc_id[cpu] == phys_proc_id[i]) {
-				cpu_set(i, cpu_core_map[cpu]);
-				cpu_set(cpu, cpu_core_map[i]);
-			}
-		}
-	} else {
+	if (current_cpu_data.x86_max_cores == 1) {
 		cpu_core_map[cpu] = cpu_sibling_map[cpu];
+		c[cpu].booted_cores = 1;
+		return;
+	}
+
+	for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		if (phys_proc_id[cpu] == phys_proc_id[i]) {
+			cpu_set(i, cpu_core_map[cpu]);
+			cpu_set(cpu, cpu_core_map[i]);
+			/*
+			 *  Does this new cpu bringup a new core?
+			 */
+			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+				/*
+				 * for each core in package, increment
+				 * the booted_cores for this new cpu
+				 */
+				if (first_cpu(cpu_sibling_map[i]) == i)
+					c[cpu].booted_cores++;
+				/*
+				 * increment the core count for all
+				 * the other cpus in this package
+				 */
+				if (i != cpu)
+					c[i].booted_cores++;
+			} else if (i != cpu && !c[cpu].booted_cores)
+				c[cpu].booted_cores = c[i].booted_cores;
+		}
 	}
 }
 
@@ -1096,11 +1123,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 
 	current_thread_info()->cpu = 0;
 	smp_tune_scheduling();
-	cpus_clear(cpu_sibling_map[0]);
-	cpu_set(0, cpu_sibling_map[0]);
 
-	cpus_clear(cpu_core_map[0]);
-	cpu_set(0, cpu_core_map[0]);
+	set_cpu_sibling_map(0);
 
 	/*
 	 * If we couldn't find an SMP configuration at boot time,
@@ -1279,15 +1303,24 @@ static void
 remove_siblinginfo(int cpu)
 {
 	int sibling;
+	struct cpuinfo_x86 *c = cpu_data;
 
+	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+		cpu_clear(cpu, cpu_core_map[sibling]);
+		/*
+		 * last thread sibling in this cpu core going down
+		 */
+		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+			c[sibling].booted_cores--;
+	}
+			
 	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
 		cpu_clear(cpu, cpu_sibling_map[sibling]);
-	for_each_cpu_mask(sibling, cpu_core_map[cpu])
-		cpu_clear(cpu, cpu_core_map[sibling]);
 	cpus_clear(cpu_sibling_map[cpu]);
 	cpus_clear(cpu_core_map[cpu]);
 	phys_proc_id[cpu] = BAD_APICID;
 	cpu_core_id[cpu] = BAD_APICID;
+	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
 int __cpu_disable(void)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 476ee034fca2..40c77f6fe4b0 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -795,7 +795,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 
 	bits = 0;
-	while ((1 << bits) < c->x86_num_cores)
+	while ((1 << bits) < c->x86_max_cores)
 		bits++;
 
 	/* Low order bits define the core id (index of core in socket) */
@@ -828,7 +828,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 	numa_set_node(cpu, node);
 
   	printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
-  			cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
+  			cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
 #endif
 #endif
 }
@@ -877,9 +877,9 @@ static int __init init_amd(struct cpuinfo_x86 *c)
 	display_cacheinfo(c);
 
 	if (c->extended_cpuid_level >= 0x80000008) {
-		c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-		if (c->x86_num_cores & (c->x86_num_cores - 1))
-			c->x86_num_cores = 1;
+		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+		if (c->x86_max_cores & (c->x86_max_cores - 1))
+			c->x86_max_cores = 1;
 
 		amd_detect_cmp(c);
 	}
@@ -891,54 +891,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	u32 	eax, ebx, ecx, edx;
-	int 	index_msb, tmp;
+	int 	index_msb, core_bits;
 	int 	cpu = smp_processor_id();
-	
+
+	cpuid(1, &eax, &ebx, &ecx, &edx);
+
+	c->apicid = phys_pkg_id(0);
+
 	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		return;
 
-	cpuid(1, &eax, &ebx, &ecx, &edx);
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
-	
+
 	if (smp_num_siblings == 1) {
 		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-	} else if (smp_num_siblings > 1) {
-		index_msb = 31;
-		/*
-		 * At this point we only support two siblings per
-		 * processor package.
-		 */
+	} else if (smp_num_siblings > 1 ) {
+
 		if (smp_num_siblings > NR_CPUS) {
 			printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
 			smp_num_siblings = 1;
 			return;
 		}
-		tmp = smp_num_siblings;
-		while ((tmp & 0x80000000 ) == 0) {
-			tmp <<=1 ;
-			index_msb--;
-		}
-		if (smp_num_siblings & (smp_num_siblings - 1))
-			index_msb++;
+
+		index_msb = get_count_order(smp_num_siblings);
 		phys_proc_id[cpu] = phys_pkg_id(index_msb);
-		
+
 		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
 		       phys_proc_id[cpu]);
 
-		smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
 
-		tmp = smp_num_siblings;
-		index_msb = 31;
-		while ((tmp & 0x80000000) == 0) {
-			tmp <<=1 ;
-			index_msb--;
-		}
-		if (smp_num_siblings & (smp_num_siblings - 1))
-			index_msb++;
+		index_msb = get_count_order(smp_num_siblings) ;
+
+		core_bits = get_count_order(c->x86_max_cores);
 
-		cpu_core_id[cpu] = phys_pkg_id(index_msb);
+		cpu_core_id[cpu] = phys_pkg_id(index_msb) &
+					       ((1 << core_bits) - 1);
 
-		if (c->x86_num_cores > 1)
+		if (c->x86_max_cores > 1)
 			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
 			       cpu_core_id[cpu]);
 	}
@@ -1006,7 +996,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 	if (c->x86 >= 15)
 		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
- 	c->x86_num_cores = intel_num_cpu_cores(c);
+ 	c->x86_max_cores = intel_num_cpu_cores(c);
 
 	srat_detect_node();
 }
@@ -1044,7 +1034,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_model_id[0] = '\0';  /* Unset */
 	c->x86_clflush_size = 64;
 	c->x86_cache_alignment = c->x86_clflush_size;
-	c->x86_num_cores = 1;
+	c->x86_max_cores = 1;
 	c->extended_cpuid_level = 0;
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
@@ -1278,13 +1268,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
 	
 #ifdef CONFIG_SMP
-	if (smp_num_siblings * c->x86_num_cores > 1) {
+	if (smp_num_siblings * c->x86_max_cores > 1) {
 		int cpu = c - cpu_data;
 		seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
-		seq_printf(m, "siblings\t: %d\n",
-				c->x86_num_cores * smp_num_siblings);
+		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
 		seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
-		seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
 	}
 #endif	
 
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index f74319a80659..2b9ddba61b37 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -64,6 +64,7 @@
 int smp_num_siblings = 1;
 /* Package ID of each logical CPU */
 u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+/* core ID of each logical CPU */
 u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 EXPORT_SYMBOL(phys_proc_id);
 EXPORT_SYMBOL(cpu_core_id);
@@ -89,7 +90,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 /* Set when the idlers are all forked */
 int smp_threads_ready;
 
+/* representing HT siblings of each logical CPU */
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+
+/* representing HT and core siblings of each logical CPU */
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
@@ -436,30 +440,59 @@ void __cpuinit smp_callin(void)
 	cpu_set(cpuid, cpu_callin_map);
 }
 
+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
+
 static inline void set_cpu_sibling_map(int cpu)
 {
 	int i;
+	struct cpuinfo_x86 *c = cpu_data;
+
+	cpu_set(cpu, cpu_sibling_setup_map);
 
 	if (smp_num_siblings > 1) {
-		for_each_cpu(i) {
-			if (cpu_core_id[cpu] == cpu_core_id[i]) {
+		for_each_cpu_mask(i, cpu_sibling_setup_map) {
+			if (phys_proc_id[cpu] == phys_proc_id[i] &&
+			    cpu_core_id[cpu] == cpu_core_id[i]) {
 				cpu_set(i, cpu_sibling_map[cpu]);
 				cpu_set(cpu, cpu_sibling_map[i]);
+				cpu_set(i, cpu_core_map[cpu]);
+				cpu_set(cpu, cpu_core_map[i]);
 			}
 		}
 	} else {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 	}
 
-	if (current_cpu_data.x86_num_cores > 1) {
-		for_each_cpu(i) {
-			if (phys_proc_id[cpu] == phys_proc_id[i]) {
-				cpu_set(i, cpu_core_map[cpu]);
-				cpu_set(cpu, cpu_core_map[i]);
-			}
-		}
-	} else {
+	if (current_cpu_data.x86_max_cores == 1) {
 		cpu_core_map[cpu] = cpu_sibling_map[cpu];
+		c[cpu].booted_cores = 1;
+		return;
+	}
+
+	for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		if (phys_proc_id[cpu] == phys_proc_id[i]) {
+			cpu_set(i, cpu_core_map[cpu]);
+			cpu_set(cpu, cpu_core_map[i]);
+			/*
+			 *  Does this new cpu bringup a new core?
+			 */
+			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+				/*
+				 * for each core in package, increment
+				 * the booted_cores for this new cpu
+				 */
+				if (first_cpu(cpu_sibling_map[i]) == i)
+					c[cpu].booted_cores++;
+				/*
+				 * increment the core count for all
+				 * the other cpus in this package
+				 */
+				if (i != cpu)
+					c[i].booted_cores++;
+			} else if (i != cpu && !c[cpu].booted_cores)
+				c[cpu].booted_cores = c[i].booted_cores;
+		}
 	}
 }
 
@@ -993,6 +1026,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	nmi_watchdog_default();
 	current_cpu_data = boot_cpu_data;
 	current_thread_info()->cpu = 0;  /* needed? */
+	set_cpu_sibling_map(0);
 
 	if (smp_sanity_check(max_cpus) < 0) {
 		printk(KERN_INFO "SMP disabled\n");
@@ -1036,8 +1070,6 @@ void __init smp_prepare_boot_cpu(void)
 	int me = smp_processor_id();
 	cpu_set(me, cpu_online_map);
 	cpu_set(me, cpu_callout_map);
-	cpu_set(0, cpu_sibling_map[0]);
-	cpu_set(0, cpu_core_map[0]);
 	per_cpu(cpu_state, me) = CPU_ONLINE;
 }
 
@@ -1106,15 +1138,24 @@ void __init smp_cpus_done(unsigned int max_cpus)
 static void remove_siblinginfo(int cpu)
 {
 	int sibling;
+	struct cpuinfo_x86 *c = cpu_data;
 
+	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+		cpu_clear(cpu, cpu_core_map[sibling]);
+		/*
+		 * last thread sibling in this cpu core going down
+		 */
+		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+			c[sibling].booted_cores--;
+	}
+			
 	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
 		cpu_clear(cpu, cpu_sibling_map[sibling]);
-	for_each_cpu_mask(sibling, cpu_core_map[cpu])
-		cpu_clear(cpu, cpu_core_map[sibling]);
 	cpus_clear(cpu_sibling_map[cpu]);
 	cpus_clear(cpu_core_map[cpu]);
 	phys_proc_id[cpu] = BAD_APICID;
 	cpu_core_id[cpu] = BAD_APICID;
+	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
 void remove_cpu_from_maps(void)
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 0a4ec764377c..9cd4a05234a1 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -65,7 +65,9 @@ struct cpuinfo_x86 {
 	int	f00f_bug;
 	int	coma_bug;
 	unsigned long loops_per_jiffy;
-	unsigned char x86_num_cores;
+	unsigned char x86_max_cores;	/* cpuid returned max cores value */
+	unsigned char booted_cores;	/* number of cores as seen by OS */
+	unsigned char apicid;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define X86_VENDOR_INTEL 0
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index 03837d34fba0..4861246548f7 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -61,10 +61,12 @@ struct cpuinfo_x86 {
 	int	x86_cache_alignment;
 	int	x86_tlbsize;	/* number of 4K pages in DTLB/ITLB combined(in pages)*/
         __u8    x86_virt_bits, x86_phys_bits;
-	__u8	x86_num_cores;
+	__u8	x86_max_cores;	/* cpuid returned max cores value */
         __u32   x86_power; 	
 	__u32   extended_cpuid_level;	/* Max extended CPUID function supported */
 	unsigned long loops_per_jiffy;
+	__u8	apicid;
+	__u8	booted_cores;	/* number of cores as seen by OS */
 } ____cacheline_aligned;
 
 #define X86_VENDOR_INTEL 0
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index cb3c3ef50f50..38c2fb7ebe09 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -84,6 +84,16 @@ static __inline__ int get_bitmask_order(unsigned int count)
 	return order;	/* We could be slightly more clever with -1 here... */
 }
 
+static __inline__ int get_count_order(unsigned int count)
+{
+	int order;
+	
+	order = fls(count) - 1;
+	if (count & (count - 1))
+		order++;
+	return order;
+}
+
 /*
  * hweightN: returns the hamming weight (i.e. the number
  * of bits set) of a N-bit word
-- 
cgit v1.2.3-71-gd317


From a6f5deb2be4c82f24fefadcbf7e448f540c05ae6 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86_64: Reduce number of retries for reset through keyboard
 controller

Old code could retry for 10 seconds worst time. Only try it
for one second now.

Suggested by Yinghai Lu

Cc: Yinghai.Lu@amd.com

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/reboot.c                 | 2 +-
 include/asm-i386/mach-default/mach_reboot.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
index 97804bc2c0a0..75235ed2b31b 100644
--- a/arch/x86_64/kernel/reboot.c
+++ b/arch/x86_64/kernel/reboot.c
@@ -121,7 +121,7 @@ void machine_emergency_restart(void)
 		/* Could also try the reset bit in the Hammer NB */
 		switch (reboot_type) { 
 		case BOOT_KBD:
-		for (i=0; i<100; i++) {
+		for (i=0; i<10; i++) {
 			kb_wait();
 			udelay(50);
 			outb(0xfe,0x64);         /* pulse reset low */
diff --git a/include/asm-i386/mach-default/mach_reboot.h b/include/asm-i386/mach-default/mach_reboot.h
index 06ae4d81ba6a..a955e57ad016 100644
--- a/include/asm-i386/mach-default/mach_reboot.h
+++ b/include/asm-i386/mach-default/mach_reboot.h
@@ -19,7 +19,7 @@ static inline void kb_wait(void)
 static inline void mach_reboot(void)
 {
 	int i;
-	for (i = 0; i < 100; i++) {
+	for (i = 0; i < 10; i++) {
 		kb_wait();
 		udelay(50);
 		outb(0x60, 0x64);	/* write Controller Command Byte */
-- 
cgit v1.2.3-71-gd317


From 8e0d4f4e9132ae6e353f9cf27261627bcc7c65cc Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86_64: Remove asm-x86_64/rwsem.h

Not needed since x86-64 always uses the spinlock based rwsems.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/rwsem.h | 283 ---------------------------------------------
 1 file changed, 283 deletions(-)
 delete mode 100644 include/asm-x86_64/rwsem.h

(limited to 'include')

diff --git a/include/asm-x86_64/rwsem.h b/include/asm-x86_64/rwsem.h
deleted file mode 100644
index 46077e9c1910..000000000000
--- a/include/asm-x86_64/rwsem.h
+++ /dev/null
@@ -1,283 +0,0 @@
-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for x86_64+
- *
- * Written by David Howells (dhowells@redhat.com).
- * Ported by Andi Kleen <ak@suse.de> to x86-64.
- *
- * Derived from asm-i386/semaphore.h and asm-i386/rwsem.h
- *
- *
- * The MSW of the count is the negated number of active writers and waiting
- * lockers, and the LSW is the total number of active locks
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
- * uncontended lock. This can be determined because XADD returns the old value.
- * Readers increment by 1 and see a positive value when uncontended, negative
- * if there are writers (and maybe) readers waiting (in which case it goes to
- * sleep).
- *
- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
- * be extended to 65534 by manually checking the whole MSW rather than relying
- * on the S flag.
- *
- * The value of ACTIVE_BIAS supports up to 65535 active processes.
- *
- * This should be totally fair - if anything is waiting, a process that wants a
- * lock will go to the back of the queue. When the currently active lock is
- * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consecutive readers at the
- * front, then they'll all be woken up, but no other readers will be.
- */
-
-#ifndef _X8664_RWSEM_H
-#define _X8664_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
-	signed int		count;
-#define RWSEM_UNLOCKED_VALUE		0x00000000
-#define RWSEM_ACTIVE_BIAS		0x00000001
-#define RWSEM_ACTIVE_MASK		0x0000ffff
-#define RWSEM_WAITING_BIAS		(-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-	spinlock_t		wait_lock;
-	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
-};
-
-/*
- * initialisation
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT      , 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
-	__RWSEM_DEBUG_INIT }
-
-#define DECLARE_RWSEM(name) \
-	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
-	sem->count = RWSEM_UNLOCKED_VALUE;
-	spin_lock_init(&sem->wait_lock);
-	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
-}
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-		"# beginning down_read\n\t"
-LOCK_PREFIX	"  incl      (%%rdi)\n\t" /* adds 0x00000001, returns the old value */
-		"  js        2f\n\t" /* jump if we weren't granted the lock */
-		"1:\n\t"
-		LOCK_SECTION_START("") \
-		"2:\n\t"
-		"  call      rwsem_down_read_failed_thunk\n\t"
-		"  jmp       1b\n"
-		LOCK_SECTION_END \
-		"# ending down_read\n\t"
-		: "+m"(sem->count)
-		: "D"(sem)
-		: "memory", "cc");
-}
-
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
-	__s32 result, tmp;
-	__asm__ __volatile__(
-		"# beginning __down_read_trylock\n\t"
-		"  movl      %0,%1\n\t"
-		"1:\n\t"
-		"  movl	     %1,%2\n\t"
-		"  addl      %3,%2\n\t"
-		"  jle	     2f\n\t"
-LOCK_PREFIX	"  cmpxchgl  %2,%0\n\t"
-		"  jnz	     1b\n\t"
-		"2:\n\t"
-		"# ending __down_read_trylock\n\t"
-		: "+m"(sem->count), "=&a"(result), "=&r"(tmp)
-		: "i"(RWSEM_ACTIVE_READ_BIAS)
-		: "memory", "cc");
-	return result>=0 ? 1 : 0;
-}
-
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = RWSEM_ACTIVE_WRITE_BIAS;
-	__asm__ __volatile__(
-		"# beginning down_write\n\t"
-LOCK_PREFIX	"  xaddl      %0,(%%rdi)\n\t" /* subtract 0x0000ffff, returns the old value */
-		"  testl     %0,%0\n\t" /* was the count 0 before? */
-		"  jnz       2f\n\t" /* jump if we weren't granted the lock */
-		"1:\n\t"
-		LOCK_SECTION_START("")
-		"2:\n\t"
-		"  call      rwsem_down_write_failed_thunk\n\t"
-		"  jmp       1b\n"
-		LOCK_SECTION_END
-		"# ending down_write"
-		: "=&r" (tmp) 
-		: "0"(tmp), "D"(sem)
-		: "memory", "cc");
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
-	signed long ret = cmpxchg(&sem->count,
-				  RWSEM_UNLOCKED_VALUE, 
-				  RWSEM_ACTIVE_WRITE_BIAS);
-	if (ret == RWSEM_UNLOCKED_VALUE)
-		return 1;
-	return 0;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
-	__s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
-	__asm__ __volatile__(
-		"# beginning __up_read\n\t"
-LOCK_PREFIX	"  xaddl      %[tmp],(%%rdi)\n\t" /* subtracts 1, returns the old value */
-		"  js        2f\n\t" /* jump if the lock is being waited upon */
-		"1:\n\t"
-		LOCK_SECTION_START("")
-		"2:\n\t"
-		"  decw      %w[tmp]\n\t" /* do nothing if still outstanding active readers */
-		"  jnz       1b\n\t"
-		"  call      rwsem_wake_thunk\n\t"
-		"  jmp       1b\n"
-		LOCK_SECTION_END
-		"# ending __up_read\n"
-		: "+m"(sem->count), [tmp] "+r" (tmp)
-		: "D"(sem)
-		: "memory", "cc");
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
-	unsigned tmp; 
-	__asm__ __volatile__(
-		"# beginning __up_write\n\t"
-		"  movl     %[bias],%[tmp]\n\t"
-LOCK_PREFIX	"  xaddl     %[tmp],(%%rdi)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
-		"  jnz       2f\n\t" /* jump if the lock is being waited upon */
-		"1:\n\t"
-		LOCK_SECTION_START("")
-		"2:\n\t"
-		"  decw      %w[tmp]\n\t" /* did the active count reduce to 0? */
-		"  jnz       1b\n\t" /* jump back if not */
-		"  call      rwsem_wake_thunk\n\t"
-		"  jmp       1b\n"
-		LOCK_SECTION_END
-		"# ending __up_write\n"
-		: "+m"(sem->count), [tmp] "=r" (tmp)
-		: "D"(sem), [bias] "i"(-RWSEM_ACTIVE_WRITE_BIAS)
-		: "memory", "cc");
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-		"# beginning __downgrade_write\n\t"
-LOCK_PREFIX	"  addl      %[bias],(%%rdi)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
-		"  js        2f\n\t" /* jump if the lock is being waited upon */
-		"1:\n\t"
-		LOCK_SECTION_START("")
-		"2:\n\t"
-		"  call	     rwsem_downgrade_thunk\n"
-		"  jmp       1b\n"
-		LOCK_SECTION_END
-		"# ending __downgrade_write\n"
-		: "=m"(sem->count)
-		: "D"(sem), [bias] "i"(-RWSEM_WAITING_BIAS), "m"(sem->count)
-		: "memory", "cc");
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
-	__asm__ __volatile__(
-LOCK_PREFIX	"addl %1,%0"
-		:"=m"(sem->count)
-		:"ir"(delta), "m"(sem->count));
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
-	int tmp = delta;
-
-	__asm__ __volatile__(
-LOCK_PREFIX	"xaddl %0,(%2)"
-		: "=r"(tmp), "=m"(sem->count)
-		: "r"(sem), "m"(sem->count), "0" (tmp)
-		: "memory");
-
-	return tmp+delta;
-}
-
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
-	return (sem->count != 0);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _X8664_RWSEM_H */
-- 
cgit v1.2.3-71-gd317


From bf0f2e23834e2bf7d64b467ef07095b1c7e2c04b Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86_64: Set ____cacheline_maxaligned_in_smp alignment to 128
 bytes

The current value was correct before the introduction of Intel EM64T support -
but now L1_CACHE_SHIFT_MAX can be less than L1_CACHE_SHIFT, which _is_ funny!

Between the few users of ____cacheline_maxaligned_in_smp, we also have (for
example) rcu_ctrlblk, and struct zone, with zone->{lru_,}lock.  I.e.  we have
a lot of excess cacheline bouncing on them.

No correctness issues, obviously.  So this could even be merged for 2.6.14
(I'm not a fan of this idea, though).

CC: Andi Kleen <ak@suse.de>
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/cache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-x86_64/cache.h b/include/asm-x86_64/cache.h
index eda62bae1240..33e53424128b 100644
--- a/include/asm-x86_64/cache.h
+++ b/include/asm-x86_64/cache.h
@@ -9,6 +9,6 @@
 /* L1 cache line size */
 #define L1_CACHE_SHIFT	(CONFIG_X86_L1_CACHE_SHIFT)
 #define L1_CACHE_BYTES	(1 << L1_CACHE_SHIFT)
-#define L1_CACHE_SHIFT_MAX 6	/* largest L1 which this arch supports */
+#define L1_CACHE_SHIFT_MAX 7	/* largest L1 which this arch supports */
 
 #endif
-- 
cgit v1.2.3-71-gd317


From efbbdce94f6ea54cf06d9a06e4c95f6874ad64a8 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86_64: Use common sys_time64

Keeping this function does not makes sense because it's a copied (and
buggy) copy of sys_time.  The only difference is that now.tv_sec (which is
a time_t, i.e.  a 64-bit long) is copied (and truncated) into a int
(32-bit).

The prototype is the same (they both take a long __user *), so let's drop
this and redirect it to sys_time (and make sure it exists by defining
__ARCH_WANT_SYS_TIME).

Only disadvantage is that the sys_stime definition is also compiled (may be
fixed if needed by adding a separate __ARCH_WANT_SYS_STIME macro, and
defining it for all arch's defining __ARCH_WANT_SYS_TIME except x86_64).

Acked-by: Andi Kleen <ak@suse.de>
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/sys_x86_64.c | 14 --------------
 include/asm-x86_64/unistd.h     |  3 ++-
 2 files changed, 2 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c
index cc7821c68851..6449ea8fe756 100644
--- a/arch/x86_64/kernel/sys_x86_64.c
+++ b/arch/x86_64/kernel/sys_x86_64.c
@@ -154,17 +154,3 @@ asmlinkage long sys_uname(struct new_utsname __user * name)
 		err |= copy_to_user(&name->machine, "i686", 5); 		
 	return err ? -EFAULT : 0;
 }
-
-asmlinkage long sys_time64(long __user * tloc)
-{
-	struct timeval now; 
-	int i; 
-
-	do_gettimeofday(&now);
-	i = now.tv_sec;
-	if (tloc) {
-		if (put_user(i,tloc))
-			i = -EFAULT;
-	}
-	return i;
-}
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 3c494b65d33a..2c42150bce0c 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -462,7 +462,7 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr)
 #define __NR_tkill	200
 __SYSCALL(__NR_tkill, sys_tkill) 
 #define __NR_time      201
-__SYSCALL(__NR_time, sys_time64)
+__SYSCALL(__NR_time, sys_time)
 #define __NR_futex     202
 __SYSCALL(__NR_futex, sys_futex)
 #define __NR_sched_setaffinity    203
@@ -608,6 +608,7 @@ do { \
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_TIME
 #endif
 
-- 
cgit v1.2.3-71-gd317


From 8893166ff8694f36655009aa9bf8e7f2e1c9339f Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 5 Nov 2005 17:25:54 +0100
Subject: [PATCH] x86_64: Increase the maximum number of local APICs to the
 maximum

This is needed for large multinode IBM systems which have a sparse
APIC space in clustered mode, fully covering the available 8 bits.

The previous kernels would limit the local APIC number to 127,
which caused it to reject some of the CPUs at boot.

I increased the maximum and shrunk the apic_version array a bit
to make up for that (the version is only 8 bit, so don't need
an full int to store)

Cc:  Chris McDermott <lcm@us.ibm.com>

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/mpparse.c | 11 +++++++----
 include/asm-x86_64/mpspec.h  |  4 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 1d61f10a92c6..1105250bf02c 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -42,7 +42,7 @@ int acpi_found_madt;
  * Various Linux-internal data structures created from the
  * MP-table.
  */
-int apic_version [MAX_APICS];
+unsigned char apic_version [MAX_APICS];
 unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
 
@@ -108,7 +108,8 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 
 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
-	int ver, cpu;
+	int cpu;
+	unsigned char ver;
 	static int found_bsp=0;
 
 	if (!(m->mpc_cpuflag & CPU_ENABLED)) {
@@ -133,12 +134,14 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 	}
 
 	cpu = num_processors++;
-
-	if (m->mpc_apicid > MAX_APICS) {
+	
+#if MAX_APICS < 255	
+	if ((int)m->mpc_apicid > MAX_APICS) {
 		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
 			m->mpc_apicid, MAX_APICS);
 		return;
 	}
+#endif
 	ver = m->mpc_apicver;
 
 	physid_set(m->mpc_apicid, phys_cpu_present_map);
diff --git a/include/asm-x86_64/mpspec.h b/include/asm-x86_64/mpspec.h
index 6b375384f5c2..6f8a17d105ab 100644
--- a/include/asm-x86_64/mpspec.h
+++ b/include/asm-x86_64/mpspec.h
@@ -16,7 +16,7 @@
 /*
  * A maximum of 255 APICs with the current APIC ID architecture.
  */
-#define MAX_APICS 128
+#define MAX_APICS 255
 
 struct intel_mp_floating
 {
@@ -173,7 +173,7 @@ extern int smp_found_config;
 extern void find_smp_config (void);
 extern void get_smp_config (void);
 extern int nr_ioapics;
-extern int apic_version [MAX_APICS];
+extern unsigned char apic_version [MAX_APICS];
 extern int mp_irq_entries;
 extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
 extern int mpc_default_type;
-- 
cgit v1.2.3-71-gd317


From cf225356578326308b16a0fd03ff3fa72fe3da07 Mon Sep 17 00:00:00 2001
From: Jochen Friedrich <jochen@scram.de>
Date: Mon, 14 Nov 2005 21:58:18 -0800
Subject: [LLC]: Fix typo

Signed-off-by: Jochen Friedrich <jochen@scram.de>
Acked-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/llc_pdu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index c7a959428b4f..8f6306581fa7 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -357,7 +357,7 @@ static inline void llc_pdu_init_as_test_rsp(struct sk_buff *skb,
 
 /* LLC Type 1 XID command/response information fields format */
 struct llc_xid_info {
-	u8 fmt_id;	/* always 0x18 for LLC */
+	u8 fmt_id;	/* always 0x81 for LLC */
 	u8 type;	/* different if NULL/non-NULL LSAP */
 	u8 rw;		/* sender receive window */
 };
-- 
cgit v1.2.3-71-gd317


From d4ed803c564701eae9534ab26a86ddb06acaf49c Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@gnumonks.org>
Date: Tue, 15 Nov 2005 00:09:06 -0800
Subject: [PATCH] Make sysctl.h (again) usable from userspace

Make sysctl.h (again) useable from userspace

Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sysctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 64f203c45378..6bc03c911a83 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -20,7 +20,6 @@
 
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/list.h>
 #include <linux/compiler.h>
 
 struct file;
@@ -859,6 +858,7 @@ enum
 };
 
 #ifdef __KERNEL__
+#include <linux/list.h>
 
 extern void sysctl_init(void);
 
-- 
cgit v1.2.3-71-gd317


From f4eeb0a20f017fd8bc849cc50469c2e2e6a0c05c Mon Sep 17 00:00:00 2001
From: Miles Bader <miles@gnu.org>
Date: Tue, 15 Nov 2005 00:09:17 -0800
Subject: [PATCH] v850: Add missing include in hardirq.h

Signed-off-by: Miles Bader <miles@gnu.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-v850/hardirq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-v850/hardirq.h b/include/asm-v850/hardirq.h
index 5dfca8047cbe..4f913bccbefb 100644
--- a/include/asm-v850/hardirq.h
+++ b/include/asm-v850/hardirq.h
@@ -5,6 +5,8 @@
 #include <linux/threads.h>
 #include <linux/cache.h>
 
+#include <asm/irq.h>
+
 typedef struct {
 	unsigned int __softirq_pending;
 } ____cacheline_aligned irq_cpustat_t;
-- 
cgit v1.2.3-71-gd317


From 0c53508980a95b84c296c4336a831776cc22cf58 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Nov 2005 00:09:18 -0800
Subject: [PATCH] v850: use generic hardirq code

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Miles Bader <miles@gnu.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/v850/Kconfig          |   8 +
 arch/v850/kernel/irq.c     | 636 +--------------------------------------------
 include/asm-v850/hardirq.h |   2 +
 3 files changed, 15 insertions(+), 631 deletions(-)

(limited to 'include')

diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig
index 89c053b6c2c4..310865903234 100644
--- a/arch/v850/Kconfig
+++ b/arch/v850/Kconfig
@@ -23,6 +23,14 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
+config GENERIC_HARDIRQS
+	bool
+	default y
+
+config GENERIC_IRQ_PROBE
+	bool
+	default y
+
 # Turn off some random 386 crap that can affect device config
 config ISA
 	bool
diff --git a/arch/v850/kernel/irq.c b/arch/v850/kernel/irq.c
index 534eb8ab97a7..7a151c26f82e 100644
--- a/arch/v850/kernel/irq.c
+++ b/arch/v850/kernel/irq.c
@@ -27,55 +27,15 @@
 #include <asm/system.h>
 
 /*
- * Controller mappings for all interrupt sources:
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves, it doesn't deserve
+ * a generic callback i think.
  */
-irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
-	[0 ... NR_IRQS-1] = {
-		.handler = &no_irq_type,
-		.lock = SPIN_LOCK_UNLOCKED
-	}
-};
-
-/*
- * Special irq handlers.
- */
-
-irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
-{
-	return IRQ_NONE;
-}
-
-/*
- * Generic no controller code
- */
-
-static void enable_none(unsigned int irq) { }
-static unsigned int startup_none(unsigned int irq) { return 0; }
-static void disable_none(unsigned int irq) { }
-static void ack_none(unsigned int irq)
+void ack_bad_irq(unsigned int irq)
 {
-	/*
-	 * 'what should we do if we get a hw irq event on an illegal vector'.
-	 * each architecture has to answer this themselves, it doesn't deserve
-	 * a generic callback i think.
-	 */
 	printk("received IRQ %d with unknown interrupt type\n", irq);
 }
 
-/* startup is the same as "enable", shutdown is same as "disable" */
-#define shutdown_none	disable_none
-#define end_none	enable_none
-
-struct hw_interrupt_type no_irq_type = {
-	.typename = "none",
-	.startup = startup_none,
-	.shutdown = shutdown_none,
-	.enable = enable_none,
-	.disable = disable_none,
-	.ack = ack_none,
-	.end = end_none
-};
-
 volatile unsigned long irq_err_count, spurious_count;
 
 /*
@@ -136,596 +96,16 @@ int show_interrupts(struct seq_file *p, void *v)
 	return 0;
 }
 
-/*
- * This should really return information about whether
- * we should do bottom half handling etc. Right now we
- * end up _always_ checking the bottom half, which is a
- * waste of time and is not what some drivers would
- * prefer.
- */
-int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
-{
-	int status = 1; /* Force the "do bottom halves" bit */
-	int ret;
-
-	if (!(action->flags & SA_INTERRUPT))
-		local_irq_enable();
-
-	do {
-		ret = action->handler(irq, action->dev_id, regs);
-		if (ret == IRQ_HANDLED)
-			status |= action->flags;
-		action = action->next;
-	} while (action);
-	if (status & SA_SAMPLE_RANDOM)
-		add_interrupt_randomness(irq);
-	local_irq_disable();
-
-	return status;
-}
-
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock. 
- */
- 
-/**
- *	disable_irq_nosync - disable an irq without waiting
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. Unlike disable_irq(), this function does not ensure existing
- *	instances of the IRQ handler have completed before returning.
- *
- *	This function may be called from IRQ context.
- */
- 
-void inline disable_irq_nosync(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (!desc->depth++) {
-		desc->status |= IRQ_DISABLED;
-		desc->handler->disable(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-/**
- *	disable_irq - disable an irq and wait for completion
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. That is for two disables you need two enables. This
- *	function waits for any pending IRQ handlers for this interrupt
- *	to complete before returning. If you use this function while
- *	holding a resource the IRQ handler may need you will deadlock.
- *
- *	This function may be called - with care - from IRQ context.
- */
- 
-void disable_irq(unsigned int irq)
-{
-	disable_irq_nosync(irq);
-	synchronize_irq(irq);
-}
-
-/**
- *	enable_irq - enable interrupt handling on an irq
- *	@irq: Interrupt to enable
- *
- *	Re-enables the processing of interrupts on this IRQ line
- *	providing no disable_irq calls are now in effect.
- *
- *	This function may be called from IRQ context.
- */
- 
-void enable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	switch (desc->depth) {
-	case 1: {
-		unsigned int status = desc->status & ~IRQ_DISABLED;
-		desc->status = status;
-		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
-			desc->status = status | IRQ_REPLAY;
-			hw_resend_irq(desc->handler,irq);
-		}
-		desc->handler->enable(irq);
-		/* fall-through */
-	}
-	default:
-		desc->depth--;
-		break;
-	case 0:
-		printk("enable_irq(%u) unbalanced from %p\n", irq,
-		       __builtin_return_address(0));
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
 /* Handle interrupt IRQ.  REGS are the registers at the time of ther
    interrupt.  */
 unsigned int handle_irq (int irq, struct pt_regs *regs)
 {
-	/* 
-	 * We ack quickly, we don't want the irq controller
-	 * thinking we're snobs just because some other CPU has
-	 * disabled global interrupts (we have already done the
-	 * INT_ACK cycles, it's too late to try to pretend to the
-	 * controller that we aren't taking the interrupt).
-	 *
-	 * 0 return value means that this irq is already being
-	 * handled by some other CPU. (or is disabled)
-	 */
-	int cpu = smp_processor_id();
-	irq_desc_t *desc = irq_desc + irq;
-	struct irqaction * action;
-	unsigned int status;
-
 	irq_enter();
-	kstat_cpu(cpu).irqs[irq]++;
-	spin_lock(&desc->lock);
-	desc->handler->ack(irq);
-	/*
-	   REPLAY is when Linux resends an IRQ that was dropped earlier
-	   WAITING is used by probe to mark irqs that are being tested
-	   */
-	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
-	status |= IRQ_PENDING; /* we _want_ to handle it */
-
-	/*
-	 * If the IRQ is disabled for whatever reason, we cannot
-	 * use the action we have.
-	 */
-	action = NULL;
-	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
-		action = desc->action;
-		status &= ~IRQ_PENDING; /* we commit to handling */
-		status |= IRQ_INPROGRESS; /* we are handling it */
-	}
-	desc->status = status;
-
-	/*
-	 * If there is no IRQ handler or it was disabled, exit early.
-	   Since we set PENDING, if another processor is handling
-	   a different instance of this same irq, the other processor
-	   will take care of it.
-	 */
-	if (unlikely(!action))
-		goto out;
-
-	/*
-	 * Edge triggered interrupts need to remember
-	 * pending events.
-	 * This applies to any hw interrupts that allow a second
-	 * instance of the same irq to arrive while we are in handle_irq
-	 * or in the handler. But the code here only handles the _second_
-	 * instance of the irq, not the third or fourth. So it is mostly
-	 * useful for irq hardware that does not mask cleanly in an
-	 * SMP environment.
-	 */
-	for (;;) {
-		spin_unlock(&desc->lock);
-		handle_IRQ_event(irq, regs, action);
-		spin_lock(&desc->lock);
-		
-		if (likely(!(desc->status & IRQ_PENDING)))
-			break;
-		desc->status &= ~IRQ_PENDING;
-	}
-	desc->status &= ~IRQ_INPROGRESS;
-
-out:
-	/*
-	 * The ->end() handler has to deal with interrupts which got
-	 * disabled while the handler was running.
-	 */
-	desc->handler->end(irq);
-	spin_unlock(&desc->lock);
-
+	__do_IRQ(irq, regs);
 	irq_exit();
-
 	return 1;
 }
 
-/**
- *	request_irq - allocate an interrupt line
- *	@irq: Interrupt line to allocate
- *	@handler: Function to be called when the IRQ occurs
- *	@irqflags: Interrupt type flags
- *	@devname: An ascii name for the claiming device
- *	@dev_id: A cookie passed back to the handler function
- *
- *	This call allocates interrupt resources and enables the
- *	interrupt line and IRQ handling. From the point this
- *	call is made your handler function may be invoked. Since
- *	your handler function must clear any interrupt the board 
- *	raises, you must take care both to initialise your hardware
- *	and to set up the interrupt handler in the right order.
- *
- *	Dev_id must be globally unique. Normally the address of the
- *	device data structure is used as the cookie. Since the handler
- *	receives this value it makes sense to use it.
- *
- *	If your interrupt is shared you must pass a non NULL dev_id
- *	as this is required when freeing the interrupt.
- *
- *	Flags:
- *
- *	SA_SHIRQ		Interrupt is shared
- *
- *	SA_INTERRUPT		Disable local interrupts while processing
- *
- *	SA_SAMPLE_RANDOM	The interrupt can be used for entropy
- *
- */
- 
-int request_irq(unsigned int irq, 
-		irqreturn_t (*handler)(int, void *, struct pt_regs *),
-		unsigned long irqflags, 
-		const char * devname,
-		void *dev_id)
-{
-	int retval;
-	struct irqaction * action;
-
-#if 1
-	/*
-	 * Sanity-check: shared interrupts should REALLY pass in
-	 * a real dev-ID, otherwise we'll have trouble later trying
-	 * to figure out which interrupt is which (messes up the
-	 * interrupt freeing logic etc).
-	 */
-	if (irqflags & SA_SHIRQ) {
-		if (!dev_id)
-			printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]);
-	}
-#endif
-
-	if (irq >= NR_IRQS)
-		return -EINVAL;
-	if (!handler)
-		return -EINVAL;
-
-	action = (struct irqaction *)
-			kmalloc(sizeof(struct irqaction), GFP_KERNEL);
-	if (!action)
-		return -ENOMEM;
-
-	action->handler = handler;
-	action->flags = irqflags;
-	cpus_clear(action->mask);
-	action->name = devname;
-	action->next = NULL;
-	action->dev_id = dev_id;
-
-	retval = setup_irq(irq, action);
-	if (retval)
-		kfree(action);
-	return retval;
-}
-
-EXPORT_SYMBOL(request_irq);
-
-/**
- *	free_irq - free an interrupt
- *	@irq: Interrupt line to free
- *	@dev_id: Device identity to free
- *
- *	Remove an interrupt handler. The handler is removed and if the
- *	interrupt line is no longer in use by any driver it is disabled.
- *	On a shared IRQ the caller must ensure the interrupt is disabled
- *	on the card it drives before calling this function. The function
- *	does not return until any executing interrupts for this IRQ
- *	have completed.
- *
- *	This function may be called from interrupt context. 
- *
- *	Bugs: Attempting to free an irq in a handler for the same irq hangs
- *	      the machine.
- */
- 
-void free_irq(unsigned int irq, void *dev_id)
-{
-	irq_desc_t *desc;
-	struct irqaction **p;
-	unsigned long flags;
-
-	if (irq >= NR_IRQS)
-		return;
-
-	desc = irq_desc + irq;
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	for (;;) {
-		struct irqaction * action = *p;
-		if (action) {
-			struct irqaction **pp = p;
-			p = &action->next;
-			if (action->dev_id != dev_id)
-				continue;
-
-			/* Found it - now remove it from the list of entries */
-			*pp = action->next;
-			if (!desc->action) {
-				desc->status |= IRQ_DISABLED;
-				desc->handler->shutdown(irq);
-			}
-			spin_unlock_irqrestore(&desc->lock,flags);
-
-			synchronize_irq(irq);
-			kfree(action);
-			return;
-		}
-		printk("Trying to free free IRQ%d\n",irq);
-		spin_unlock_irqrestore(&desc->lock,flags);
-		return;
-	}
-}
-
-EXPORT_SYMBOL(free_irq);
-
-/*
- * IRQ autodetection code..
- *
- * This depends on the fact that any interrupt that
- * comes in on to an unassigned handler will get stuck
- * with "IRQ_WAITING" cleared and the interrupt
- * disabled.
- */
-
-static DECLARE_MUTEX(probe_sem);
-
-/**
- *	probe_irq_on	- begin an interrupt autodetect
- *
- *	Commence probing for an interrupt. The interrupts are scanned
- *	and a mask of potential interrupt lines is returned.
- *
- */
- 
-unsigned long probe_irq_on(void)
-{
-	unsigned int i;
-	irq_desc_t *desc;
-	unsigned long val;
-	unsigned long delay;
-
-	down(&probe_sem);
-	/* 
-	 * something may have generated an irq long ago and we want to
-	 * flush such a longstanding irq before considering it as spurious. 
-	 */
-	for (i = NR_IRQS-1; i > 0; i--)  {
-		desc = irq_desc + i;
-
-		spin_lock_irq(&desc->lock);
-		if (!irq_desc[i].action) 
-			irq_desc[i].handler->startup(i);
-		spin_unlock_irq(&desc->lock);
-	}
-
-	/* Wait for longstanding interrupts to trigger. */
-	for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
-		/* about 20ms delay */ barrier();
-
-	/*
-	 * enable any unassigned irqs
-	 * (we must startup again here because if a longstanding irq
-	 * happened in the previous stage, it may have masked itself)
-	 */
-	for (i = NR_IRQS-1; i > 0; i--) {
-		desc = irq_desc + i;
-
-		spin_lock_irq(&desc->lock);
-		if (!desc->action) {
-			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
-			if (desc->handler->startup(i))
-				desc->status |= IRQ_PENDING;
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-
-	/*
-	 * Wait for spurious interrupts to trigger
-	 */
-	for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
-		/* about 100ms delay */ barrier();
-
-	/*
-	 * Now filter out any obviously spurious interrupts
-	 */
-	val = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc_t *desc = irq_desc + i;
-		unsigned int status;
-
-		spin_lock_irq(&desc->lock);
-		status = desc->status;
-
-		if (status & IRQ_AUTODETECT) {
-			/* It triggered already - consider it spurious. */
-			if (!(status & IRQ_WAITING)) {
-				desc->status = status & ~IRQ_AUTODETECT;
-				desc->handler->shutdown(i);
-			} else
-				if (i < 32)
-					val |= 1 << i;
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-
-	return val;
-}
-
-EXPORT_SYMBOL(probe_irq_on);
-
-/*
- * Return a mask of triggered interrupts (this
- * can handle only legacy ISA interrupts).
- */
- 
-/**
- *	probe_irq_mask - scan a bitmap of interrupt lines
- *	@val:	mask of interrupts to consider
- *
- *	Scan the ISA bus interrupt lines and return a bitmap of
- *	active interrupts. The interrupt probe logic state is then
- *	returned to its previous value.
- *
- *	Note: we need to scan all the irq's even though we will
- *	only return ISA irq numbers - just so that we reset them
- *	all to a known state.
- */
-unsigned int probe_irq_mask(unsigned long val)
-{
-	int i;
-	unsigned int mask;
-
-	mask = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc_t *desc = irq_desc + i;
-		unsigned int status;
-
-		spin_lock_irq(&desc->lock);
-		status = desc->status;
-
-		if (status & IRQ_AUTODETECT) {
-			if (i < 16 && !(status & IRQ_WAITING))
-				mask |= 1 << i;
-
-			desc->status = status & ~IRQ_AUTODETECT;
-			desc->handler->shutdown(i);
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-	up(&probe_sem);
-
-	return mask & val;
-}
-
-/*
- * Return the one interrupt that triggered (this can
- * handle any interrupt source).
- */
-
-/**
- *	probe_irq_off	- end an interrupt autodetect
- *	@val: mask of potential interrupts (unused)
- *
- *	Scans the unused interrupt lines and returns the line which
- *	appears to have triggered the interrupt. If no interrupt was
- *	found then zero is returned. If more than one interrupt is
- *	found then minus the first candidate is returned to indicate
- *	their is doubt.
- *
- *	The interrupt probe logic state is returned to its previous
- *	value.
- *
- *	BUGS: When used in a module (which arguably shouldnt happen)
- *	nothing prevents two IRQ probe callers from overlapping. The
- *	results of this are non-optimal.
- */
- 
-int probe_irq_off(unsigned long val)
-{
-	int i, irq_found, nr_irqs;
-
-	nr_irqs = 0;
-	irq_found = 0;
-	for (i = 0; i < NR_IRQS; i++) {
-		irq_desc_t *desc = irq_desc + i;
-		unsigned int status;
-
-		spin_lock_irq(&desc->lock);
-		status = desc->status;
-
-		if (status & IRQ_AUTODETECT) {
-			if (!(status & IRQ_WAITING)) {
-				if (!nr_irqs)
-					irq_found = i;
-				nr_irqs++;
-			}
-			desc->status = status & ~IRQ_AUTODETECT;
-			desc->handler->shutdown(i);
-		}
-		spin_unlock_irq(&desc->lock);
-	}
-	up(&probe_sem);
-
-	if (nr_irqs > 1)
-		irq_found = -irq_found;
-	return irq_found;
-}
-
-EXPORT_SYMBOL(probe_irq_off);
-
-/* this was setup_x86_irq but it seems pretty generic */
-int setup_irq(unsigned int irq, struct irqaction * new)
-{
-	int shared = 0;
-	unsigned long flags;
-	struct irqaction *old, **p;
-	irq_desc_t *desc = irq_desc + irq;
-
-	/*
-	 * Some drivers like serial.c use request_irq() heavily,
-	 * so we have to be careful not to interfere with a
-	 * running system.
-	 */
-	if (new->flags & SA_SAMPLE_RANDOM) {
-		/*
-		 * This function might sleep, we want to call it first,
-		 * outside of the atomic block.
-		 * Yes, this might clear the entropy pool if the wrong
-		 * driver is attempted to be loaded, without actually
-		 * installing a new handler, but is this really a problem,
-		 * only the sysadmin is able to do this.
-		 */
-		rand_initialize_irq(irq);
-	}
-
-	/*
-	 * The following block of code has to be executed atomically
-	 */
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	if ((old = *p) != NULL) {
-		/* Can't share interrupts unless both agree to */
-		if (!(old->flags & new->flags & SA_SHIRQ)) {
-			spin_unlock_irqrestore(&desc->lock,flags);
-			return -EBUSY;
-		}
-
-		/* add new interrupt at end of irq queue */
-		do {
-			p = &old->next;
-			old = *p;
-		} while (old);
-		shared = 1;
-	}
-
-	*p = new;
-
-	if (!shared) {
-		desc->depth = 0;
-		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
-		desc->handler->startup(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock,flags);
-
-	/* register_irq_proc(irq); */
-	return 0;
-}
-
 /* Initialize irq handling for IRQs.
    BASE_IRQ, BASE_IRQ+INTERVAL, ..., BASE_IRQ+NUM*INTERVAL
    to IRQ_TYPE.  An IRQ_TYPE of 0 means to use a generic interrupt type.  */
@@ -741,9 +121,3 @@ init_irq_handlers (int base_irq, int num, int interval,
 		base_irq += interval;
 	}
 }
-
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_SYSCTL)
-void init_irq_proc(void)
-{
-}
-#endif /* CONFIG_PROC_FS && CONFIG_SYSCTL */
diff --git a/include/asm-v850/hardirq.h b/include/asm-v850/hardirq.h
index 4f913bccbefb..d98488cd5af1 100644
--- a/include/asm-v850/hardirq.h
+++ b/include/asm-v850/hardirq.h
@@ -24,4 +24,6 @@ typedef struct {
 # error HARDIRQ_BITS is too low!
 #endif
 
+void ack_bad_irq(unsigned int irq);
+
 #endif /* __V850_HARDIRQ_H__ */
-- 
cgit v1.2.3-71-gd317


From 31f3426904e066f17e3f88c468a2f7c869ad4aac Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 15 Nov 2005 15:17:10 -0800
Subject: [TCP]: More spelling fixes.

From Joe Perches

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 4 ++--
 net/ipv4/tcp_input.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0f9848011972..d78025f9fbea 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -552,8 +552,8 @@ extern u32	__tcp_select_window(struct sock *sk);
 
 /* TCP timestamps are only 32-bits, this causes a slight
  * complication on 64-bit systems since we store a snapshot
- * of jiffies in the buffer control blocks below.  We decidedly
- * only use of the low 32-bits of jiffies and hide the ugly
+ * of jiffies in the buffer control blocks below.  We decided
+ * to use only the low 32-bits of jiffies and hide the ugly
  * casts with the following macro.
  */
 #define tcp_time_stamp		((__u32)(jiffies))
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 40a26b7157b4..bf2e23086bce 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -367,7 +367,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 		 * are stalled on filesystem I/O.
 		 *
 		 * Also, since we are only going for a minimum in the
-		 * non-timestamp case, we do not smoother things out
+		 * non-timestamp case, we do not smooth things out
 		 * else with timestamps disabled convergence takes too
 		 * long.
 		 */
@@ -546,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	 *
 	 * Funny. This algorithm seems to be very broken.
 	 * These formulae increase RTO, when it should be decreased, increase
-	 * too slowly, when it should be increased fastly, decrease too fastly
+	 * too slowly, when it should be increased quickly, decrease too quickly
 	 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
-- 
cgit v1.2.3-71-gd317