From 50eca3eb89d73d9f0aa070b126c7ee6a616016ab Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 30 Sep 2005 19:03:00 -0400
Subject: [ACPI] ACPICA 20050930

Completed a major overhaul of the Resource Manager code -
specifically, optimizations in the area of the AML/internal
resource conversion code. The code has been optimized to
simplify and eliminate duplicated code, CPU stack use has
been decreased by optimizing function parameters and local
variables, and naming conventions across the manager have
been standardized for clarity and ease of maintenance (this
includes function, parameter, variable, and struct/typedef
names.)

All Resource Manager dispatch and information tables have
been moved to a single location for clarity and ease of
maintenance. One new file was created, named "rsinfo.c".

The ACPI return macros (return_ACPI_STATUS, etc.) have
been modified to guarantee that the argument is
not evaluated twice, making them less prone to macro
side-effects. However, since there exists the possibility
of additional stack use if a particular compiler cannot
optimize them (such as in the debug generation case),
the original macros are optionally available.  Note that
some invocations of the return_VALUE macro may now cause
size mismatch warnings; the return_UINT8 and return_UINT32
macros are provided to eliminate these. (From Randy Dunlap)

Implemented a new mechanism to enable debug tracing for
individual control methods. A new external interface,
acpi_debug_trace(), is provided to enable this mechanism. The
intent is to allow the host OS to easily enable and disable
tracing for problematic control methods. This interface
can be easily exposed to a user or debugger interface if
desired. See the file psxface.c for details.

acpi_ut_callocate() will now return a valid pointer if a
length of zero is specified - a length of one is used
and a warning is issued. This matches the behavior of
acpi_ut_allocate().

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 026c3c011dc0..84d3d9f034ce 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -435,7 +435,7 @@ extern int sbf_port ;
 
 #endif 	/* !CONFIG_ACPI */
 
-int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low);
+int acpi_register_gsi (u32 gsi, int triggering, int polarity);
 int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 
 /*
-- 
cgit v1.2.3-71-gd317


From 729b4d4ce1982c52040bbf22d6711cdf8db07ad8 Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>
Date: Thu, 1 Dec 2005 04:29:00 -0500
Subject: [ACPI] fix reboot upon suspend-to-disk

http://bugzilla.kernel.org/show_bug.cgi?id=4320

Signed-off-by: Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/sleep/poweroff.c | 15 +++++++++------
 drivers/acpi/sleep/sleep.h    |  2 +-
 drivers/acpi/sleep/wakeup.c   |  6 +++---
 include/linux/kernel.h        |  1 +
 include/linux/reboot.h        |  3 +--
 kernel/power/disk.c           |  9 +--------
 kernel/sys.c                  | 25 ++++++++++---------------
 7 files changed, 26 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/sleep/poweroff.c b/drivers/acpi/sleep/poweroff.c
index af7935a95bcc..47fb4b394eec 100644
--- a/drivers/acpi/sleep/poweroff.c
+++ b/drivers/acpi/sleep/poweroff.c
@@ -33,9 +33,7 @@ int acpi_sleep_prepare(u32 acpi_state)
 	ACPI_FLUSH_CPU_CACHE();
 	acpi_enable_wakeup_device_prep(acpi_state);
 #endif
-	if (acpi_state == ACPI_STATE_S5) {
-		acpi_wakeup_gpe_poweroff_prepare();
-	}
+	acpi_gpe_sleep_prepare(acpi_state);
 	acpi_enter_sleep_state_prep(acpi_state);
 	return 0;
 }
@@ -53,11 +51,16 @@ void acpi_power_off(void)
 
 static int acpi_shutdown(struct sys_device *x)
 {
-	if (system_state == SYSTEM_POWER_OFF) {
-		/* Prepare if we are going to power off the system */
+	switch (system_state) {
+	case SYSTEM_POWER_OFF:
+		/* Prepare to power off the system */
 		return acpi_sleep_prepare(ACPI_STATE_S5);
+	case SYSTEM_SUSPEND_DISK:
+		/* Prepare to suspend the system to disk */
+		return acpi_sleep_prepare(ACPI_STATE_S4);
+	default:
+		return 0;
 	}
-	return 0;
 }
 
 static struct sysdev_class acpi_sysclass = {
diff --git a/drivers/acpi/sleep/sleep.h b/drivers/acpi/sleep/sleep.h
index efd0001c6f05..f3e70397a7d6 100644
--- a/drivers/acpi/sleep/sleep.h
+++ b/drivers/acpi/sleep/sleep.h
@@ -5,4 +5,4 @@ extern int acpi_suspend (u32 state);
 extern void acpi_enable_wakeup_device_prep(u8 sleep_state);
 extern void acpi_enable_wakeup_device(u8 sleep_state);
 extern void acpi_disable_wakeup_device(u8 sleep_state);
-extern void acpi_wakeup_gpe_poweroff_prepare(void);
+extern void acpi_gpe_sleep_prepare(u32 sleep_state);
diff --git a/drivers/acpi/sleep/wakeup.c b/drivers/acpi/sleep/wakeup.c
index 4134ed43d026..85df0ceda2a9 100644
--- a/drivers/acpi/sleep/wakeup.c
+++ b/drivers/acpi/sleep/wakeup.c
@@ -192,7 +192,7 @@ late_initcall(acpi_wakeup_device_init);
  * RUNTIME GPEs, we simply mark all GPES that
  * are not enabled for wakeup from S5 as RUNTIME.
  */
-void acpi_wakeup_gpe_poweroff_prepare(void)
+void acpi_gpe_sleep_prepare(u32 sleep_state)
 {
 	struct list_head *node, *next;
 
@@ -201,8 +201,8 @@ void acpi_wakeup_gpe_poweroff_prepare(void)
 						       struct acpi_device,
 						       wakeup_list);
 
-		/* The GPE can wakeup system from S5, don't touch it */
-		if ((u32) dev->wakeup.sleep_state == ACPI_STATE_S5)
+		/* The GPE can wakeup system from this state, don't touch it */
+		if ((u32) dev->wakeup.sleep_state >= sleep_state)
 			continue;
 		/* acpi_set_gpe_type will automatically disable GPE */
 		acpi_set_gpe_type(dev->wakeup.gpe_device,
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index b1e407a4fbda..73aa55a73334 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -181,6 +181,7 @@ extern enum system_states {
 	SYSTEM_HALT,
 	SYSTEM_POWER_OFF,
 	SYSTEM_RESTART,
+	SYSTEM_SUSPEND_DISK,
 } system_state;
 
 #define TAINT_PROPRIETARY_MODULE	(1<<0)
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 7ab2cdb83ef0..015297ff73fa 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -60,8 +60,7 @@ extern void machine_crash_shutdown(struct pt_regs *);
  */
 
 extern void kernel_restart_prepare(char *cmd);
-extern void kernel_halt_prepare(void);
-extern void kernel_power_off_prepare(void);
+extern void kernel_shutdown_prepare(enum system_states state);
 
 extern void kernel_restart(char *cmd);
 extern void kernel_halt(void);
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 027322a564f4..f2cd279d07c7 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -52,7 +52,7 @@ static void power_down(suspend_disk_method_t mode)
 
 	switch(mode) {
 	case PM_DISK_PLATFORM:
-		kernel_power_off_prepare();
+		kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
 		error = pm_ops->enter(PM_SUSPEND_DISK);
 		break;
 	case PM_DISK_SHUTDOWN:
@@ -119,13 +119,6 @@ static int prepare_processes(void)
 		goto thaw;
 	}
 
-	if (pm_disk_mode == PM_DISK_PLATFORM) {
-		if (pm_ops && pm_ops->prepare) {
-			if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
-				goto thaw;
-		}
-	}
-
 	/* Free memory before shutting down devices. */
 	free_some_memory();
 	return 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index eecf84526afe..c3b1874661fa 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -427,23 +427,25 @@ void kernel_kexec(void)
 }
 EXPORT_SYMBOL_GPL(kernel_kexec);
 
+void kernel_shutdown_prepare(enum system_states state)
+{
+	notifier_call_chain(&reboot_notifier_list,
+		(state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
+	system_state = state;
+	device_shutdown();
+}
 /**
  *	kernel_halt - halt the system
  *
  *	Shutdown everything and perform a clean system halt.
  */
-void kernel_halt_prepare(void)
-{
-	notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
-	system_state = SYSTEM_HALT;
-	device_shutdown();
-}
 void kernel_halt(void)
 {
-	kernel_halt_prepare();
+	kernel_shutdown_prepare(SYSTEM_HALT);
 	printk(KERN_EMERG "System halted.\n");
 	machine_halt();
 }
+
 EXPORT_SYMBOL_GPL(kernel_halt);
 
 /**
@@ -451,20 +453,13 @@ EXPORT_SYMBOL_GPL(kernel_halt);
  *
  *	Shutdown everything and perform a clean system power_off.
  */
-void kernel_power_off_prepare(void)
-{
-	notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
-	system_state = SYSTEM_POWER_OFF;
-	device_shutdown();
-}
 void kernel_power_off(void)
 {
-	kernel_power_off_prepare();
+	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
 	printk(KERN_EMERG "Power down.\n");
 	machine_power_off();
 }
 EXPORT_SYMBOL_GPL(kernel_power_off);
-
 /*
  * Reboot system call: for obvious reasons only root may call it,
  * and even root needs to set up some magic numbers in the registers
-- 
cgit v1.2.3-71-gd317


From 168678233ca45af3f74fef60c4265fa5dd217e29 Mon Sep 17 00:00:00 2001
From: "akpm@osdl.org" <akpm@osdl.org>
Date: Fri, 13 Jan 2006 15:51:02 -0800
Subject: [AGPGART] Semaphore to Mutex conversion.

Semaphore to mutex conversion.

The conversion was generated via scripts, and the result was validated
automatically via a script as well.

Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/frontend.c | 28 ++++++++++++++--------------
 include/linux/agpgart.h     |  3 ++-
 2 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index 17f520c9d471..97eeb2345b18 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -592,7 +592,7 @@ static int agp_mmap(struct file *file, struct vm_area_struct *vma)
 	struct agp_file_private *priv = file->private_data;
 	struct agp_kern_info kerninfo;
 
-	down(&(agp_fe.agp_mutex));
+	mutex_lock(&(agp_fe.agp_mutex));
 
 	if (agp_fe.backend_acquired != TRUE)
 		goto out_eperm;
@@ -627,7 +627,7 @@ static int agp_mmap(struct file *file, struct vm_area_struct *vma)
 					    size, vma->vm_page_prot)) {
 			goto out_again;
 		}
-		up(&(agp_fe.agp_mutex));
+		mutex_unlock(&(agp_fe.agp_mutex));
 		return 0;
 	}
 
@@ -643,20 +643,20 @@ static int agp_mmap(struct file *file, struct vm_area_struct *vma)
 					    size, vma->vm_page_prot)) {
 			goto out_again;
 		}
-		up(&(agp_fe.agp_mutex));
+		mutex_unlock(&(agp_fe.agp_mutex));
 		return 0;
 	}
 
 out_eperm:
-	up(&(agp_fe.agp_mutex));
+	mutex_unlock(&(agp_fe.agp_mutex));
 	return -EPERM;
 
 out_inval:
-	up(&(agp_fe.agp_mutex));
+	mutex_unlock(&(agp_fe.agp_mutex));
 	return -EINVAL;
 
 out_again:
-	up(&(agp_fe.agp_mutex));
+	mutex_unlock(&(agp_fe.agp_mutex));
 	return -EAGAIN;
 }
 
@@ -664,7 +664,7 @@ static int agp_release(struct inode *inode, struct file *file)
 {
 	struct agp_file_private *priv = file->private_data;
 
-	down(&(agp_fe.agp_mutex));
+	mutex_lock(&(agp_fe.agp_mutex));
 
 	DBG("priv=%p", priv);
 
@@ -687,7 +687,7 @@ static int agp_release(struct inode *inode, struct file *file)
 	agp_remove_file_private(priv);
 	kfree(priv);
 	file->private_data = NULL;
-	up(&(agp_fe.agp_mutex));
+	mutex_unlock(&(agp_fe.agp_mutex));
 	return 0;
 }
 
@@ -698,7 +698,7 @@ static int agp_open(struct inode *inode, struct file *file)
 	struct agp_client *client;
 	int rc = -ENXIO;
 
-	down(&(agp_fe.agp_mutex));
+	mutex_lock(&(agp_fe.agp_mutex));
 
 	if (minor != AGPGART_MINOR)
 		goto err_out;
@@ -723,13 +723,13 @@ static int agp_open(struct inode *inode, struct file *file)
 	file->private_data = (void *) priv;
 	agp_insert_file_private(priv);
 	DBG("private=%p, client=%p", priv, client);
-	up(&(agp_fe.agp_mutex));
+	mutex_unlock(&(agp_fe.agp_mutex));
 	return 0;
 
 err_out_nomem:
 	rc = -ENOMEM;
 err_out:
-	up(&(agp_fe.agp_mutex));
+	mutex_unlock(&(agp_fe.agp_mutex));
 	return rc;
 }
 
@@ -985,7 +985,7 @@ static int agp_ioctl(struct inode *inode, struct file *file,
 	int ret_val = -ENOTTY;
 
 	DBG("priv=%p, cmd=%x", curr_priv, cmd);
-	down(&(agp_fe.agp_mutex));
+	mutex_lock(&(agp_fe.agp_mutex));
 
 	if ((agp_fe.current_controller == NULL) &&
 	    (cmd != AGPIOC_ACQUIRE)) {
@@ -1055,7 +1055,7 @@ static int agp_ioctl(struct inode *inode, struct file *file,
 
 ioctl_out:
 	DBG("ioctl returns %d\n", ret_val);
-	up(&(agp_fe.agp_mutex));
+	mutex_unlock(&(agp_fe.agp_mutex));
 	return ret_val;
 }
 
@@ -1081,7 +1081,7 @@ static struct miscdevice agp_miscdev =
 int agp_frontend_initialize(void)
 {
 	memset(&agp_fe, 0, sizeof(struct agp_front_data));
-	sema_init(&(agp_fe.agp_mutex), 1);
+	mutex_init(&(agp_fe.agp_mutex));
 
 	if (misc_register(&agp_miscdev)) {
 		printk(KERN_ERR PFX "unable to get minor: %d\n", AGPGART_MINOR);
diff --git a/include/linux/agpgart.h b/include/linux/agpgart.h
index 17a17c55a17f..6d59c8efe3be 100644
--- a/include/linux/agpgart.h
+++ b/include/linux/agpgart.h
@@ -111,6 +111,7 @@ typedef struct _agp_unbind {
 } agp_unbind;
 
 #else				/* __KERNEL__ */
+#include <linux/mutex.h>
 
 #define AGPGART_MINOR 175
 
@@ -201,7 +202,7 @@ struct agp_file_private {
 };
 
 struct agp_front_data {
-	struct semaphore agp_mutex;
+	struct mutex agp_mutex;
 	struct agp_controller *current_controller;
 	struct agp_controller *controllers;
 	struct agp_file_private *file_priv_list;
-- 
cgit v1.2.3-71-gd317


From 83933af4720b282f6f6a0b6c05a2a47b4cf08819 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Sat, 14 Jan 2006 16:01:49 +0100
Subject: [CPUFREQ] convert remaining cpufreq semaphore to a mutex

This one fell through the automation at first because it initializes the
semaphore to locked, but that's easily remedied.

Signed-off-by:  Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Dave Jones <davej@redhat.com>

 drivers/cpufreq/cpufreq.c |   37 +++++++++++++++++++------------------
 include/linux/cpufreq.h   |    3 ++-
 2 files changed, 21 insertions(+), 19 deletions(-)
---
 drivers/cpufreq/cpufreq.c | 37 +++++++++++++++++++------------------
 include/linux/cpufreq.h   |  3 ++-
 2 files changed, 21 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index eb2f19d00e93..0675d9f02e34 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -601,7 +601,8 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
 	policy->cpu = cpu;
 	policy->cpus = cpumask_of_cpu(cpu);
 
-	init_MUTEX_LOCKED(&policy->lock);
+	mutex_init(&policy->lock);
+	mutex_lock(&policy->lock);
 	init_completion(&policy->kobj_unregister);
 	INIT_WORK(&policy->update, handle_update, (void *)(long)cpu);
 
@@ -642,7 +643,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 	policy->governor = NULL; /* to assure that the starting sequence is
 				  * run in cpufreq_set_policy */
-	up(&policy->lock);
+	mutex_unlock(&policy->lock);
 	
 	/* set default policy */
 	
@@ -763,10 +764,10 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 #endif
 
-	down(&data->lock);
+	mutex_lock(&data->lock);
 	if (cpufreq_driver->target)
 		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
-	up(&data->lock);
+	mutex_unlock(&data->lock);
 
 	kobject_unregister(&data->kobj);
 
@@ -835,9 +836,9 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
 	unsigned int ret = 0;
 
 	if (policy) {
-		down(&policy->lock);
+		mutex_lock(&policy->lock);
 		ret = policy->cur;
-		up(&policy->lock);
+		mutex_unlock(&policy->lock);
 		cpufreq_cpu_put(policy);
 	}
 
@@ -863,7 +864,7 @@ unsigned int cpufreq_get(unsigned int cpu)
 	if (!cpufreq_driver->get)
 		goto out;
 
-	down(&policy->lock);
+	mutex_lock(&policy->lock);
 
 	ret = cpufreq_driver->get(cpu);
 
@@ -876,7 +877,7 @@ unsigned int cpufreq_get(unsigned int cpu)
 		}
 	}
 
-	up(&policy->lock);
+	mutex_unlock(&policy->lock);
 
  out:
 	cpufreq_cpu_put(policy);
@@ -1159,11 +1160,11 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
 	if (!policy)
 		return -EINVAL;
 
-	down(&policy->lock);
+	mutex_lock(&policy->lock);
 
 	ret = __cpufreq_driver_target(policy, target_freq, relation);
 
-	up(&policy->lock);
+	mutex_unlock(&policy->lock);
 
 	cpufreq_cpu_put(policy);
 
@@ -1200,9 +1201,9 @@ int cpufreq_governor(unsigned int cpu, unsigned int event)
 	if (!policy)
 		return -EINVAL;
 
-	down(&policy->lock);
+	mutex_lock(&policy->lock);
 	ret = __cpufreq_governor(policy, event);
-	up(&policy->lock);
+	mutex_unlock(&policy->lock);
 
 	cpufreq_cpu_put(policy);
 
@@ -1269,9 +1270,9 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
 	if (!cpu_policy)
 		return -EINVAL;
 
-	down(&cpu_policy->lock);
+	mutex_lock(&cpu_policy->lock);
 	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
-	up(&cpu_policy->lock);
+	mutex_unlock(&cpu_policy->lock);
 
 	cpufreq_cpu_put(cpu_policy);
 
@@ -1383,7 +1384,7 @@ int cpufreq_set_policy(struct cpufreq_policy *policy)
 		return -EINVAL;
 
 	/* lock this CPU */
-	down(&data->lock);
+	mutex_lock(&data->lock);
 
 	ret = __cpufreq_set_policy(data, policy);
 	data->user_policy.min = data->min;
@@ -1391,7 +1392,7 @@ int cpufreq_set_policy(struct cpufreq_policy *policy)
 	data->user_policy.policy = data->policy;
 	data->user_policy.governor = data->governor;
 
-	up(&data->lock);
+	mutex_unlock(&data->lock);
 	cpufreq_cpu_put(data);
 
 	return ret;
@@ -1415,7 +1416,7 @@ int cpufreq_update_policy(unsigned int cpu)
 	if (!data)
 		return -ENODEV;
 
-	down(&data->lock);
+	mutex_lock(&data->lock);
 
 	dprintk("updating policy for CPU %u\n", cpu);
 	memcpy(&policy, 
@@ -1428,7 +1429,7 @@ int cpufreq_update_policy(unsigned int cpu)
 
 	ret = __cpufreq_set_policy(data, &policy);
 
-	up(&data->lock);
+	mutex_unlock(&data->lock);
 
 	cpufreq_cpu_put(data);
 	return ret;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index c31650df9241..17866d7e2b71 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -14,6 +14,7 @@
 #ifndef _LINUX_CPUFREQ_H
 #define _LINUX_CPUFREQ_H
 
+#include <linux/mutex.h>
 #include <linux/config.h>
 #include <linux/notifier.h>
 #include <linux/threads.h>
@@ -82,7 +83,7 @@ struct cpufreq_policy {
         unsigned int		policy; /* see above */
 	struct cpufreq_governor	*governor; /* see below */
 
- 	struct semaphore	lock;   /* CPU ->setpolicy or ->target may
+ 	struct mutex		lock;   /* CPU ->setpolicy or ->target may
 					   only be called once a time */
 
 	struct work_struct	update; /* if update_policy() needs to be
-- 
cgit v1.2.3-71-gd317


From 3b0e8eadc511eaceba6d6b8d0743359a34ee23c6 Mon Sep 17 00:00:00 2001
From: Alan Hourihane <alanh@fairlite.demon.co.uk>
Date: Thu, 19 Jan 2006 14:08:40 +0000
Subject: [AGPGART] 945GM support for agpgart

Here's a very small diff for 945GM support for agpgart.

Patch against 2.6.15.

From: Alan Hourihane <alanh@fairlite.demon.co.uk>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 drivers/char/agp/intel-agp.c | 15 +++++++++++++--
 include/linux/pci_ids.h      |  2 ++
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index e7bed5047dcc..631531fd97a5 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -422,7 +422,8 @@ static void intel_i830_init_gtt_entries(void)
 			/* Check it's really I915G */
 			if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
 			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB)
+			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
+			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB)
 				gtt_entries = MB(48) - KB(size);
 			else
 				gtt_entries = 0;
@@ -431,7 +432,8 @@ static void intel_i830_init_gtt_entries(void)
 			/* Check it's really I915G */
 			if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
 			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB)
+			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
+			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB)
 				gtt_entries = MB(64) - KB(size);
 			else
 				gtt_entries = 0;
@@ -1681,6 +1683,14 @@ static int __devinit agp_intel_probe(struct pci_dev *pdev,
 		}
 		name = "945G";
 		break;
+	case PCI_DEVICE_ID_INTEL_82945GM_HB:
+		if (find_i830(PCI_DEVICE_ID_INTEL_82945GM_IG)) {
+			bridge->driver = &intel_915_driver;
+		} else {
+			bridge->driver = &intel_845_driver;
+		}
+		name = "945GM";
+		break;
 	case PCI_DEVICE_ID_INTEL_7505_0:
 		bridge->driver = &intel_7505_driver;
 		name = "E7505";
@@ -1821,6 +1831,7 @@ static struct pci_device_id agp_intel_pci_table[] = {
 	ID(PCI_DEVICE_ID_INTEL_82915G_HB),
 	ID(PCI_DEVICE_ID_INTEL_82915GM_HB),
 	ID(PCI_DEVICE_ID_INTEL_82945G_HB),
+	ID(PCI_DEVICE_ID_INTEL_82945GM_HB),
 	{ }
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 5403257ae3e7..2726140a6732 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2084,6 +2084,8 @@
 #define PCI_DEVICE_ID_INTEL_82915GM_IG	0x2592
 #define PCI_DEVICE_ID_INTEL_82945G_HB	0x2770
 #define PCI_DEVICE_ID_INTEL_82945G_IG	0x2772
+#define PCI_DEVICE_ID_INTEL_82945GM_HB	0x27A0
+#define PCI_DEVICE_ID_INTEL_82945GM_IG	0x27A2
 #define PCI_DEVICE_ID_INTEL_ICH6_0	0x2640
 #define PCI_DEVICE_ID_INTEL_ICH6_1	0x2641
 #define PCI_DEVICE_ID_INTEL_ICH6_2	0x2642
-- 
cgit v1.2.3-71-gd317


From 6fbfc9688448aac064edbaccb5d30ecd565a9105 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 20 Jan 2006 11:57:07 -0800
Subject: [NETFILTER]: Unbreak x-tables on x86.

x86 defines __alignof__(long long) as 8 yet it gives 4
for a struct containing a long long, ho hum... so my
simplified form doesn't work everywhere.

So use Harald Welte's original patch, which should work
on all platforms.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/x_tables.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 59ff6c430cf6..6500d4e59d46 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -19,7 +19,21 @@ struct xt_get_revision
 /* For standard target */
 #define XT_RETURN (-NF_REPEAT - 1)
 
-#define XT_ALIGN(s) (((s) + (__alignof__(u_int64_t)-1)) & ~(__alignof__(u_int64_t)-1))
+/* this is a dummy structure to find out the alignment requirement for a struct
+ * containing all the fundamental data types that are used in ipt_entry,
+ * ip6t_entry and arpt_entry.  This sucks, and it is a hack.  It will be my
+ * personal pleasure to remove it -HW
+ */
+struct _xt_align
+{
+	u_int8_t u8;
+	u_int16_t u16;
+	u_int32_t u32;
+	u_int64_t u64;
+};
+
+#define XT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) 	\
+			& ~(__alignof__(struct _xt_align)-1))
 
 /* Standard return verdict, or do jump. */
 #define XT_STANDARD_TARGET ""
-- 
cgit v1.2.3-71-gd317


From ca740803856f23dbc5b1872039291231bc131ecb Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 21 Jan 2006 20:06:14 +0000
Subject: [SERIAL] Remove UPF_AUTOPROBE and UPF_BOOT_ONLYMCA

The functionality UPF_BOOT_ONLYMCA provided has been replaced by
the 8250_mca module, which only registers MCA ports if MCA is
present.

UPF_AUTOPROBE has no functional effect - in fact, it's never
tested.  Only ibmasm set the flag.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/misc/ibmasm/uart.c  | 2 +-
 drivers/serial/8250.c       | 7 -------
 include/linux/serial_core.h | 2 --
 3 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/ibmasm/uart.c b/drivers/misc/ibmasm/uart.c
index 7e98434cfa37..9783caf49696 100644
--- a/drivers/misc/ibmasm/uart.c
+++ b/drivers/misc/ibmasm/uart.c
@@ -50,7 +50,7 @@ void ibmasm_register_uart(struct service_processor *sp)
 	memset(&uport, 0, sizeof(struct uart_port));
 	uport.irq	= sp->irq;
 	uport.uartclk	= 3686400;
-	uport.flags	= UPF_AUTOPROBE | UPF_SHARE_IRQ;
+	uport.flags	= UPF_SHARE_IRQ;
 	uport.iotype	= UPIO_MEM;
 	uport.membase	= iomem_base;
 
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index ff2f931c6715..179c1f065e60 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/console.h>
 #include <linux/sysrq.h>
-#include <linux/mca.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/tty.h>
@@ -2026,12 +2025,6 @@ static void serial8250_config_port(struct uart_port *port, int flags)
 	int probeflags = PROBE_ANY;
 	int ret;
 
-	/*
-	 * Don't probe for MCA ports on non-MCA machines.
-	 */
-	if (up->port.flags & UPF_BOOT_ONLYMCA && !MCA_bus)
-		return;
-
 	/*
 	 * Find the region that we can probe for.  This in turn
 	 * tells us whether we can probe for the type of port.
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index ec351005bf9d..f3af47713a4e 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -245,9 +245,7 @@ struct uart_port {
 #define UPF_HARDPPS_CD		(1 << 11)
 #define UPF_LOW_LATENCY		(1 << 13)
 #define UPF_BUGGY_UART		(1 << 14)
-#define UPF_AUTOPROBE		(1 << 15)
 #define UPF_MAGIC_MULTIPLIER	(1 << 16)
-#define UPF_BOOT_ONLYMCA	(1 << 22)
 #define UPF_CONS_FLOW		(1 << 23)
 #define UPF_SHARE_IRQ		(1 << 24)
 #define UPF_BOOT_AUTOCONF	(1 << 28)
-- 
cgit v1.2.3-71-gd317


From ba899dbc036d24ab6b45faf64e3648a268721cc9 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 21 Jan 2006 22:45:50 +0000
Subject: [SERIAL] Make port->ops constant

No one should write to the port->ops structure, so make it constant.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/serial_core.c | 4 ++--
 include/linux/serial_core.h  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 943770470b9d..74142b731527 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -1870,7 +1870,7 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port)
 	mutex_lock(&state->mutex);
 
 	if (state->info && state->info->flags & UIF_INITIALIZED) {
-		struct uart_ops *ops = port->ops;
+		const struct uart_ops *ops = port->ops;
 
 		spin_lock_irq(&port->lock);
 		ops->stop_tx(port);
@@ -1932,7 +1932,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
 	}
 
 	if (state->info && state->info->flags & UIF_INITIALIZED) {
-		struct uart_ops *ops = port->ops;
+		const struct uart_ops *ops = port->ops;
 		int ret;
 
 		ops->set_mctrl(port, 0);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index f3af47713a4e..b74ff34469b9 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -257,7 +257,7 @@ struct uart_port {
 	unsigned int		mctrl;			/* current modem ctrl settings */
 	unsigned int		timeout;		/* character-based timeout */
 	unsigned int		type;			/* port type */
-	struct uart_ops		*ops;
+	const struct uart_ops	*ops;
 	unsigned int		custom_divisor;
 	unsigned int		line;			/* port index */
 	unsigned long		mapbase;		/* for ioremap */
-- 
cgit v1.2.3-71-gd317


From 747c8a55946ed037bf7d62454c3c599c02af2262 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 21 Jan 2006 22:50:36 +0000
Subject: [SERIAL] Make uart_info flags a bitwise type

The potential for confusing the flags is fairly high.  Make
uart_info's flags a bitwise type so sparse can check that the
right flag definitions are used with the right structure.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/serial_core.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index b74ff34469b9..90f681789a64 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -287,6 +287,9 @@ struct uart_state {
 };
 
 #define UART_XMIT_SIZE	PAGE_SIZE
+
+typedef unsigned int __bitwise__ uif_t;
+
 /*
  * This is the state information which is only valid when the port
  * is open; it may be freed by the core driver once the device has
@@ -296,17 +299,16 @@ struct uart_state {
 struct uart_info {
 	struct tty_struct	*tty;
 	struct circ_buf		xmit;
-	unsigned int		flags;
+	uif_t			flags;
 
 /*
- * These are the flags that specific to info->flags, and reflect our
- * internal state.  They can not be accessed via port->flags.  Low
- * level drivers must not change these, but may query them instead.
+ * Definitions for info->flags.  These are _private_ to serial_core, and
+ * are specific to this structure.  They may be queried by low level drivers.
  */
-#define UIF_CHECK_CD		(1 << 25)
-#define UIF_CTS_FLOW		(1 << 26)
-#define UIF_NORMAL_ACTIVE	(1 << 29)
-#define UIF_INITIALIZED		(1 << 31)
+#define UIF_CHECK_CD		((__force uif_t) (1 << 25))
+#define UIF_CTS_FLOW		((__force uif_t) (1 << 26))
+#define UIF_NORMAL_ACTIVE	((__force uif_t) (1 << 29))
+#define UIF_INITIALIZED		((__force uif_t) (1 << 31))
 
 	int			blocked_open;
 
-- 
cgit v1.2.3-71-gd317


From 27ae7a7435634820e7f7e2b922d8119f79cfc6e4 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 21 Jan 2006 22:54:06 +0000
Subject: [SERIAL] Fix UPF_ flag usage with uart_info->flags

The previous change found a bug in the serial SAK handling - because
we were looking for UPF_SAK set in uart_info->flags, we would never
raise a SAK condition.  UPF_SAK is in uart_port->flags.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/serial_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 90f681789a64..1a8cd0169c9a 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -430,7 +430,7 @@ static inline int uart_handle_break(struct uart_port *port)
 		port->sysrq = 0;
 	}
 #endif
-	if (info->flags & UPF_SAK)
+	if (port->flags & UPF_SAK)
 		do_SAK(info->tty);
 	return 0;
 }
-- 
cgit v1.2.3-71-gd317


From 0077d45e46fe2af3aaee5813c99268afcd0e7c0e Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 21 Jan 2006 23:03:28 +0000
Subject: [SERIAL] Make uart_port flags a bitwise type

Same reasoning as commit 747c8a55946ed037bf7d62454c3c599c02af2262
but this time we're making uart_port flags a bitwise type - not
all of these flags correspond with the old ASYNC_ flags, so there
is the possibility of bugs if the wrong ASYNC_* constants are
used.  Always use UPF_* constants for uart_port->flags.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/serial_core.c | 12 ++++++-----
 include/linux/serial_8250.h  |  2 +-
 include/linux/serial_core.h  | 48 +++++++++++++++++++++++---------------------
 3 files changed, 33 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 74142b731527..0717abfdae06 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -332,7 +332,7 @@ uart_get_baud_rate(struct uart_port *port, struct termios *termios,
 		   struct termios *old, unsigned int min, unsigned int max)
 {
 	unsigned int try, baud, altbaud = 38400;
-	unsigned int flags = port->flags & UPF_SPD_MASK;
+	upf_t flags = port->flags & UPF_SPD_MASK;
 
 	if (flags == UPF_SPD_HI)
 		altbaud = 57600;
@@ -615,8 +615,9 @@ static int uart_set_info(struct uart_state *state,
 	struct serial_struct new_serial;
 	struct uart_port *port = state->port;
 	unsigned long new_port;
-	unsigned int change_irq, change_port, old_flags, closing_wait;
+	unsigned int change_irq, change_port, closing_wait;
 	unsigned int old_custom_divisor, close_delay;
+	upf_t old_flags, new_flags;
 	int retval = 0;
 
 	if (copy_from_user(&new_serial, newinfo, sizeof(new_serial)))
@@ -655,6 +656,7 @@ static int uart_set_info(struct uart_state *state,
 		      new_serial.type != port->type;
 
 	old_flags = port->flags;
+	new_flags = new_serial.flags;
 	old_custom_divisor = port->custom_divisor;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -664,10 +666,10 @@ static int uart_set_info(struct uart_state *state,
 		    (close_delay != state->close_delay) ||
 		    (closing_wait != state->closing_wait) ||
 		    (new_serial.xmit_fifo_size != port->fifosize) ||
-		    (((new_serial.flags ^ old_flags) & ~UPF_USR_MASK) != 0))
+		    (((new_flags ^ old_flags) & ~UPF_USR_MASK) != 0))
 			goto exit;
 		port->flags = ((port->flags & ~UPF_USR_MASK) |
-			       (new_serial.flags & UPF_USR_MASK));
+			       (new_flags & UPF_USR_MASK));
 		port->custom_divisor = new_serial.custom_divisor;
 		goto check_and_exit;
 	}
@@ -764,7 +766,7 @@ static int uart_set_info(struct uart_state *state,
 	port->irq              = new_serial.irq;
 	port->uartclk          = new_serial.baud_base * 16;
 	port->flags            = (port->flags & ~UPF_CHANGE_MASK) |
-				 (new_serial.flags & UPF_CHANGE_MASK);
+				 (new_flags & UPF_CHANGE_MASK);
 	port->custom_divisor   = new_serial.custom_divisor;
 	state->close_delay     = close_delay;
 	state->closing_wait    = closing_wait;
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index cee302aefdb7..73b464f0926a 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -26,7 +26,7 @@ struct plat_serial8250_port {
 	unsigned char	regshift;	/* register shift */
 	unsigned char	iotype;		/* UPIO_* */
 	unsigned char	hub6;
-	unsigned int	flags;		/* UPF_* flags */
+	upf_t		flags;		/* UPF_* flags */
 };
 
 /*
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 1a8cd0169c9a..4041122dabfc 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -203,6 +203,8 @@ struct uart_icount {
 	__u32	buf_overrun;
 };
 
+typedef unsigned int __bitwise__ upf_t;
+
 struct uart_port {
 	spinlock_t		lock;			/* port lock */
 	unsigned int		iobase;			/* in/out[bwl] */
@@ -230,29 +232,29 @@ struct uart_port {
 	unsigned long		sysrq;			/* sysrq timeout */
 #endif
 
-	unsigned int		flags;
-
-#define UPF_FOURPORT		(1 << 1)
-#define UPF_SAK			(1 << 2)
-#define UPF_SPD_MASK		(0x1030)
-#define UPF_SPD_HI		(0x0010)
-#define UPF_SPD_VHI		(0x0020)
-#define UPF_SPD_CUST		(0x0030)
-#define UPF_SPD_SHI		(0x1000)
-#define UPF_SPD_WARP		(0x1010)
-#define UPF_SKIP_TEST		(1 << 6)
-#define UPF_AUTO_IRQ		(1 << 7)
-#define UPF_HARDPPS_CD		(1 << 11)
-#define UPF_LOW_LATENCY		(1 << 13)
-#define UPF_BUGGY_UART		(1 << 14)
-#define UPF_MAGIC_MULTIPLIER	(1 << 16)
-#define UPF_CONS_FLOW		(1 << 23)
-#define UPF_SHARE_IRQ		(1 << 24)
-#define UPF_BOOT_AUTOCONF	(1 << 28)
-#define UPF_IOREMAP		(1 << 31)
-
-#define UPF_CHANGE_MASK		(0x17fff)
-#define UPF_USR_MASK		(UPF_SPD_MASK|UPF_LOW_LATENCY)
+	upf_t			flags;
+
+#define UPF_FOURPORT		((__force upf_t) (1 << 1))
+#define UPF_SAK			((__force upf_t) (1 << 2))
+#define UPF_SPD_MASK		((__force upf_t) (0x1030))
+#define UPF_SPD_HI		((__force upf_t) (0x0010))
+#define UPF_SPD_VHI		((__force upf_t) (0x0020))
+#define UPF_SPD_CUST		((__force upf_t) (0x0030))
+#define UPF_SPD_SHI		((__force upf_t) (0x1000))
+#define UPF_SPD_WARP		((__force upf_t) (0x1010))
+#define UPF_SKIP_TEST		((__force upf_t) (1 << 6))
+#define UPF_AUTO_IRQ		((__force upf_t) (1 << 7))
+#define UPF_HARDPPS_CD		((__force upf_t) (1 << 11))
+#define UPF_LOW_LATENCY		((__force upf_t) (1 << 13))
+#define UPF_BUGGY_UART		((__force upf_t) (1 << 14))
+#define UPF_MAGIC_MULTIPLIER	((__force upf_t) (1 << 16))
+#define UPF_CONS_FLOW		((__force upf_t) (1 << 23))
+#define UPF_SHARE_IRQ		((__force upf_t) (1 << 24))
+#define UPF_BOOT_AUTOCONF	((__force upf_t) (1 << 28))
+#define UPF_IOREMAP		((__force upf_t) (1 << 31))
+
+#define UPF_CHANGE_MASK		((__force upf_t) (0x17fff))
+#define UPF_USR_MASK		((__force upf_t) (UPF_SPD_MASK|UPF_LOW_LATENCY))
 
 	unsigned int		mctrl;			/* current modem ctrl settings */
 	unsigned int		timeout;		/* character-based timeout */
-- 
cgit v1.2.3-71-gd317


From 2cb2e147a6d20bffd1d6b7a79be7301560f751c3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 17 Jan 2006 09:04:32 +0100
Subject: [BLOCK] ll_rw_blk: make max_sectors and max_hw_sectors unsigned ints

IDE lba48 can support full 64k request size, which overflows the
max_hw_sectors variable.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/ll_rw_blk.c      | 2 +-
 include/linux/blkdev.h | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 8e27d0ab0d7c..5c62507a510d 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -662,7 +662,7 @@ EXPORT_SYMBOL(blk_queue_bounce_limit);
  *    Enables a low level driver to set an upper limit on the size of
  *    received requests.
  **/
-void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)
+void blk_queue_max_sectors(request_queue_t *q, unsigned int max_sectors)
 {
 	if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
 		max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 02a585faa62c..860e7a485a5f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -392,8 +392,8 @@ struct request_queue
 	unsigned int		nr_congestion_off;
 	unsigned int		nr_batching;
 
-	unsigned short		max_sectors;
-	unsigned short		max_hw_sectors;
+	unsigned int		max_sectors;
+	unsigned int		max_hw_sectors;
 	unsigned short		max_phys_segments;
 	unsigned short		max_hw_segments;
 	unsigned short		hardsect_size;
@@ -697,7 +697,7 @@ extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern void blk_cleanup_queue(request_queue_t *);
 extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
 extern void blk_queue_bounce_limit(request_queue_t *, u64);
-extern void blk_queue_max_sectors(request_queue_t *, unsigned short);
+extern void blk_queue_max_sectors(request_queue_t *, unsigned int);
 extern void blk_queue_max_phys_segments(request_queue_t *, unsigned short);
 extern void blk_queue_max_hw_segments(request_queue_t *, unsigned short);
 extern void blk_queue_max_segment_size(request_queue_t *, unsigned int);
-- 
cgit v1.2.3-71-gd317


From 7ce08c93e388922e25a96a7d9895784182e4c72c Mon Sep 17 00:00:00 2001
From: Luca Risolia <luca.risolia@studio.unibo.it>
Date: Wed, 11 Jan 2006 02:06:59 +0000
Subject: [PATCH] USB: Add ET61X[12]51 Video4Linux2 driver

This patch adds a Video4Linux2 driver giving support
to ET61X151 and ET61X251 PC Camera Controllers made by
Etoms Electronics.

Signed-off-by: Luca Risolia <luca.risolia@studio.unibo.it>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/et61x251.txt          |  306 ++++
 MAINTAINERS                             |   10 +
 drivers/usb/Makefile                    |    1 +
 drivers/usb/media/Kconfig               |   17 +-
 drivers/usb/media/Makefile              |    2 +
 drivers/usb/media/et61x251.h            |  220 +++
 drivers/usb/media/et61x251_core.c       | 2605 +++++++++++++++++++++++++++++++
 drivers/usb/media/et61x251_sensor.h     |  115 ++
 drivers/usb/media/et61x251_tas5130d1b.c |  137 ++
 include/linux/videodev2.h               |    1 +
 10 files changed, 3412 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/usb/et61x251.txt
 create mode 100644 drivers/usb/media/et61x251.h
 create mode 100644 drivers/usb/media/et61x251_core.c
 create mode 100644 drivers/usb/media/et61x251_sensor.h
 create mode 100644 drivers/usb/media/et61x251_tas5130d1b.c

(limited to 'include/linux')

diff --git a/Documentation/usb/et61x251.txt b/Documentation/usb/et61x251.txt
new file mode 100644
index 000000000000..b44dda407ce2
--- /dev/null
+++ b/Documentation/usb/et61x251.txt
@@ -0,0 +1,306 @@
+
+                       ET61X[12]51 PC Camera Controllers
+                                Driver for Linux
+                       =================================
+
+                               - Documentation -
+
+
+Index
+=====
+1.  Copyright
+2.  Disclaimer
+3.  License
+4.  Overview and features
+5.  Module dependencies
+6.  Module loading
+7.  Module parameters
+8.  Optional device control through "sysfs"
+9.  Supported devices
+10. Notes for V4L2 application developers
+11. Contact information
+
+
+1. Copyright
+============
+Copyright (C) 2006 by Luca Risolia <luca.risolia@studio.unibo.it>
+
+
+2. Disclaimer
+=============
+Etoms is a trademark of Etoms Electronics Corp.
+This software is not developed or sponsored by Etoms Electronics.
+
+
+3. License
+==========
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+4. Overview and features
+========================
+This driver supports the video interface of the devices mounting the ET61X151
+or ET61X251 PC Camera Controllers.
+
+It is worth noting that Etoms Electronics has never collaborated with the
+author during the development of this project; despite several requests,
+Etoms Electronics also refused to release sufficiently detailed specifications
+of the video compression engine.
+
+The driver relies on the Video4Linux2 and USB core modules. It has been
+designed to run properly on SMP systems as well.
+
+The latest version of the ET61X[12]51 driver can be found at the following URL:
+http://www.linux-projects.org/
+
+Some of the features of the driver are:
+
+- full compliance with the Video4Linux2 API (see also "Notes for V4L2
+  application developers" paragraph);
+- available mmap or read/poll methods for video streaming through isochronous
+  data transfers;
+- automatic detection of image sensor;
+- support for any window resolutions and optional panning within the maximum
+  pixel area of image sensor;
+- image downscaling with arbitrary scaling factors from 1 and 2 in both
+  directions (see "Notes for V4L2 application developers" paragraph);
+- two different video formats for uncompressed or compressed data in low or
+  high compression quality (see also "Notes for V4L2 application developers"
+  paragraph);
+- full support for the capabilities of every image sensor that can be
+  connected to the ET61X[12]51 bridges, including, for instance, red, green,
+  blue and global gain adjustments and exposure control (see "Supported
+  devices" paragraph for details);
+- use of default color settings for sunlight conditions;
+- dynamic I/O interface for both ET61X[12]51 and image sensor control (see
+  "Optional device control through 'sysfs'" paragraph);
+- dynamic driver control thanks to various module parameters (see "Module
+  parameters" paragraph);
+- up to 64 cameras can be handled at the same time; they can be connected and
+  disconnected from the host many times without turning off the computer, if
+  the system supports hotplugging;
+- no known bugs.
+
+
+5. Module dependencies
+======================
+For it to work properly, the driver needs kernel support for Video4Linux and
+USB.
+
+The following options of the kernel configuration file must be enabled and
+corresponding modules must be compiled:
+
+	# Multimedia devices
+	#
+	CONFIG_VIDEO_DEV=m
+
+To enable advanced debugging functionality on the device through sysfs:
+
+	# Multimedia devices
+	#
+	CONFIG_VIDEO_ADV_DEBUG=y
+
+	# USB support
+	#
+	CONFIG_USB=m
+
+In addition, depending on the hardware being used, the modules below are
+necessary:
+
+	# USB Host Controller Drivers
+	#
+	CONFIG_USB_EHCI_HCD=m
+	CONFIG_USB_UHCI_HCD=m
+	CONFIG_USB_OHCI_HCD=m
+
+And finally:
+
+	# USB Multimedia devices
+	#
+	CONFIG_USB_ET61X251=m
+
+
+6. Module loading
+=================
+To use the driver, it is necessary to load the "et61x251" module into memory
+after every other module required: "videodev", "usbcore" and, depending on
+the USB host controller you have, "ehci-hcd", "uhci-hcd" or "ohci-hcd".
+
+Loading can be done as shown below:
+
+	[root@localhost home]# modprobe et61x251
+
+At this point the devices should be recognized. You can invoke "dmesg" to
+analyze kernel messages and verify that the loading process has gone well:
+
+	[user@localhost home]$ dmesg
+
+
+7. Module parameters
+====================
+Module parameters are listed below:
+-------------------------------------------------------------------------------
+Name:           video_nr
+Type:           short array (min = 0, max = 64)
+Syntax:         <-1|n[,...]>
+Description:    Specify V4L2 minor mode number:
+                -1 = use next available
+                 n = use minor number n
+                You can specify up to 64 cameras this way.
+                For example:
+                video_nr=-1,2,-1 would assign minor number 2 to the second
+                registered camera and use auto for the first one and for every
+                other camera.
+Default:        -1
+-------------------------------------------------------------------------------
+Name:           force_munmap
+Type:           bool array (min = 0, max = 64)
+Syntax:         <0|1[,...]>
+Description:    Force the application to unmap previously mapped buffer memory
+                before calling any VIDIOC_S_CROP or VIDIOC_S_FMT ioctl's. Not
+                all the applications support this feature. This parameter is
+                specific for each detected camera.
+                0 = do not force memory unmapping
+                1 = force memory unmapping (save memory)
+Default:        0
+-------------------------------------------------------------------------------
+Name:           debug
+Type:           ushort
+Syntax:         <n>
+Description:    Debugging information level, from 0 to 3:
+                0 = none (use carefully)
+                1 = critical errors
+                2 = significant information
+                3 = more verbose messages
+                Level 3 is useful for testing only, when only one device
+                is used at the same time. It also shows some more information
+                about the hardware being detected. This module parameter can be
+                changed at runtime thanks to the /sys filesystem interface.
+Default:        2
+-------------------------------------------------------------------------------
+
+
+8. Optional device control through "sysfs"
+==========================================
+If the kernel has been compiled with the CONFIG_VIDEO_ADV_DEBUG option enabled,
+it is possible to read and write both the ET61X[12]51 and the image sensor
+registers by using the "sysfs" filesystem interface.
+
+There are four files in the /sys/class/video4linux/videoX directory for each
+registered camera: "reg", "val", "i2c_reg" and "i2c_val". The first two files
+control the ET61X[12]51 bridge, while the other two control the sensor chip.
+"reg" and "i2c_reg" hold the index of the register that subsequent read and
+write operations through "val" and "i2c_val" are addressed to. These files are
+not intended for end-users, unless you know what you are doing. Remember that
+you must be logged in as root before writing to them.
+
+As an example, suppose we want to read the value contained in
+register number 1 of the sensor register table - which is usually the product
+identifier - of the camera registered as "/dev/video0":
+
+	[root@localhost #] cd /sys/class/video4linux/video0
+	[root@localhost #] echo 1 > i2c_reg
+	[root@localhost #] cat i2c_val
+
+Note that if the sensor registers can not be read, "cat" will fail.
+To avoid race conditions, all the I/O accesses to the files are serialized.
+
+
+9. Supported devices
+====================
+None of the names of the companies as well as their products will be mentioned
+here. They have never collaborated with the author, so no advertising.
+
+From the point of view of a driver, a device is unambiguously identified by
+its USB vendor and product identifiers. Below is a list of known identifiers of
+devices mounting the ET61X[12]51 PC camera controllers:
+
+Vendor ID  Product ID
+---------  ----------
+0x102c     0x6151
+0x102c     0x6251
+0x102c     0x6253
+0x102c     0x6254
+0x102c     0x6255
+0x102c     0x6256
+0x102c     0x6257
+0x102c     0x6258
+0x102c     0x6259
+0x102c     0x625a
+0x102c     0x625b
+0x102c     0x625c
+0x102c     0x625d
+0x102c     0x625e
+0x102c     0x625f
+0x102c     0x6260
+0x102c     0x6261
+0x102c     0x6262
+0x102c     0x6263
+0x102c     0x6264
+0x102c     0x6265
+0x102c     0x6266
+0x102c     0x6267
+0x102c     0x6268
+0x102c     0x6269
+
+The following image sensors are supported:
+
+Model       Manufacturer
+-----       ------------
+TAS5130D1B  Taiwan Advanced Sensor Corporation
+
+All the available control settings of each image sensor are supported through
+the V4L2 interface.
+
+
+10. Notes for V4L2 application developers
+=========================================
+This driver follows the V4L2 API specifications. In particular, it enforces two
+rules:
+
+- exactly one I/O method, either "mmap" or "read", is associated with each
+file descriptor. Once it is selected, the application must close and reopen the
+device to switch to the other I/O method;
+
+- although it is not mandatory, previously mapped buffer memory should always
+be unmapped before calling any "VIDIOC_S_CROP" or "VIDIOC_S_FMT" ioctl's.
+The same number of buffers as before will be allocated again to match the size
+of the new video frames, so you have to map the buffers again before any I/O
+attempts on them.
+
+Consistently with the hardware limits, this driver also supports image
+downscaling with arbitrary scaling factors from 1 and 2 in both directions.
+However, the V4L2 API specifications don't correctly define how the scaling
+factor can be chosen arbitrarily by the "negotiation" of the "source" and
+"target" rectangles. To work around this flaw, we have added the convention
+that, during the negotiation, whenever the "VIDIOC_S_CROP" ioctl is issued, the
+scaling factor is restored to 1.
+
+This driver supports two different video formats: the first one is the "8-bit
+Sequential Bayer" format and can be used to obtain uncompressed video data
+from the device through the current I/O method, while the second one provides
+"raw" compressed video data (without frame headers not related to the
+compressed data). The current compression quality may vary from 0 to 1 and can
+be selected or queried thanks to the VIDIOC_S_JPEGCOMP and VIDIOC_G_JPEGCOMP
+V4L2 ioctl's.
+
+
+11. Contact information
+=======================
+The author may be contacted by e-mail at <luca.risolia@studio.unibo.it>.
+
+GPG/PGP encrypted e-mail's are accepted. The GPG key ID of the author is
+'FCE635A4'; the public 1024-bit key should be available at any keyserver;
+the fingerprint is: '88E8 F32F 7244 68BA 3958  5D40 99DA 5D2A FCE6 35A4'.
diff --git a/MAINTAINERS b/MAINTAINERS
index a37a2b38a557..42955fe1ffa0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2673,6 +2673,14 @@ M:	dbrownell@users.sourceforge.net
 L:	linux-usb-devel@lists.sourceforge.net
 S:	Maintained
 
+USB ET61X[12]51 DRIVER
+P:	Luca Risolia
+M:	luca.risolia@studio.unibo.it
+L:	linux-usb-devel@lists.sourceforge.net
+L:	video4linux-list@redhat.com
+W:	http://www.linux-projects.org
+S:	Maintained
+
 USB HID/HIDBP DRIVERS
 P:	Vojtech Pavlik
 M:	vojtech@suse.cz
@@ -2836,6 +2844,7 @@ USB SN9C10x DRIVER
 P:	Luca Risolia
 M:	luca.risolia@studio.unibo.it
 L:	linux-usb-devel@lists.sourceforge.net
+L:	video4linux-list@redhat.com
 W:	http://www.linux-projects.org
 S:	Maintained
 
@@ -2865,6 +2874,7 @@ USB W996[87]CF DRIVER
 P:	Luca Risolia
 M:	luca.risolia@studio.unibo.it
 L:	linux-usb-devel@lists.sourceforge.net
+L:	video4linux-list@redhat.com
 W:	http://www.linux-projects.org
 S:	Maintained
 
diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index 3639c3f8d357..36e476dd9123 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_USB_XPAD)		+= input/
 
 obj-$(CONFIG_USB_DABUSB)	+= media/
 obj-$(CONFIG_USB_DSBR)		+= media/
+obj-$(CONFIG_USB_ET61X251)	+= media/
 obj-$(CONFIG_USB_IBMCAM)	+= media/
 obj-$(CONFIG_USB_KONICAWC)	+= media/
 obj-$(CONFIG_USB_OV511)		+= media/
diff --git a/drivers/usb/media/Kconfig b/drivers/usb/media/Kconfig
index 21232ee2974c..0d3d2cc5d7be 100644
--- a/drivers/usb/media/Kconfig
+++ b/drivers/usb/media/Kconfig
@@ -53,6 +53,21 @@ config USB_DSBR
 	  To compile this driver as a module, choose M here: the
 	  module will be called dsbr100.
 
+config USB_ET61X251
+	tristate "USB ET61X[12]51 PC Camera Controller support"
+	depends on USB && VIDEO_DEV
+	---help---
+	  Say Y here if you want support for cameras based on Etoms ET61X151
+	  or ET61X251 PC Camera Controllers.
+
+	  See <file:Documentation/usb/et61x251.txt> for more information.
+
+	  This driver uses the Video For Linux API. You must say Y or M to
+	  "Video For Linux" to use this driver.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called et61x251.
+
 config USB_IBMCAM
 	tristate "USB IBM (Xirlink) C-it Camera support"
 	depends on USB && VIDEO_DEV
@@ -209,5 +224,3 @@ config USB_PWC
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called pwc.
-
-
diff --git a/drivers/usb/media/Makefile b/drivers/usb/media/Makefile
index d83adffa925f..3957aa1be0f2 100644
--- a/drivers/usb/media/Makefile
+++ b/drivers/usb/media/Makefile
@@ -3,9 +3,11 @@
 #
 
 sn9c102-objs	:= sn9c102_core.o sn9c102_hv7131d.o sn9c102_mi0343.o sn9c102_ov7630.o sn9c102_pas106b.o sn9c102_pas202bcb.o sn9c102_tas5110c1b.o sn9c102_tas5130d1b.o
+et61x251-objs	:= et61x251_core.o et61x251_tas5130d1b.o
 
 obj-$(CONFIG_USB_DABUSB)	+= dabusb.o
 obj-$(CONFIG_USB_DSBR)		+= dsbr100.o
+obj-$(CONFIG_USB_ET61X251)	+= et61x251.o
 obj-$(CONFIG_USB_IBMCAM)	+= ibmcam.o usbvideo.o ultracam.o
 obj-$(CONFIG_USB_KONICAWC)	+= konicawc.o usbvideo.o
 obj-$(CONFIG_USB_OV511)		+= ov511.o
diff --git a/drivers/usb/media/et61x251.h b/drivers/usb/media/et61x251.h
new file mode 100644
index 000000000000..652238f329f3
--- /dev/null
+++ b/drivers/usb/media/et61x251.h
@@ -0,0 +1,220 @@
+/***************************************************************************
+ * V4L2 driver for ET61X[12]51 PC Camera Controllers                       *
+ *                                                                         *
+ * Copyright (C) 2006 by Luca Risolia <luca.risolia@studio.unibo.it>       *
+ *                                                                         *
+ * This program is free software; you can redistribute it and/or modify    *
+ * it under the terms of the GNU General Public License as published by    *
+ * the Free Software Foundation; either version 2 of the License, or       *
+ * (at your option) any later version.                                     *
+ *                                                                         *
+ * This program is distributed in the hope that it will be useful,         *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of          *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
+ * GNU General Public License for more details.                            *
+ *                                                                         *
+ * You should have received a copy of the GNU General Public License       *
+ * along with this program; if not, write to the Free Software             *
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.               *
+ ***************************************************************************/
+
+#ifndef _ET61X251_H_
+#define _ET61X251_H_
+
+#include <linux/version.h>
+#include <linux/usb.h>
+#include <linux/videodev2.h>
+#include <media/v4l2-common.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/time.h>
+#include <linux/wait.h>
+#include <linux/types.h>
+#include <linux/param.h>
+#include <linux/rwsem.h>
+#include <asm/semaphore.h>
+
+#include "et61x251_sensor.h"
+
+/*****************************************************************************/
+
+#define ET61X251_DEBUG
+#define ET61X251_DEBUG_LEVEL         2
+#define ET61X251_MAX_DEVICES         64
+#define ET61X251_PRESERVE_IMGSCALE   0
+#define ET61X251_FORCE_MUNMAP        0
+#define ET61X251_MAX_FRAMES          32
+#define ET61X251_COMPRESSION_QUALITY 0
+#define ET61X251_URBS                2
+#define ET61X251_ISO_PACKETS         7
+#define ET61X251_ALTERNATE_SETTING   13
+#define ET61X251_URB_TIMEOUT         msecs_to_jiffies(2 * ET61X251_ISO_PACKETS)
+#define ET61X251_CTRL_TIMEOUT        100
+
+/*****************************************************************************/
+
+static const struct usb_device_id et61x251_id_table[] = {
+	{ USB_DEVICE(0x102c, 0x6151), },
+	{ USB_DEVICE(0x102c, 0x6251), },
+	{ USB_DEVICE(0x102c, 0x6253), },
+	{ USB_DEVICE(0x102c, 0x6254), },
+	{ USB_DEVICE(0x102c, 0x6255), },
+	{ USB_DEVICE(0x102c, 0x6256), },
+	{ USB_DEVICE(0x102c, 0x6257), },
+	{ USB_DEVICE(0x102c, 0x6258), },
+	{ USB_DEVICE(0x102c, 0x6259), },
+	{ USB_DEVICE(0x102c, 0x625a), },
+	{ USB_DEVICE(0x102c, 0x625b), },
+	{ USB_DEVICE(0x102c, 0x625c), },
+	{ USB_DEVICE(0x102c, 0x625d), },
+	{ USB_DEVICE(0x102c, 0x625e), },
+	{ USB_DEVICE(0x102c, 0x625f), },
+	{ USB_DEVICE(0x102c, 0x6260), },
+	{ USB_DEVICE(0x102c, 0x6261), },
+	{ USB_DEVICE(0x102c, 0x6262), },
+	{ USB_DEVICE(0x102c, 0x6263), },
+	{ USB_DEVICE(0x102c, 0x6264), },
+	{ USB_DEVICE(0x102c, 0x6265), },
+	{ USB_DEVICE(0x102c, 0x6266), },
+	{ USB_DEVICE(0x102c, 0x6267), },
+	{ USB_DEVICE(0x102c, 0x6268), },
+	{ USB_DEVICE(0x102c, 0x6269), },
+	{ }
+};
+
+ET61X251_SENSOR_TABLE
+
+/*****************************************************************************/
+
+enum et61x251_frame_state {
+	F_UNUSED,
+	F_QUEUED,
+	F_GRABBING,
+	F_DONE,
+	F_ERROR,
+};
+
+struct et61x251_frame_t {
+	void* bufmem;
+	struct v4l2_buffer buf;
+	enum et61x251_frame_state state;
+	struct list_head frame;
+	unsigned long vma_use_count;
+};
+
+enum et61x251_dev_state {
+	DEV_INITIALIZED = 0x01,
+	DEV_DISCONNECTED = 0x02,
+	DEV_MISCONFIGURED = 0x04,
+};
+
+enum et61x251_io_method {
+	IO_NONE,
+	IO_READ,
+	IO_MMAP,
+};
+
+enum et61x251_stream_state {
+	STREAM_OFF,
+	STREAM_INTERRUPT,
+	STREAM_ON,
+};
+
+struct et61x251_sysfs_attr {
+	u8 reg, i2c_reg;
+};
+
+struct et61x251_module_param {
+	u8 force_munmap;
+};
+
+static DECLARE_MUTEX(et61x251_sysfs_lock);
+static DECLARE_RWSEM(et61x251_disconnect);
+
+struct et61x251_device {
+	struct video_device* v4ldev;
+
+	struct et61x251_sensor* sensor;
+
+	struct usb_device* usbdev;
+	struct urb* urb[ET61X251_URBS];
+	void* transfer_buffer[ET61X251_URBS];
+	u8* control_buffer;
+
+	struct et61x251_frame_t *frame_current, frame[ET61X251_MAX_FRAMES];
+	struct list_head inqueue, outqueue;
+	u32 frame_count, nbuffers, nreadbuffers;
+
+	enum et61x251_io_method io;
+	enum et61x251_stream_state stream;
+
+	struct v4l2_jpegcompression compression;
+
+	struct et61x251_sysfs_attr sysfs;
+	struct et61x251_module_param module_param;
+
+	enum et61x251_dev_state state;
+	u8 users;
+
+	struct semaphore dev_sem, fileop_sem;
+	spinlock_t queue_lock;
+	wait_queue_head_t open, wait_frame, wait_stream;
+};
+
+/*****************************************************************************/
+
+void
+et61x251_attach_sensor(struct et61x251_device* cam,
+                       struct et61x251_sensor* sensor)
+{
+	cam->sensor = sensor;
+	cam->sensor->usbdev = cam->usbdev;
+}
+
+/*****************************************************************************/
+
+#undef DBG
+#undef KDBG
+#ifdef ET61X251_DEBUG
+#	define DBG(level, fmt, args...)                                       \
+do {                                                                          \
+	if (debug >= (level)) {                                               \
+		if ((level) == 1)                                             \
+			dev_err(&cam->usbdev->dev, fmt "\n", ## args);        \
+		else if ((level) == 2)                                        \
+			dev_info(&cam->usbdev->dev, fmt "\n", ## args);       \
+		else if ((level) >= 3)                                        \
+			dev_info(&cam->usbdev->dev, "[%s:%d] " fmt "\n",      \
+			         __FUNCTION__, __LINE__ , ## args);           \
+	}                                                                     \
+} while (0)
+#	define KDBG(level, fmt, args...)                                      \
+do {                                                                          \
+	if (debug >= (level)) {                                               \
+		if ((level) == 1 || (level) == 2)                             \
+			pr_info("et61x251: " fmt "\n", ## args);              \
+		else if ((level) == 3)                                        \
+			pr_debug("et61x251: [%s:%d] " fmt "\n", __FUNCTION__, \
+			         __LINE__ , ## args);                         \
+	}                                                                     \
+} while (0)
+#	define V4LDBG(level, name, cmd)                                       \
+do {                                                                          \
+	if (debug >= (level))                                                 \
+		v4l_print_ioctl(name, cmd);                                   \
+} while (0)
+#else
+#	define DBG(level, fmt, args...) do {;} while(0)
+#	define KDBG(level, fmt, args...) do {;} while(0)
+#	define V4LDBG(level, name, cmd) do {;} while(0)
+#endif
+
+#undef PDBG
+#define PDBG(fmt, args...)                                                    \
+dev_info(&cam->dev, "[%s:%d] " fmt "\n", __FUNCTION__, __LINE__ , ## args)
+
+#undef PDBGG
+#define PDBGG(fmt, args...) do {;} while(0) /* placeholder */
+
+#endif /* _ET61X251_H_ */
diff --git a/drivers/usb/media/et61x251_core.c b/drivers/usb/media/et61x251_core.c
new file mode 100644
index 000000000000..2c0171a5ad62
--- /dev/null
+++ b/drivers/usb/media/et61x251_core.c
@@ -0,0 +1,2605 @@
+/***************************************************************************
+ * V4L2 driver for ET61X[12]51 PC Camera Controllers                       *
+ *                                                                         *
+ * Copyright (C) 2006 by Luca Risolia <luca.risolia@studio.unibo.it>       *
+ *                                                                         *
+ * This program is free software; you can redistribute it and/or modify    *
+ * it under the terms of the GNU General Public License as published by    *
+ * the Free Software Foundation; either version 2 of the License, or       *
+ * (at your option) any later version.                                     *
+ *                                                                         *
+ * This program is distributed in the hope that it will be useful,         *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of          *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
+ * GNU General Public License for more details.                            *
+ *                                                                         *
+ * You should have received a copy of the GNU General Public License       *
+ * along with this program; if not, write to the Free Software             *
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.               *
+ ***************************************************************************/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/moduleparam.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/stddef.h>
+#include <linux/compiler.h>
+#include <linux/ioctl.h>
+#include <linux/poll.h>
+#include <linux/stat.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/page-flags.h>
+#include <linux/byteorder/generic.h>
+#include <asm/page.h>
+#include <asm/uaccess.h>
+
+#include "et61x251.h"
+
+/*****************************************************************************/
+
+#define ET61X251_MODULE_NAME    "V4L2 driver for ET61X[12]51 "                \
+                                "PC Camera Controllers"
+#define ET61X251_MODULE_AUTHOR  "(C) 2006 Luca Risolia"
+#define ET61X251_AUTHOR_EMAIL   "<luca.risolia@studio.unibo.it>"
+#define ET61X251_MODULE_LICENSE "GPL"
+#define ET61X251_MODULE_VERSION "1:1.01" /* passed to MODULE_VERSION() below */
+#define ET61X251_MODULE_VERSION_CODE  KERNEL_VERSION(1, 0, 1) /* numeric form */
+
+/*****************************************************************************/
+
+MODULE_DEVICE_TABLE(usb, et61x251_id_table);
+
+MODULE_AUTHOR(ET61X251_MODULE_AUTHOR " " ET61X251_AUTHOR_EMAIL); /* name+mail */
+MODULE_DESCRIPTION(ET61X251_MODULE_NAME);
+MODULE_VERSION(ET61X251_MODULE_VERSION);
+MODULE_LICENSE(ET61X251_MODULE_LICENSE);
+
+static short video_nr[] = {[0 ... ET61X251_MAX_DEVICES-1] = -1}; /* per camera */
+module_param_array(video_nr, short, NULL, 0444);
+MODULE_PARM_DESC(video_nr,
+                 "\n<-1|n[,...]> Specify V4L2 minor mode number."
+                 "\n -1 = use next available (default)"
+                 "\n  n = use minor number n (integer >= 0)"
+                 "\nYou can specify up to "
+                 __MODULE_STRING(ET61X251_MAX_DEVICES) " cameras this way."
+                 "\nFor example:"
+                 "\nvideo_nr=-1,2,-1 would assign minor number 2 to"
+                 "\nthe second registered camera and use auto for the first"
+                 "\none and for every other camera."
+                 "\n");
+
+static short force_munmap[] = {[0 ... ET61X251_MAX_DEVICES-1] =
+                               ET61X251_FORCE_MUNMAP}; /* per camera */
+module_param_array(force_munmap, bool, NULL, 0444); /* NOTE(review): short[] registered as bool - confirm */
+MODULE_PARM_DESC(force_munmap,
+                 "\n<0|1[,...]> Force the application to unmap previously"
+                 "\nmapped buffer memory before calling any VIDIOC_S_CROP or"
+                 "\nVIDIOC_S_FMT ioctl's. Not all the applications support"
+                 "\nthis feature. This parameter is specific for each"
+                 "\ndetected camera."
+                 "\n 0 = do not force memory unmapping"
+                 "\n 1 = force memory unmapping (save memory)"
+                 "\nDefault value is "__MODULE_STRING(ET61X251_FORCE_MUNMAP)"."
+                 "\n");
+
+#ifdef ET61X251_DEBUG
+static unsigned short debug = ET61X251_DEBUG_LEVEL; /* one level for all cameras */
+module_param(debug, ushort, 0644);
+MODULE_PARM_DESC(debug,
+                 "\n<n> Debugging information level, from 0 to 3:"
+                 "\n0 = none (use carefully)"
+                 "\n1 = critical errors"
+                 "\n2 = significant informations"
+                 "\n3 = more verbose messages"
+                 "\nLevel 3 is useful for testing only, when only "
+                 "one device is used."
+                 "\nDefault value is "__MODULE_STRING(ET61X251_DEBUG_LEVEL)"."
+                 "\n");
+#endif
+
+/*****************************************************************************/
+
+static u32
+et61x251_request_buffers(struct et61x251_device* cam, u32 count,
+                         enum et61x251_io_method io)
+{
+	struct v4l2_pix_format* fmt = &(cam->sensor->pix_format);
+	struct v4l2_rect* bounds = &(cam->sensor->cropcap.bounds);
+	const size_t imagesize = (cam->module_param.force_munmap ||
+	                          io == IO_READ) ?
+	                         (fmt->width * fmt->height * fmt->priv) / 8 :
+	                         (bounds->width * bounds->height * fmt->priv) / 8;
+	const unsigned long stride = PAGE_ALIGN(imagesize); /* per-frame slot */
+	void* mem = NULL;
+	u32 n;
+
+	/* Clamp the request, then shrink it until one allocation succeeds */
+	cam->nbuffers = count > ET61X251_MAX_FRAMES ? ET61X251_MAX_FRAMES : count;
+	for (; cam->nbuffers > 0; cam->nbuffers--) {
+		mem = vmalloc_32(cam->nbuffers * stride);
+		if (mem)
+			break;
+	}
+
+	/* Carve the single allocation into page-aligned frame buffers */
+	for (n = 0; n < cam->nbuffers; n++) {
+		cam->frame[n].bufmem = mem + n*stride;
+		cam->frame[n].buf.index = n;
+		cam->frame[n].buf.m.offset = n*stride;
+		cam->frame[n].buf.length = imagesize;
+		cam->frame[n].buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+		cam->frame[n].buf.sequence = 0;
+		cam->frame[n].buf.field = V4L2_FIELD_NONE;
+		cam->frame[n].buf.memory = V4L2_MEMORY_MMAP;
+		cam->frame[n].buf.flags = 0;
+	}
+
+	return cam->nbuffers;
+}
+
+
+static void et61x251_release_buffers(struct et61x251_device* cam)
+{
+	cam->frame_current = NULL;
+	if (!cam->nbuffers)
+		return;
+	vfree(cam->frame[0].bufmem); /* only frame 0's pointer is passed to vfree */
+	cam->nbuffers = 0;
+}
+
+
+static void et61x251_empty_framequeues(struct et61x251_device* cam)
+{
+	u32 idx = ET61X251_MAX_FRAMES;
+
+	/* Mark every frame slot unused, then reset both queue heads */
+	while (idx--) {
+		cam->frame[idx].buf.bytesused = 0;
+		cam->frame[idx].state = F_UNUSED;
+	}
+	INIT_LIST_HEAD(&cam->outqueue);
+	INIT_LIST_HEAD(&cam->inqueue);
+}
+
+
+static void et61x251_requeue_outqueue(struct et61x251_device* cam)
+{
+	struct et61x251_frame_t *i, *next; /* next: saved before i is relinked */
+
+	list_for_each_entry_safe(i, next, &cam->outqueue, frame) {
+		i->state = F_QUEUED;
+		list_add(&i->frame, &cam->inqueue); /* rewrites i->frame links */
+	}
+
+	INIT_LIST_HEAD(&cam->outqueue); /* every entry now sits on inqueue */
+}
+
+
+static void et61x251_queue_unusedframes(struct et61x251_device* cam)
+{
+	unsigned long lock_flags;
+	u32 i;
+
+	for (i = 0; i < cam->nbuffers; i++) /* only the first nbuffers frames */
+		if (cam->frame[i].state == F_UNUSED) {
+			cam->frame[i].state = F_QUEUED;
+			spin_lock_irqsave(&cam->queue_lock, lock_flags);
+			list_add_tail(&cam->frame[i].frame, &cam->inqueue);
+			spin_unlock_irqrestore(&cam->queue_lock, lock_flags);
+		}
+}
+
+/*****************************************************************************/
+
+int et61x251_write_reg(struct et61x251_device* cam, u8 value, u16 index)
+{
+	struct usb_device* dev = cam->usbdev;
+	u8* payload = cam->control_buffer;
+	int status;
+
+	/* The byte to send is staged in the per-camera control buffer */
+	payload[0] = value;
+	status = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x00, 0x41,
+	                         0, index, payload, 1, ET61X251_CTRL_TIMEOUT);
+	if (status >= 0)
+		return 0;
+
+	DBG(3, "Failed to write a register (value 0x%02X, index "
+	       "0x%02X, error %d)", value, index, status);
+
+	return -1;
+}
+
+
+int et61x251_read_reg(struct et61x251_device* cam, u16 index)
+{
+	struct usb_device* dev = cam->usbdev;
+	u8* reply = cam->control_buffer;
+	int status;
+
+	status = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), 0x00, 0xc1,
+	                         0, index, reply, 1, ET61X251_CTRL_TIMEOUT);
+	if (status >= 0)
+		return (int)reply[0]; /* the single byte that was read back */
+	DBG(3, "Failed to read a register (index 0x%02X, error %d)",
+	    index, status);
+	return -1;
+}
+
+
+static int
+et61x251_i2c_wait(struct et61x251_device* cam, struct et61x251_sensor* sensor)
+{
+	int attempt, status;
+
+	for (attempt = 1; attempt <= 8; attempt++) {
+		/* the polled register and bit depend on the interface type */
+		const u16 reg = (sensor->interface == ET61X251_I2C_3WIRES) ?
+		                0x8e : 0x8b;
+		const int mask = (sensor->interface == ET61X251_I2C_3WIRES) ?
+		                 0x02 : 0x01;
+
+		status = et61x251_read_reg(cam, reg);
+		if (status < 0)
+			return -EIO;
+		if (!(status & mask))
+			return 0;
+		udelay(8*8); /* minimum for sensors at 400kHz */
+	}
+
+	return -EBUSY;
+}
+
+
+int
+et61x251_i2c_try_read(struct et61x251_device* cam,
+                      struct et61x251_sensor* sensor, u8 address)
+{
+	struct usb_device* udev = cam->usbdev;
+	u8* data = cam->control_buffer; /* also used by et61x251_read_reg() */
+	int err = 0, res;
+
+	data[3] = !(et61x251_read_reg(cam, 0x8b) & 0x02); /* overwrites data[0] */
+	data[0] = address; /* filled in only after the register read above */
+	data[1] = cam->sensor->i2c_slave_id;
+	data[2] = cam->sensor->rsta | 0x10;
+	res = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x00, 0x41,
+	                      0, 0x88, data, 4, ET61X251_CTRL_TIMEOUT);
+	if (res < 0)
+		err += res;
+
+	err += et61x251_i2c_wait(cam, sensor);
+
+	res = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x00, 0xc1,
+	                      0, 0x80, data, 8, ET61X251_CTRL_TIMEOUT);
+	if (res < 0)
+		err += res;
+
+	if (err)
+		DBG(3, "I2C read failed for %s image sensor", sensor->name);
+
+	PDBGG("I2C read: address 0x%02X, value: 0x%02X", address, data[0]);
+
+	return err ? -1 : (int)data[0]; /* -1 on any failure, else first byte */
+}
+
+
+int
+et61x251_i2c_try_write(struct et61x251_device* cam,
+                       struct et61x251_sensor* sensor, u8 address, u8 value)
+{
+	struct usb_device* dev = cam->usbdev;
+	u8* msg = cam->control_buffer;
+	int failed = 0, rc;
+
+	/* 1) register address, slave id and control byte go to index 0x88 */
+	msg[0] = address;
+	msg[1] = cam->sensor->i2c_slave_id;
+	msg[2] = cam->sensor->rsta | 0x12;
+	rc = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x00, 0x41,
+	                     0, 0x88, msg, 3, ET61X251_CTRL_TIMEOUT);
+	failed |= (rc < 0);
+
+	/* 2) the data byte goes to index 0x80 */
+	msg[0] = value;
+	rc = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x00, 0x41,
+	                     0, 0x80, msg, 1, ET61X251_CTRL_TIMEOUT);
+	failed |= (rc < 0);
+
+	/* 3) every step is attempted; any failure makes the call fail */
+	failed |= (et61x251_i2c_wait(cam, sensor) != 0);
+	if (failed)
+		DBG(3, "I2C write failed for %s image sensor", sensor->name);
+
+	PDBGG("I2C write: address 0x%02X, value: 0x%02X", address, value);
+
+	return failed ? -1 : 0;
+}
+
+
+int
+et61x251_i2c_raw_write(struct et61x251_device* cam, u8 n, u8 data1, u8 data2,
+                       u8 data3, u8 data4, u8 data5, u8 data6, u8 data7,
+                       u8 data8, u8 address)
+{
+	struct usb_device* udev = cam->usbdev;
+	u8* data = cam->control_buffer;
+	int err = 0, res;
+
+	/* n counts data1..data8; 0 would wrap the n-1 transfer length below */
+	if (!cam->sensor || n < 1 || n > 8)
+		return -1;
+
+	data[0] = data2;
+	data[1] = data3;
+	data[2] = data4;
+	data[3] = data5;
+	data[4] = data6;
+	data[5] = data7;
+	data[6] = data8;
+	res = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x00, 0x41,
+	                      0, 0x81, data, n-1, ET61X251_CTRL_TIMEOUT);
+	if (res < 0)
+		err += res;
+
+	data[0] = address;
+	data[1] = cam->sensor->i2c_slave_id;
+	data[2] = cam->sensor->rsta | 0x02 | (n << 4);
+	res = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x00, 0x41,
+	                      0, 0x88, data, 3, ET61X251_CTRL_TIMEOUT);
+	if (res < 0)
+		err += res;
+
+	/* Start writing through the serial interface */
+	data[0] = data1;
+	res = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x00, 0x41,
+	                      0, 0x80, data, 1, ET61X251_CTRL_TIMEOUT);
+	if (res < 0)
+		err += res;
+
+	err += et61x251_i2c_wait(cam, cam->sensor);
+
+	if (err)
+		DBG(3, "I2C raw write failed for %s image sensor",
+		    cam->sensor->name);
+
+	PDBGG("I2C raw write: %u bytes, address = 0x%02X, data1 = 0x%02X, "
+	      "data2 = 0x%02X, data3 = 0x%02X, data4 = 0x%02X, data5 = 0x%02X,"
+	      " data6 = 0x%02X, data7 = 0x%02X, data8 = 0x%02X", n, address,
+	      data1, data2, data3, data4, data5, data6, data7, data8);
+
+	return err ? -1 : 0;
+}
+
+
+int et61x251_i2c_read(struct et61x251_device* cam, u8 address)
+{
+	struct et61x251_sensor* s = cam->sensor;
+
+	/* without an attached sensor the read is refused with -1 */
+	return s ? et61x251_i2c_try_read(cam, s, address) : -1;
+}
+
+
+int et61x251_i2c_write(struct et61x251_device* cam, u8 address, u8 value)
+{
+	struct et61x251_sensor* s = cam->sensor;
+
+	/* without an attached sensor the write is refused with -1 */
+	return s ? et61x251_i2c_try_write(cam, s, address, value) : -1;
+}
+
+/*****************************************************************************/
+
+static void et61x251_urb_complete(struct urb *urb, struct pt_regs* regs)
+{
+	struct et61x251_device* cam = urb->context;
+	struct et61x251_frame_t** f;
+	size_t imagesize;
+	u8 i;
+	int err = 0;
+
+	if (urb->status == -ENOENT) /* leave without resubmitting the URB */
+		return;
+
+	f = &cam->frame_current;
+
+	if (cam->stream == STREAM_INTERRUPT) { /* a stop request is pending */
+		cam->stream = STREAM_OFF;
+		if ((*f))
+			(*f)->state = F_QUEUED;
+		DBG(3, "Stream interrupted");
+		wake_up_interruptible(&cam->wait_stream); /* NOTE(review): waiter uses wait_event_timeout() - confirm it is woken */
+	}
+
+	if (cam->state & DEV_DISCONNECTED)
+		return;
+
+	if (cam->state & DEV_MISCONFIGURED) {
+		wake_up_interruptible(&cam->wait_frame);
+		return;
+	}
+
+	if (cam->stream == STREAM_OFF || list_empty(&cam->inqueue))
+		goto resubmit_urb;
+
+	if (!(*f)) /* no frame being filled: take the head of inqueue */
+		(*f) = list_entry(cam->inqueue.next, struct et61x251_frame_t,
+		                  frame);
+
+	imagesize = (cam->sensor->pix_format.width *
+	             cam->sensor->pix_format.height *
+	             cam->sensor->pix_format.priv) / 8;
+
+	for (i = 0; i < urb->number_of_packets; i++) {
+		unsigned int len, status;
+		void *pos;
+		u8* b1, * b2, sof;
+		const u8 VOID_BYTES = 6; /* skipped right before the payload copy */
+		size_t imglen;
+
+		len = urb->iso_frame_desc[i].actual_length;
+		status = urb->iso_frame_desc[i].status;
+		pos = urb->iso_frame_desc[i].offset + urb->transfer_buffer;
+
+		if (status) {
+			DBG(3, "Error in isochronous frame");
+			(*f)->state = F_ERROR;
+			continue;
+		}
+
+		b1 = pos++; /* first two packet bytes form a small header */
+		b2 = pos++;
+		sof = ((*b1 & 0x3f) == 63); /* low six bits all set: start of frame */
+		imglen = ((*b1 & 0xc0) << 2) | *b2; /* 10-bit payload length */
+
+		PDBGG("Isochrnous frame: length %u, #%u i, image length %zu",
+		      len, i, imglen);
+
+		if ((*f)->state == F_QUEUED || (*f)->state == F_ERROR)
+start_of_frame:
+			if (sof) {
+				(*f)->state = F_GRABBING;
+				(*f)->buf.bytesused = 0;
+				do_gettimeofday(&(*f)->buf.timestamp);
+				pos += 22; /* extra bytes skipped on a start-of-frame packet */
+				DBG(3, "SOF detected: new video frame");
+			}
+
+		if ((*f)->state == F_GRABBING) {
+			if (sof && (*f)->buf.bytesused) {
+				if (cam->sensor->pix_format.pixelformat ==
+				                         V4L2_PIX_FMT_ET61X251)
+					goto end_of_frame;
+				else {
+					DBG(3, "Not expected SOF detected "
+					       "after %lu bytes",
+					   (unsigned long)(*f)->buf.bytesused);
+					(*f)->state = F_ERROR;
+					continue;
+				}
+			}
+
+			if ((*f)->buf.bytesused + imglen > imagesize) {
+				DBG(3, "Video frame size exceeded");
+				(*f)->state = F_ERROR;
+				continue;
+			}
+
+			pos += VOID_BYTES;
+
+			memcpy((*f)->bufmem+(*f)->buf.bytesused, pos, imglen); /* NOTE(review): imglen is not checked against len */
+			(*f)->buf.bytesused += imglen;
+
+			if ((*f)->buf.bytesused == imagesize) {
+				u32 b;
+end_of_frame:
+				b = (*f)->buf.bytesused;
+				(*f)->state = F_DONE;
+				(*f)->buf.sequence= ++cam->frame_count;
+				spin_lock(&cam->queue_lock);
+				list_move_tail(&(*f)->frame, &cam->outqueue);
+				if (!list_empty(&cam->inqueue))
+					(*f) = list_entry(cam->inqueue.next,
+					               struct et61x251_frame_t,
+					                  frame);
+				else
+					(*f) = NULL;
+				spin_unlock(&cam->queue_lock);
+				DBG(3, "Video frame captured: : %lu bytes",
+				       (unsigned long)(b));
+
+				if (!(*f)) /* nothing left to fill */
+					goto resubmit_urb;
+
+				if (sof &&
+				    cam->sensor->pix_format.pixelformat ==
+				                         V4L2_PIX_FMT_ET61X251)
+					goto start_of_frame;
+			}
+		}
+	}
+
+resubmit_urb:
+	urb->dev = cam->usbdev;
+	err = usb_submit_urb(urb, GFP_ATOMIC);
+	if (err < 0 && err != -EPERM) {
+		cam->state |= DEV_MISCONFIGURED;
+		DBG(1, "usb_submit_urb() failed");
+	}
+
+	wake_up_interruptible(&cam->wait_frame);
+}
+
+
+static int et61x251_start_transfer(struct et61x251_device* cam)
+{
+	struct usb_device *udev = cam->usbdev;
+	struct urb* urb;
+	const unsigned int wMaxPacketSize[] = {0, 256, 384, 512, 640, 768, 832,
+	                                       864, 896, 920, 956, 980, 1000,
+	                                       1022}; /* by alternate setting */
+	const unsigned int psz = wMaxPacketSize[ET61X251_ALTERNATE_SETTING];
+	s8 i, j;
+	int err = 0;
+
+	for (i = 0; i < ET61X251_URBS; i++) {
+		cam->transfer_buffer[i] = kzalloc(ET61X251_ISO_PACKETS * psz,
+		                                  GFP_KERNEL);
+		if (!cam->transfer_buffer[i]) {
+			err = -ENOMEM;
+			DBG(1, "Not enough memory");
+			goto free_buffers;
+		}
+	}
+
+	for (i = 0; i < ET61X251_URBS; i++) {
+		urb = usb_alloc_urb(ET61X251_ISO_PACKETS, GFP_KERNEL);
+		cam->urb[i] = urb; /* NULL here ends the free_urbs loop below */
+		if (!urb) {
+			err = -ENOMEM;
+			DBG(1, "usb_alloc_urb() failed");
+			goto free_urbs;
+		}
+		urb->dev = udev;
+		urb->context = cam;
+		urb->pipe = usb_rcvisocpipe(udev, 1);
+		urb->transfer_flags = URB_ISO_ASAP;
+		urb->number_of_packets = ET61X251_ISO_PACKETS;
+		urb->complete = et61x251_urb_complete;
+		urb->transfer_buffer = cam->transfer_buffer[i];
+		urb->transfer_buffer_length = psz * ET61X251_ISO_PACKETS;
+		urb->interval = 1;
+		for (j = 0; j < ET61X251_ISO_PACKETS; j++) {
+			urb->iso_frame_desc[j].offset = psz * j;
+			urb->iso_frame_desc[j].length = psz;
+		}
+	}
+
+	err += et61x251_write_reg(cam, 0x01, 0x03); /* err is still 0 here; */
+	err += et61x251_write_reg(cam, 0x00, 0x03); /* accumulate so that no */
+	err += et61x251_write_reg(cam, 0x08, 0x03); /* failed write is lost  */
+	if (err) {
+		err = -EIO;
+		DBG(1, "I/O hardware error");
+		goto free_urbs;
+	}
+
+	err = usb_set_interface(udev, 0, ET61X251_ALTERNATE_SETTING);
+	if (err) {
+		DBG(1, "usb_set_interface() failed");
+		goto free_urbs;
+	}
+
+	cam->frame_current = NULL;
+
+	for (i = 0; i < ET61X251_URBS; i++) {
+		err = usb_submit_urb(cam->urb[i], GFP_KERNEL);
+		if (err) {
+			for (j = i-1; j >= 0; j--) /* stop those already queued */
+				usb_kill_urb(cam->urb[j]);
+			DBG(1, "usb_submit_urb() failed, error %d", err);
+			goto free_urbs;
+		}
+	}
+
+	return 0;
+
+free_urbs:
+	for (i = 0; (i < ET61X251_URBS) &&  cam->urb[i]; i++)
+		usb_free_urb(cam->urb[i]);
+
+free_buffers:
+	for (i = 0; (i < ET61X251_URBS) && cam->transfer_buffer[i]; i++)
+		kfree(cam->transfer_buffer[i]);
+
+	return err;
+}
+
+
+static int et61x251_stop_transfer(struct et61x251_device* cam)
+{
+	s8 n = ET61X251_URBS;
+	int ret;
+
+	/* nothing is torn down here once the device is flagged disconnected */
+	if (cam->state & DEV_DISCONNECTED)
+		return 0;
+
+	while (--n >= 0) { /* last URB first */
+		usb_kill_urb(cam->urb[n]);
+		usb_free_urb(cam->urb[n]);
+		kfree(cam->transfer_buffer[n]);
+	}
+
+	ret = usb_set_interface(cam->usbdev, 0, 0); /* 0 Mb/s */
+	if (ret)
+		DBG(3, "usb_set_interface() failed");
+
+	return ret;
+}
+
+
+static int et61x251_stream_interrupt(struct et61x251_device* cam)
+{
+	/* The URB completion handler turns STREAM_INTERRUPT into STREAM_OFF */
+	cam->stream = STREAM_INTERRUPT;
+	wait_event_timeout(cam->wait_stream,
+	                   (cam->stream == STREAM_OFF) ||
+	                   (cam->state & DEV_DISCONNECTED),
+	                   ET61X251_URB_TIMEOUT);
+	if (cam->state & DEV_DISCONNECTED)
+		return -ENODEV;
+	else if (cam->stream != STREAM_OFF) {
+		/* judge by the stream state: the wait result is >0 on success */
+		cam->state |= DEV_MISCONFIGURED;
+		DBG(1, "URB timeout reached. The camera is misconfigured. To "
+		       "use it, close and open /dev/video%d again.",
+		    cam->v4ldev->minor);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+/* Parse up to four leading characters of buff as an unsigned 8-bit number. */
+static u8 et61x251_strtou8(const char* buff, size_t len, ssize_t* count)
+{
+	char str[5];
+	char* endp;
+	unsigned long val;
+	const size_t n = (len < 4) ? len : 4; /* characters copied into str */
+
+	/* strncpy() adds no terminator to a full copy: close str at n */
+	strncpy(str, buff, n);
+	str[n] = '\0';
+
+	val = simple_strtoul(str, &endp, 0);
+
+	/* *count: characters consumed, or 0 when the value exceeds 0xff */
+	*count = 0;
+	if (val <= 0xff)
+		*count = (ssize_t)(endp - str);
+	/* one trailing newline that ends the input is counted as well */
+	if ((*count) && (len == *count+1) && (buff[*count] == '\n'))
+		*count += 1;
+
+	return (u8)val;
+}
+
+/*
+   NOTE 1: being inside one of the following methods implies that the v4l
+           device exists for sure (see kobjects and reference counters)
+   NOTE 2: buffers are PAGE_SIZE long
+*/
+
+/*
+ * sysfs 'reg' read: print the currently selected controller register index.
+ */
+static ssize_t et61x251_show_reg(struct class_device* cd, char* buf)
+{
+	struct et61x251_device* cam;
+	ssize_t ret = -ENODEV;
+
+	if (down_interruptible(&et61x251_sysfs_lock))
+		return -ERESTARTSYS;
+
+	/* a NULL drvdata pointer is reported as -ENODEV */
+	cam = video_get_drvdata(to_video_device(cd));
+	if (cam)
+		ret = sprintf(buf, "%u\n", cam->sysfs.reg);
+
+	up(&et61x251_sysfs_lock);
+
+	return ret;
+}
+
+
+/* sysfs 'reg' write: select the ET61X[12]51 register index (at most 0x8e). */
+static ssize_t
+et61x251_store_reg(struct class_device* cd, const char* buf, size_t len)
+{
+	struct et61x251_device* cam;
+	u8 regidx;
+	ssize_t count = -ENODEV;
+
+	if (down_interruptible(&et61x251_sysfs_lock))
+		return -ERESTARTSYS;
+
+	cam = video_get_drvdata(to_video_device(cd));
+	if (!cam)
+		goto unlock;
+
+	/* count receives the number of input bytes accepted by the parser */
+	regidx = et61x251_strtou8(buf, len, &count);
+	if (regidx > 0x8e || !count) {
+		count = -EINVAL;
+		goto unlock;
+	}
+
+	cam->sysfs.reg = regidx;
+
+	DBG(2, "Moved ET61X[12]51 register index to 0x%02X", cam->sysfs.reg);
+	DBG(3, "Written bytes: %zd", count);
+
+unlock:
+	up(&et61x251_sysfs_lock);
+	return count;
+}
+
+
+/* sysfs 'val' read: fetch and print the register selected through 'reg'. */
+static ssize_t et61x251_show_val(struct class_device* cd, char* buf)
+{
+	struct et61x251_device* cam;
+	ssize_t count = -ENODEV;
+	int regval;
+
+	if (down_interruptible(&et61x251_sysfs_lock))
+		return -ERESTARTSYS;
+
+	cam = video_get_drvdata(to_video_device(cd));
+	if (!cam)
+		goto unlock;
+
+	/* a negative result from the register read is reported as -EIO */
+	regval = et61x251_read_reg(cam, cam->sysfs.reg);
+	if (regval < 0) {
+		count = -EIO;
+		goto unlock;
+	}
+
+	count = sprintf(buf, "%d\n", regval);
+	DBG(3, "Read bytes: %zd", count);
+
+unlock:
+	up(&et61x251_sysfs_lock);
+	return count;
+}
+
+
+/* sysfs 'val' write: store one byte in the register selected through 'reg'. */
+static ssize_t
+et61x251_store_val(struct class_device* cd, const char* buf, size_t len)
+{
+	struct et61x251_device* cam;
+	u8 byte;
+	ssize_t count = -ENODEV;
+	int err;
+
+	if (down_interruptible(&et61x251_sysfs_lock))
+		return -ERESTARTSYS;
+
+	cam = video_get_drvdata(to_video_device(cd));
+	if (!cam)
+		goto unlock;
+
+	byte = et61x251_strtou8(buf, len, &count);
+	if (!count) {
+		count = -EINVAL;
+		goto unlock;
+	}
+
+	/* count is only replaced when the hardware write fails */
+	err = et61x251_write_reg(cam, byte, cam->sysfs.reg);
+	if (err) {
+		count = -EIO;
+		goto unlock;
+	}
+
+	DBG(2, "Written ET61X[12]51 reg. 0x%02X, val. 0x%02X",
+	    cam->sysfs.reg, byte);
+	DBG(3, "Written bytes: %zd", count);
+
+unlock:
+	up(&et61x251_sysfs_lock);
+	return count;
+}
+
+
+/*
+ * sysfs 'i2c_reg' read: print the currently selected sensor register index.
+ */
+static ssize_t et61x251_show_i2c_reg(struct class_device* cd, char* buf)
+{
+	struct et61x251_device* cam;
+	ssize_t count = -ENODEV;
+
+	if (down_interruptible(&et61x251_sysfs_lock))
+		return -ERESTARTSYS;
+
+	cam = video_get_drvdata(to_video_device(cd));
+	if (!cam)
+		goto unlock;
+
+	count = sprintf(buf, "%u\n", cam->sysfs.i2c_reg);
+	DBG(3, "Read bytes: %zd", count);
+
+unlock:
+	up(&et61x251_sysfs_lock);
+	return count;
+}
+
+
+/* sysfs 'i2c_reg' write: select the sensor register used by 'i2c_val'. */
+static ssize_t
+et61x251_store_i2c_reg(struct class_device* cd, const char* buf, size_t len)
+{
+	struct et61x251_device* cam;
+	u8 regidx;
+	ssize_t count = -ENODEV;
+
+	if (down_interruptible(&et61x251_sysfs_lock))
+		return -ERESTARTSYS;
+
+	cam = video_get_drvdata(to_video_device(cd));
+	if (!cam)
+		goto unlock;
+
+	/* any value the parser accepts is taken as the new index */
+	regidx = et61x251_strtou8(buf, len, &count);
+	if (!count) {
+		count = -EINVAL;
+		goto unlock;
+	}
+
+	cam->sysfs.i2c_reg = regidx;
+
+	DBG(2, "Moved sensor register index to 0x%02X", cam->sysfs.i2c_reg);
+	DBG(3, "Written bytes: %zd", count);
+
+unlock:
+	up(&et61x251_sysfs_lock);
+	return count;
+}
+
+
+/* sysfs 'i2c_val' read: read the sensor register chosen through 'i2c_reg'. */
+static ssize_t et61x251_show_i2c_val(struct class_device* cd, char* buf)
+{
+	struct et61x251_device* cam;
+	ssize_t count = -ENODEV;
+	int regval;
+
+	if (down_interruptible(&et61x251_sysfs_lock))
+		return -ERESTARTSYS;
+
+	cam = video_get_drvdata(to_video_device(cd));
+	if (!cam)
+		goto unlock;
+
+	/* the sensor description must have the I2C read flag set */
+	if (!(cam->sensor->sysfs_ops & ET61X251_I2C_READ)) {
+		count = -ENOSYS;
+		goto unlock;
+	}
+
+	regval = et61x251_i2c_read(cam, cam->sysfs.i2c_reg);
+	if (regval < 0) {
+		count = -EIO;
+		goto unlock;
+	}
+
+	count = sprintf(buf, "%d\n", regval);
+	DBG(3, "Read bytes: %zd", count);
+
+unlock:
+	up(&et61x251_sysfs_lock);
+	return count;
+}
+
+
+/* sysfs 'i2c_val' write: write one byte to the sensor register 'i2c_reg'. */
+static ssize_t
+et61x251_store_i2c_val(struct class_device* cd, const char* buf, size_t len)
+{
+	struct et61x251_device* cam;
+	u8 value;
+	ssize_t count = -ENODEV;
+	int err;
+
+	if (down_interruptible(&et61x251_sysfs_lock))
+		return -ERESTARTSYS;
+
+	cam = video_get_drvdata(to_video_device(cd));
+	if (!cam)
+		goto unlock;
+
+	/* a store needs the write capability flag, not the read one */
+	if (!(cam->sensor->sysfs_ops & ET61X251_I2C_WRITE)) {
+		count = -ENOSYS;
+		goto unlock;
+	}
+
+	value = et61x251_strtou8(buf, len, &count);
+	if (!count) {
+		count = -EINVAL;
+		goto unlock;
+	}
+
+	err = et61x251_i2c_write(cam, cam->sysfs.i2c_reg, value);
+	if (err) {
+		count = -EIO;
+		goto unlock;
+	}
+
+	DBG(2, "Written sensor reg. 0x%02X, val. 0x%02X",
+	    cam->sysfs.i2c_reg, value);
+	DBG(3, "Written bytes: %zd", count);
+
+unlock:
+	up(&et61x251_sysfs_lock);
+	return count;
+}
+
+
+static CLASS_DEVICE_ATTR(reg, S_IRUGO | S_IWUSR, /* controller reg index */
+                         et61x251_show_reg, et61x251_store_reg);
+static CLASS_DEVICE_ATTR(val, S_IRUGO | S_IWUSR, /* controller reg value */
+                         et61x251_show_val, et61x251_store_val);
+static CLASS_DEVICE_ATTR(i2c_reg, S_IRUGO | S_IWUSR, /* sensor reg index */
+                         et61x251_show_i2c_reg, et61x251_store_i2c_reg);
+static CLASS_DEVICE_ATTR(i2c_val, S_IRUGO | S_IWUSR, /* sensor reg value */
+                         et61x251_show_i2c_val, et61x251_store_i2c_val);
+
+
+static void et61x251_create_sysfs(struct et61x251_device* cam)
+{
+	struct video_device *vdev = cam->v4ldev;
+
+	video_device_create_file(vdev, &class_device_attr_reg);
+	video_device_create_file(vdev, &class_device_attr_val);
+	if (!cam->sensor || !cam->sensor->sysfs_ops)
+		return; /* no sensor sysfs operations: skip the i2c files */
+	video_device_create_file(vdev, &class_device_attr_i2c_reg);
+	video_device_create_file(vdev, &class_device_attr_i2c_val);
+}
+#endif /* CONFIG_VIDEO_ADV_DEBUG */
+
+/*****************************************************************************/
+
+static int
+et61x251_set_pix_format(struct et61x251_device* cam,
+                        struct v4l2_pix_format* pix)
+{
+	int r, err;
+
+	/* Read-modify-write of bit 1 in register 0x12; never write back -1 */
+	if ((r = et61x251_read_reg(cam, 0x12)) < 0)
+		return -EIO;
+	if (pix->pixelformat == V4L2_PIX_FMT_ET61X251)
+		err = et61x251_write_reg(cam, r & 0xfd, 0x12);
+	else
+		err = et61x251_write_reg(cam, r | 0x02, 0x12);
+	return err ? -EIO : 0;
+}
+
+
+static int
+et61x251_set_compression(struct et61x251_device* cam,
+                         struct v4l2_jpegcompression* compression)
+{
+	int r, err;
+
+	/* Read-modify-write of bit 2 in register 0x12; never write back -1 */
+	if ((r = et61x251_read_reg(cam, 0x12)) < 0)
+		return -EIO;
+	if (compression->quality == 0)
+		err = et61x251_write_reg(cam, r & 0xfb, 0x12);
+	else
+		err = et61x251_write_reg(cam, r | 0x04, 0x12);
+	return err ? -EIO : 0;
+}
+
+
+static int et61x251_set_scale(struct et61x251_device* cam, u8 scale)
+{
+	int r, err = 0;
+
+	/* A failed read must not be masked and written back to 0x12 */
+	r = et61x251_read_reg(cam, 0x12);
+	if (r < 0)
+		return -EIO;
+
+	if (scale == 1)
+		err = et61x251_write_reg(cam, r & ~0x01, 0x12);
+	else if (scale == 2) /* other factors leave the register untouched */
+		err = et61x251_write_reg(cam, r | 0x01, 0x12);
+	if (err)
+		return -EIO;
+
+	PDBGG("Scaling factor: %u", scale);
+
+	return 0;
+}
+
+
+static int
+et61x251_set_crop(struct et61x251_device* cam, struct v4l2_rect* rect)
+{
+	struct et61x251_sensor* s = cam->sensor;
+	u16 fmw_sx = (u16)(rect->left - s->cropcap.bounds.left +
+	                   s->active_pixel.left),
+	    fmw_sy = (u16)(rect->top - s->cropcap.bounds.top +
+	                   s->active_pixel.top),
+	    fmw_length = (u16)(rect->width),
+	    fmw_height = (u16)(rect->height);
+	int err = 0;
+
+	err += et61x251_write_reg(cam, fmw_sx & 0xff, 0x69); /* low bytes */
+	err += et61x251_write_reg(cam, fmw_sy & 0xff, 0x6a);
+	err += et61x251_write_reg(cam, fmw_length & 0xff, 0x6b);
+	err += et61x251_write_reg(cam, fmw_height & 0xff, 0x6c);
+	err += et61x251_write_reg(cam, ((fmw_sx & 0x300) >> 8) | /* bits 9:8 */
+	                ((fmw_sy & 0x300) >> 6) | ((fmw_length & 0x300) >> 4) |
+	                ((fmw_height & 0x300) >> 2), 0x6d); /* of each value */
+	if (err)
+		return -EIO;
+
+	PDBGG("fmw_sx, fmw_sy, fmw_length, fmw_height: %u %u %u %u",
+	      fmw_sx, fmw_sy, fmw_length, fmw_height);
+
+	return 0;
+}
+
+
+static int et61x251_init(struct et61x251_device* cam)
+{
+	struct et61x251_sensor* s = cam->sensor;
+	struct v4l2_control ctrl;
+	struct v4l2_queryctrl *qctrl;
+	struct v4l2_rect* rect;
+	u8 i = 0;
+	int err = 0;
+
+	if (!(cam->state & DEV_INITIALIZED)) { /* flag is set at the end below */
+		init_waitqueue_head(&cam->open);
+		qctrl = s->qctrl;
+		rect = &(s->cropcap.defrect);
+		cam->compression.quality = ET61X251_COMPRESSION_QUALITY;
+	} else { /* use current values */
+		qctrl = s->_qctrl;
+		rect = &(s->_rect);
+	}
+
+	err += et61x251_set_scale(cam, rect->width / s->pix_format.width); /* NOTE(review): divisor is not checked for 0 */
+	err += et61x251_set_crop(cam, rect);
+	if (err) /* sum of the two results, not a single error code */
+		return err;
+
+	if (s->init) { /* optional sensor hook */
+		err = s->init(cam);
+		if (err) {
+			DBG(3, "Sensor initialization failed");
+			return err;
+		}
+	}
+
+	err += et61x251_set_compression(cam, &cam->compression);
+	err += et61x251_set_pix_format(cam, &s->pix_format);
+	if (s->set_pix_format)
+		err += s->set_pix_format(cam, &s->pix_format);
+	if (err)
+		return err;
+
+	if (s->pix_format.pixelformat == V4L2_PIX_FMT_ET61X251)
+		DBG(3, "Compressed video format is active, quality %d",
+		    cam->compression.quality);
+	else
+		DBG(3, "Uncompressed video format is active");
+
+	if (s->set_crop)
+		if ((err = s->set_crop(cam, rect))) {
+			DBG(3, "set_crop() failed");
+			return err;
+		}
+
+	if (s->set_ctrl) {
+		for (i = 0; i < ARRAY_SIZE(s->qctrl); i++)
+			if (s->qctrl[i].id != 0 &&
+			    !(s->qctrl[i].flags & V4L2_CTRL_FLAG_DISABLED)) {
+				ctrl.id = s->qctrl[i].id;
+				ctrl.value = qctrl[i].default_value; /* from the table chosen above */
+				err = s->set_ctrl(cam, &ctrl);
+				if (err) {
+					DBG(3, "Set %s control failed",
+					    s->qctrl[i].name);
+					return err;
+				}
+				DBG(3, "Image sensor supports '%s' control",
+				    s->qctrl[i].name);
+			}
+	}
+
+	if (!(cam->state & DEV_INITIALIZED)) {
+		init_MUTEX(&cam->fileop_sem);
+		spin_lock_init(&cam->queue_lock);
+		init_waitqueue_head(&cam->wait_frame);
+		init_waitqueue_head(&cam->wait_stream);
+		cam->nreadbuffers = 2;
+		memcpy(s->_qctrl, s->qctrl, sizeof(s->qctrl)); /* copies read back on re-init */
+		memcpy(&(s->_rect), &(s->cropcap.defrect),
+		       sizeof(struct v4l2_rect));
+		cam->state |= DEV_INITIALIZED;
+	}
+
+	DBG(2, "Initialization succeeded");
+	return 0;
+}
+
+
+static void et61x251_release_resources(struct et61x251_device* cam)
+{
+	down(&et61x251_sysfs_lock); /* same lock the sysfs handlers take */
+
+	DBG(2, "V4L2 device /dev/video%d deregistered", cam->v4ldev->minor);
+	video_set_drvdata(cam->v4ldev, NULL); /* handlers then return -ENODEV */
+	video_unregister_device(cam->v4ldev);
+
+	up(&et61x251_sysfs_lock);
+
+	kfree(cam->control_buffer); /* cam itself is not freed here */
+}
+
+/*****************************************************************************/
+
+static int et61x251_open(struct inode* inode, struct file* filp)
+{
+	struct et61x251_device* cam;
+	int err = 0;
+
+	/*
+	   This is the only safe way to prevent race conditions with
+	   disconnect
+	*/
+	if (!down_read_trylock(&et61x251_disconnect))
+		return -ERESTARTSYS;
+
+	cam = video_get_drvdata(video_devdata(filp));
+
+	if (down_interruptible(&cam->dev_sem)) {
+		up_read(&et61x251_disconnect);
+		return -ERESTARTSYS;
+	}
+
+	if (cam->users) { /* already open: fail (non-blocking) or wait */
+		DBG(2, "Device /dev/video%d is busy...", cam->v4ldev->minor);
+		if ((filp->f_flags & O_NONBLOCK) ||
+		    (filp->f_flags & O_NDELAY)) {
+			err = -EWOULDBLOCK;
+			goto out;
+		}
+		up(&cam->dev_sem); /* dev_sem is not held while sleeping */
+		err = wait_event_interruptible_exclusive(cam->open,
+		                                  cam->state & DEV_DISCONNECTED
+		                                         || !cam->users);
+		if (err) {
+			up_read(&et61x251_disconnect);
+			return err;
+		}
+		if (cam->state & DEV_DISCONNECTED) {
+			up_read(&et61x251_disconnect);
+			return -ENODEV;
+		}
+		down(&cam->dev_sem); /* NOTE(review): users is not re-checked after this */
+	}
+
+
+	if (cam->state & DEV_MISCONFIGURED) { /* retry the init sequence */
+		err = et61x251_init(cam);
+		if (err) {
+			DBG(1, "Initialization failed again. "
+			       "I will retry on next open().");
+			goto out;
+		}
+		cam->state &= ~DEV_MISCONFIGURED;
+	}
+
+	if ((err = et61x251_start_transfer(cam)))
+		goto out;
+
+	filp->private_data = cam;
+	cam->users++;
+	cam->io = IO_NONE;
+	cam->stream = STREAM_OFF;
+	cam->nbuffers = 0;
+	cam->frame_count = 0;
+	et61x251_empty_framequeues(cam);
+
+	DBG(3, "Video device /dev/video%d is open", cam->v4ldev->minor);
+
+out: /* common exit: both locks are held when control reaches here */
+	up(&cam->dev_sem);
+	up_read(&et61x251_disconnect);
+	return err;
+}
+
+
+/*
+ * release() file operation: stop the transfer and release the frame buffers.
+ * If the device has been flagged as disconnected, its resources and the
+ * device structure are freed here; otherwise the user count is decremented
+ * and one task sleeping on cam->open is woken up. Always returns 0.
+ */
+static int et61x251_release(struct inode* inode, struct file* filp)
+{
+	struct et61x251_device* cam = video_get_drvdata(video_devdata(filp));
+
+	down(&cam->dev_sem); /* prevent disconnect() to be called */
+
+	et61x251_stop_transfer(cam);
+	et61x251_release_buffers(cam);
+
+	if (!(cam->state & DEV_DISCONNECTED)) {
+		cam->users--;
+		wake_up_interruptible_nr(&cam->open, 1);
+
+		DBG(3, "Video device /dev/video%d closed",
+		    cam->v4ldev->minor);
+
+		up(&cam->dev_sem);
+		return 0;
+	}
+
+	/* the device is gone: complete the deallocation */
+	et61x251_release_resources(cam);
+	up(&cam->dev_sem);
+	kfree(cam);
+
+	return 0;
+}
+
+
+/*
+ * read() file operation.
+ *
+ * When no I/O method has been chosen yet, the read method is selected and
+ * cam->nreadbuffers frame buffers are requested. The call then waits (unless
+ * O_NONBLOCK is set) for a frame in the output queue, copies at most @count
+ * bytes of the frame at the tail of that queue to @buf, and finally marks
+ * every frame in the output queue as unused and queues the unused frames
+ * again.
+ *
+ * Returns the number of bytes copied or a negative error code.
+ */
+static ssize_t
+et61x251_read(struct file* filp, char __user * buf,
+              size_t count, loff_t* f_pos)
+{
+	struct et61x251_device* cam = video_get_drvdata(video_devdata(filp));
+	struct et61x251_frame_t* f, * i;
+	unsigned long lock_flags;
+	int err = 0;
+
+	if (down_interruptible(&cam->fileop_sem))
+		return -ERESTARTSYS;
+
+	if (cam->state & DEV_DISCONNECTED) {
+		DBG(1, "Device not present");
+		up(&cam->fileop_sem);
+		return -ENODEV;
+	}
+
+	if (cam->state & DEV_MISCONFIGURED) {
+		DBG(1, "The camera is misconfigured. Close and open it "
+		       "again.");
+		up(&cam->fileop_sem);
+		return -EIO;
+	}
+
+	/* read() and mmap() I/O cannot be mixed on the same open */
+	if (cam->io == IO_MMAP) {
+		DBG(3, "Close and open the device again to choose the read "
+		       "method");
+		up(&cam->fileop_sem);
+		return -EINVAL;
+	}
+
+	/* first I/O request: select the read method */
+	if (cam->io == IO_NONE) {
+		if (!et61x251_request_buffers(cam, cam->nreadbuffers,
+		                              IO_READ)) {
+			DBG(1, "read() failed, not enough memory");
+			up(&cam->fileop_sem);
+			return -ENOMEM;
+		}
+		cam->io = IO_READ;
+		cam->stream = STREAM_ON;
+	}
+
+	if (list_empty(&cam->inqueue)) {
+		if (!list_empty(&cam->outqueue))
+			et61x251_empty_framequeues(cam);
+		et61x251_queue_unusedframes(cam);
+	}
+
+	if (!count) {
+		up(&cam->fileop_sem);
+		return 0;
+	}
+
+	if (list_empty(&cam->outqueue)) {
+		if (filp->f_flags & O_NONBLOCK) {
+			up(&cam->fileop_sem);
+			return -EAGAIN;
+		}
+		/* sleep until a frame is available or the device fails */
+		err = wait_event_interruptible
+		      ( cam->wait_frame,
+		        (!list_empty(&cam->outqueue)) ||
+		        (cam->state & DEV_DISCONNECTED) ||
+			(cam->state & DEV_MISCONFIGURED) );
+		if (err) {
+			up(&cam->fileop_sem);
+			return err;
+		}
+		if (cam->state & DEV_DISCONNECTED) {
+			up(&cam->fileop_sem);
+			return -ENODEV;
+		}
+		if (cam->state & DEV_MISCONFIGURED) {
+			up(&cam->fileop_sem);
+			return -EIO;
+		}
+	}
+
+	/* take the frame at the tail of the output queue */
+	f = list_entry(cam->outqueue.prev, struct et61x251_frame_t, frame);
+
+	if (count > f->buf.bytesused)
+		count = f->buf.bytesused;
+
+	if (copy_to_user(buf, f->bufmem, count)) {
+		err = -EFAULT;
+		goto exit;
+	}
+	*f_pos += count;
+
+exit:
+	/* recycle every frame of the output queue, whether read or not */
+	spin_lock_irqsave(&cam->queue_lock, lock_flags);
+	list_for_each_entry(i, &cam->outqueue, frame)
+		i->state = F_UNUSED;
+	INIT_LIST_HEAD(&cam->outqueue);
+	spin_unlock_irqrestore(&cam->queue_lock, lock_flags);
+
+	et61x251_queue_unusedframes(cam);
+
+	PDBGG("Frame #%lu, bytes read: %zu",
+	      (unsigned long)f->buf.index, count);
+
+	up(&cam->fileop_sem);
+
+	return err ? err : count;
+}
+
+
+/*
+ * poll() file operation.
+ *
+ * Selects the read I/O method (requesting cam->nreadbuffers buffers) when no
+ * method has been chosen yet. With the read method active, every frame in the
+ * output queue is marked unused and the unused frames are queued again before
+ * polling. Reports POLLIN | POLLRDNORM when the output queue is not empty and
+ * POLLERR on any failure.
+ */
+static unsigned int et61x251_poll(struct file *filp, poll_table *wait)
+{
+	struct et61x251_device* cam = video_get_drvdata(video_devdata(filp));
+	struct et61x251_frame_t* cur;
+	unsigned long irq_flags;
+	unsigned int events = POLLERR;
+
+	if (down_interruptible(&cam->fileop_sem))
+		return POLLERR;
+
+	if (cam->state & DEV_DISCONNECTED) {
+		DBG(1, "Device not present");
+		goto out;
+	}
+
+	if (cam->state & DEV_MISCONFIGURED) {
+		DBG(1, "The camera is misconfigured. Close and open it "
+		       "again.");
+		goto out;
+	}
+
+	if (cam->io == IO_NONE) {
+		if (!et61x251_request_buffers(cam, cam->nreadbuffers,
+		                              IO_READ)) {
+			DBG(1, "poll() failed, not enough memory");
+			goto out;
+		}
+		cam->io = IO_READ;
+		cam->stream = STREAM_ON;
+	}
+
+	if (cam->io == IO_READ) {
+		/* recycle whatever is pending in the output queue */
+		spin_lock_irqsave(&cam->queue_lock, irq_flags);
+		list_for_each_entry(cur, &cam->outqueue, frame)
+			cur->state = F_UNUSED;
+		INIT_LIST_HEAD(&cam->outqueue);
+		spin_unlock_irqrestore(&cam->queue_lock, irq_flags);
+		et61x251_queue_unusedframes(cam);
+	}
+
+	poll_wait(filp, &cam->wait_frame, wait);
+
+	events = list_empty(&cam->outqueue) ? 0 : (POLLIN | POLLRDNORM);
+
+out:
+	up(&cam->fileop_sem);
+	return events;
+}
+
+
+/* VMA open callback: count one more mapping of the frame attached to @vma. */
+static void et61x251_vm_open(struct vm_area_struct* vma)
+{
+	struct et61x251_frame_t* frame = vma->vm_private_data;
+
+	frame->vma_use_count += 1;
+}
+
+
+/*
+ * VMA close callback: count one mapping less of the frame attached to @vma.
+ * NOTE: buffers are not freed here.
+ */
+static void et61x251_vm_close(struct vm_area_struct* vma)
+{
+	struct et61x251_frame_t* frame = vma->vm_private_data;
+
+	frame->vma_use_count -= 1;
+}
+
+
+/* VMA callbacks keeping the per-frame mapping count up to date */
+static struct vm_operations_struct et61x251_vm_ops = {
+	.open = et61x251_vm_open,
+	.close = et61x251_vm_close,
+};
+
+
+/*
+ * mmap() file operation: map one frame buffer into the caller's address
+ * space.
+ *
+ * The mmap I/O method must be active, the mapping must be writable and its
+ * size must equal the page-aligned buffer length. The buffer is selected by
+ * matching vma->vm_pgoff against the buffer offsets.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int et61x251_mmap(struct file* filp, struct vm_area_struct *vma)
+{
+	struct et61x251_device* cam = video_get_drvdata(video_devdata(filp));
+	unsigned long size = vma->vm_end - vma->vm_start,
+	              start = vma->vm_start;
+	void *pos;
+	u32 i;
+
+	if (down_interruptible(&cam->fileop_sem))
+		return -ERESTARTSYS;
+
+	if (cam->state & DEV_DISCONNECTED) {
+		DBG(1, "Device not present");
+		up(&cam->fileop_sem);
+		return -ENODEV;
+	}
+
+	if (cam->state & DEV_MISCONFIGURED) {
+		DBG(1, "The camera is misconfigured. Close and open it "
+		       "again.");
+		up(&cam->fileop_sem);
+		return -EIO;
+	}
+
+	if (cam->io != IO_MMAP || !(vma->vm_flags & VM_WRITE) ||
+	    size != PAGE_ALIGN(cam->frame[0].buf.length)) {
+		up(&cam->fileop_sem);
+		return -EINVAL;
+	}
+
+	/* find the buffer whose offset matches the requested page offset */
+	for (i = 0; i < cam->nbuffers; i++) {
+		if ((cam->frame[i].buf.m.offset>>PAGE_SHIFT) == vma->vm_pgoff)
+			break;
+	}
+	if (i == cam->nbuffers) {
+		up(&cam->fileop_sem);
+		return -EINVAL;
+	}
+
+	vma->vm_flags |= VM_IO;
+	vma->vm_flags |= VM_RESERVED;
+
+	/* insert the buffer into the mapping one page at a time */
+	pos = cam->frame[i].bufmem;
+	while (size > 0) { /* size is page-aligned */
+		if (vm_insert_page(vma, start, vmalloc_to_page(pos))) {
+			up(&cam->fileop_sem);
+			return -EAGAIN;
+		}
+		start += PAGE_SIZE;
+		pos += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+
+	vma->vm_ops = &et61x251_vm_ops;
+	vma->vm_private_data = &cam->frame[i];
+
+	/* account for this first mapping */
+	et61x251_vm_open(vma);
+
+	up(&cam->fileop_sem);
+
+	return 0;
+}
+
+/*****************************************************************************/
+
+/*
+ * VIDIOC_QUERYCAP handler: report driver name, version, capabilities, card
+ * name and bus information. The bus information falls back to the USB device
+ * bus id when usb_make_path() fails. Returns 0 or -EFAULT.
+ */
+static int
+et61x251_vidioc_querycap(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_capability cap = {
+		.driver = "et61x251",
+		.version = ET61X251_MODULE_VERSION_CODE,
+		.capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE |
+		                V4L2_CAP_STREAMING,
+	};
+	int path_len;
+
+	strlcpy(cap.card, cam->v4ldev->name, sizeof(cap.card));
+
+	path_len = usb_make_path(cam->usbdev, cap.bus_info,
+	                         sizeof(cap.bus_info));
+	if (path_len < 0)
+		strlcpy(cap.bus_info, cam->usbdev->dev.bus_id,
+		        sizeof(cap.bus_info));
+
+	return copy_to_user(arg, &cap, sizeof(cap)) ? -EFAULT : 0;
+}
+
+
+/*
+ * VIDIOC_ENUMINPUT handler: describe the only video input of the device.
+ *
+ * Only index 0 is valid. The returned descriptor is cleared, named "Camera"
+ * and typed as a camera input; leaving the type at zero would report a value
+ * that is not a valid V4L2 input type.
+ *
+ * Returns 0 on success, -EINVAL for any other index, -EFAULT if the user
+ * memory cannot be accessed.
+ */
+static int
+et61x251_vidioc_enuminput(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_input i;
+
+	if (copy_from_user(&i, arg, sizeof(i)))
+		return -EFAULT;
+
+	if (i.index)
+		return -EINVAL;
+
+	memset(&i, 0, sizeof(i));
+	strcpy(i.name, "Camera");
+	i.type = V4L2_INPUT_TYPE_CAMERA;
+
+	if (copy_to_user(arg, &i, sizeof(i)))
+		return -EFAULT;
+
+	return 0;
+}
+
+
+/*
+ * Validate the input index supplied by the application: the device has a
+ * single input, so only index 0 is accepted. Returns 0, -EINVAL or -EFAULT.
+ */
+static int
+et61x251_vidioc_gs_input(struct et61x251_device* cam, void __user * arg)
+{
+	int input;
+
+	if (copy_from_user(&input, arg, sizeof(input)))
+		return -EFAULT;
+
+	return input ? -EINVAL : 0;
+}
+
+
+/*
+ * VIDIOC_QUERYCTRL handler: return the description of the sensor control
+ * whose id matches the requested one. A zero id never matches.
+ * Returns 0, -EINVAL if the control is not found, or -EFAULT.
+ */
+static int
+et61x251_vidioc_query_ctrl(struct et61x251_device* cam, void __user * arg)
+{
+	struct et61x251_sensor* s = cam->sensor;
+	struct v4l2_queryctrl qc;
+	u8 n;
+
+	if (copy_from_user(&qc, arg, sizeof(qc)))
+		return -EFAULT;
+
+	if (!qc.id)
+		return -EINVAL;
+
+	for (n = 0; n < ARRAY_SIZE(s->qctrl); n++)
+		if (qc.id == s->qctrl[n].id)
+			break;
+
+	if (n == ARRAY_SIZE(s->qctrl))
+		return -EINVAL;
+
+	memcpy(&qc, &(s->qctrl[n]), sizeof(qc));
+
+	return copy_to_user(arg, &qc, sizeof(qc)) ? -EFAULT : 0;
+}
+
+
+/*
+ * VIDIOC_G_CTRL handler: read the value of a sensor control.
+ *
+ * When the sensor provides get_ctrl() the value comes from that method;
+ * otherwise the value stored in s->_qctrl[].default_value for the matching
+ * control id is returned. The control is copied back to the user even when
+ * get_ctrl() reports an error, and that error is then returned.
+ */
+static int
+et61x251_vidioc_g_ctrl(struct et61x251_device* cam, void __user * arg)
+{
+	struct et61x251_sensor* s = cam->sensor;
+	struct v4l2_control ctrl;
+	int err = 0;
+	u8 n;
+
+	if (!s->get_ctrl && !s->set_ctrl)
+		return -EINVAL;
+
+	if (copy_from_user(&ctrl, arg, sizeof(ctrl)))
+		return -EFAULT;
+
+	if (s->get_ctrl) {
+		err = s->get_ctrl(cam, &ctrl);
+	} else {
+		for (n = 0; n < ARRAY_SIZE(s->qctrl); n++)
+			if (ctrl.id == s->qctrl[n].id)
+				break;
+
+		if (n == ARRAY_SIZE(s->qctrl))
+			return -EINVAL;
+
+		ctrl.value = s->_qctrl[n].default_value;
+	}
+
+	if (copy_to_user(arg, &ctrl, sizeof(ctrl)))
+		return -EFAULT;
+
+	return err;
+}
+
+
+/*
+ * VIDIOC_S_CTRL handler: set a sensor control.
+ *
+ * The requested control must be listed in s->qctrl with a non-zero id and
+ * must not be flagged as disabled. Its value is range-checked and rounded
+ * down to a multiple of the control step before being handed to the sensor
+ * set_ctrl() method. On success the value is stored in
+ * s->_qctrl[].default_value.
+ *
+ * Returns 0 on success, -EINVAL if the sensor has no set_ctrl() method or the
+ * control is unknown or disabled, -EFAULT if the user memory cannot be read,
+ * -ERANGE if the value is out of range, or the error returned by set_ctrl().
+ */
+static int
+et61x251_vidioc_s_ctrl(struct et61x251_device* cam, void __user * arg)
+{
+	struct et61x251_sensor* s = cam->sensor;
+	struct v4l2_control ctrl;
+	u8 i;
+	int err = 0;
+
+	if (!s->set_ctrl)
+		return -EINVAL;
+
+	if (copy_from_user(&ctrl, arg, sizeof(ctrl)))
+		return -EFAULT;
+
+	/* a zero id marks an unused table slot and must never match */
+	for (i = 0; i < ARRAY_SIZE(s->qctrl); i++)
+		if (ctrl.id && ctrl.id == s->qctrl[i].id)
+			break;
+
+	/*
+	 * Unknown control: bail out here, otherwise the index would be one
+	 * past the end of s->_qctrl[] when the value is stored below.
+	 */
+	if (i == ARRAY_SIZE(s->qctrl))
+		return -EINVAL;
+
+	if (s->qctrl[i].flags & V4L2_CTRL_FLAG_DISABLED)
+		return -EINVAL;
+
+	if (ctrl.value < s->qctrl[i].minimum ||
+	    ctrl.value > s->qctrl[i].maximum)
+		return -ERANGE;
+
+	/* round down to a step multiple; a zero step means no rounding */
+	if (s->qctrl[i].step)
+		ctrl.value -= ctrl.value % s->qctrl[i].step;
+
+	if ((err = s->set_ctrl(cam, &ctrl)))
+		return err;
+
+	s->_qctrl[i].default_value = ctrl.value;
+
+	return 0;
+}
+
+
+/*
+ * VIDIOC_CROPCAP handler: fill in the buffer type and a 1:1 pixel aspect in
+ * the sensor cropping capabilities and copy them to the user.
+ * Returns 0 or -EFAULT.
+ */
+static int
+et61x251_vidioc_cropcap(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_cropcap* caps = &(cam->sensor->cropcap);
+
+	caps->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+	caps->pixelaspect.numerator = 1;
+	caps->pixelaspect.denominator = 1;
+
+	return copy_to_user(arg, caps, sizeof(*caps)) ? -EFAULT : 0;
+}
+
+
+/*
+ * VIDIOC_G_CROP handler: return the cropping rectangle stored in the sensor
+ * _rect field. Returns 0 or -EFAULT.
+ */
+static int
+et61x251_vidioc_g_crop(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_crop crop;
+
+	memset(&crop, 0, sizeof(crop));
+	crop.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+	memcpy(&(crop.c), &(cam->sensor->_rect), sizeof(struct v4l2_rect));
+
+	return copy_to_user(arg, &crop, sizeof(crop)) ? -EFAULT : 0;
+}
+
+
+/*
+ * VIDIOC_S_CROP handler: set the cropping rectangle.
+ *
+ * The requested rectangle is adjusted to keep the parity of the current
+ * origin, clamped to the sensor bounds and its size rounded down to a
+ * multiple of 4; the adjusted rectangle is written back to the user. A
+ * running stream is interrupted first. The frame buffers are released and
+ * requested again when force_munmap is set or the read method is in use.
+ * On a hardware or memory failure the device is flagged DEV_MISCONFIGURED.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int
+et61x251_vidioc_s_crop(struct et61x251_device* cam, void __user * arg)
+{
+	struct et61x251_sensor* s = cam->sensor;
+	struct v4l2_crop crop;
+	struct v4l2_rect* rect;
+	struct v4l2_rect* bounds = &(s->cropcap.bounds);
+	struct v4l2_pix_format* pix_format = &(s->pix_format);
+	u8 scale;
+	const enum et61x251_stream_state stream = cam->stream;
+	const u32 nbuffers = cam->nbuffers;
+	u32 i;
+	int err = 0;
+
+	if (copy_from_user(&crop, arg, sizeof(crop)))
+		return -EFAULT;
+
+	rect = &(crop.c);
+
+	if (crop.type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;
+
+	/* with force_munmap, refuse to proceed while buffers are mapped */
+	if (cam->module_param.force_munmap)
+		for (i = 0; i < cam->nbuffers; i++)
+			if (cam->frame[i].vma_use_count) {
+				DBG(3, "VIDIOC_S_CROP failed. "
+				       "Unmap the buffers first.");
+				return -EINVAL;
+			}
+
+	/* Preserve R,G or B origin */
+	rect->left = (s->_rect.left & 1L) ? rect->left | 1L : rect->left & ~1L;
+	rect->top = (s->_rect.top & 1L) ? rect->top | 1L : rect->top & ~1L;
+
+	/* clamp size and position to the sensor bounds */
+	if (rect->width < 4)
+		rect->width = 4;
+	if (rect->height < 4)
+		rect->height = 4;
+	if (rect->width > bounds->width)
+		rect->width = bounds->width;
+	if (rect->height > bounds->height)
+		rect->height = bounds->height;
+	if (rect->left < bounds->left)
+		rect->left = bounds->left;
+	if (rect->top < bounds->top)
+		rect->top = bounds->top;
+	if (rect->left + rect->width > bounds->left + bounds->width)
+		rect->left = bounds->left+bounds->width - rect->width;
+	if (rect->top + rect->height > bounds->top + bounds->height)
+		rect->top = bounds->top+bounds->height - rect->height;
+
+	/* width and height are rounded down to a multiple of 4 */
+	rect->width &= ~3L;
+	rect->height &= ~3L;
+
+	if (ET61X251_PRESERVE_IMGSCALE) {
+		/* Calculate the actual scaling factor */
+		u32 a, b;
+		a = rect->width * rect->height;
+		b = pix_format->width * pix_format->height;
+		scale = b ? (u8)((a / b) < 4 ? 1 : 2) : 1;
+	} else
+		scale = 1;
+
+	if (cam->stream == STREAM_ON)
+		if ((err = et61x251_stream_interrupt(cam)))
+			return err;
+
+	if (copy_to_user(arg, &crop, sizeof(crop))) {
+		cam->stream = stream;
+		return -EFAULT;
+	}
+
+	if (cam->module_param.force_munmap || cam->io == IO_READ)
+		et61x251_release_buffers(cam);
+
+	/* the individual results are summed: any failure makes err non-zero */
+	err = et61x251_set_crop(cam, rect);
+	if (s->set_crop)
+		err += s->set_crop(cam, rect);
+	err += et61x251_set_scale(cam, scale);
+
+	if (err) { /* atomic, no rollback in ioctl() */
+		cam->state |= DEV_MISCONFIGURED;
+		DBG(1, "VIDIOC_S_CROP failed because of hardware problems. To "
+		       "use the camera, close and open /dev/video%d again.",
+		    cam->v4ldev->minor);
+		return -EIO;
+	}
+
+	s->pix_format.width = rect->width/scale;
+	s->pix_format.height = rect->height/scale;
+	memcpy(&(s->_rect), rect, sizeof(*rect));
+
+	/* request again the same number of buffers released above */
+	if ((cam->module_param.force_munmap  || cam->io == IO_READ) &&
+	    nbuffers != et61x251_request_buffers(cam, nbuffers, cam->io)) {
+		cam->state |= DEV_MISCONFIGURED;
+		DBG(1, "VIDIOC_S_CROP failed because of not enough memory. To "
+		       "use the camera, close and open /dev/video%d again.",
+		    cam->v4ldev->minor);
+		return -ENOMEM;
+	}
+
+	if (cam->io == IO_READ)
+		et61x251_empty_framequeues(cam);
+	else if (cam->module_param.force_munmap)
+		et61x251_requeue_outqueue(cam);
+
+	/* restore the stream state saved on entry */
+	cam->stream = stream;
+
+	return 0;
+}
+
+
+/*
+ * VIDIOC_ENUM_FMT handler: enumerate the supported pixel formats.
+ *
+ * Index 0 is the raw Bayer format, index 1 the compressed format. The flags
+ * field is written for both entries so that the value passed in by the
+ * application is never echoed back for the uncompressed format.
+ *
+ * Returns 0 on success, -EINVAL for any other index, -EFAULT if the user
+ * memory cannot be accessed.
+ */
+static int
+et61x251_vidioc_enum_fmt(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_fmtdesc fmtd;
+
+	if (copy_from_user(&fmtd, arg, sizeof(fmtd)))
+		return -EFAULT;
+
+	if (fmtd.index == 0) {
+		strcpy(fmtd.description, "bayer rgb");
+		fmtd.pixelformat = V4L2_PIX_FMT_SBGGR8;
+		fmtd.flags = 0;
+	} else if (fmtd.index == 1) {
+		strcpy(fmtd.description, "compressed");
+		fmtd.pixelformat = V4L2_PIX_FMT_ET61X251;
+		fmtd.flags = V4L2_FMT_FLAG_COMPRESSED;
+	} else
+		return -EINVAL;
+
+	fmtd.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+	memset(&fmtd.reserved, 0, sizeof(fmtd.reserved));
+
+	if (copy_to_user(arg, &fmtd, sizeof(fmtd)))
+		return -EFAULT;
+
+	return 0;
+}
+
+
+/*
+ * VIDIOC_G_FMT handler: refresh the derived fields (bytesperline, sizeimage,
+ * field) of the sensor pixel format and return it to the user. The priv
+ * field of the pixel format is used as the number of bits per pixel.
+ * Returns 0, -EINVAL for a non-capture buffer type, or -EFAULT.
+ */
+static int
+et61x251_vidioc_g_fmt(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_pix_format* pfmt = &(cam->sensor->pix_format);
+	struct v4l2_format format;
+	u32 line_bytes;
+
+	if (copy_from_user(&format, arg, sizeof(format)))
+		return -EFAULT;
+
+	if (format.type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;
+
+	line_bytes = (pfmt->width * pfmt->priv) / 8;
+
+	if (pfmt->pixelformat == V4L2_PIX_FMT_ET61X251)
+		pfmt->bytesperline = 0;
+	else
+		pfmt->bytesperline = line_bytes;
+	pfmt->sizeimage = pfmt->height * line_bytes;
+	pfmt->field = V4L2_FIELD_NONE;
+
+	memcpy(&(format.fmt.pix), pfmt, sizeof(*pfmt));
+
+	return copy_to_user(arg, &format, sizeof(format)) ? -EFAULT : 0;
+}
+
+
+/*
+ * VIDIOC_TRY_FMT / VIDIOC_S_FMT handler.
+ *
+ * The requested image size is turned into a cropping rectangle and a scaling
+ * factor (1 or 2), the rectangle is clamped to the sensor bounds and rounded
+ * down to a multiple of 4, and the resulting format is written back to the
+ * user. For VIDIOC_TRY_FMT nothing else is done. For VIDIOC_S_FMT a running
+ * stream is interrupted, the new format, crop and scale are programmed, and
+ * the frame buffers are released and requested again when force_munmap is
+ * set or the read method is in use. On a hardware or memory failure the
+ * device is flagged DEV_MISCONFIGURED.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int
+et61x251_vidioc_try_s_fmt(struct et61x251_device* cam, unsigned int cmd,
+                          void __user * arg)
+{
+	struct et61x251_sensor* s = cam->sensor;
+	struct v4l2_format format;
+	struct v4l2_pix_format* pix;
+	struct v4l2_pix_format* pfmt = &(s->pix_format);
+	struct v4l2_rect* bounds = &(s->cropcap.bounds);
+	struct v4l2_rect rect;
+	u8 scale;
+	const enum et61x251_stream_state stream = cam->stream;
+	const u32 nbuffers = cam->nbuffers;
+	u32 i;
+	int err = 0;
+
+	if (copy_from_user(&format, arg, sizeof(format)))
+		return -EFAULT;
+
+	pix = &(format.fmt.pix);
+
+	if (format.type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;
+
+	/* start from the current cropping rectangle */
+	memcpy(&rect, &(s->_rect), sizeof(rect));
+
+	{ /* calculate the actual scaling factor */
+		u32 a, b;
+		a = rect.width * rect.height;
+		b = pix->width * pix->height;
+		scale = b ? (u8)((a / b) < 4 ? 1 : 2) : 1;
+	}
+
+	rect.width = scale * pix->width;
+	rect.height = scale * pix->height;
+
+	/* clamp the rectangle size to the sensor bounds */
+	if (rect.width < 4)
+		rect.width = 4;
+	if (rect.height < 4)
+		rect.height = 4;
+	if (rect.width > bounds->left + bounds->width - rect.left)
+		rect.width = bounds->left + bounds->width - rect.left;
+	if (rect.height > bounds->top + bounds->height - rect.top)
+		rect.height = bounds->top + bounds->height - rect.top;
+
+	/* width and height are rounded down to a multiple of 4 */
+	rect.width &= ~3L;
+	rect.height &= ~3L;
+
+	{ /* adjust the scaling factor */
+		u32 a, b;
+		a = rect.width * rect.height;
+		b = pix->width * pix->height;
+		scale = b ? (u8)((a / b) < 4 ? 1 : 2) : 1;
+	}
+
+	pix->width = rect.width / scale;
+	pix->height = rect.height / scale;
+
+	/* an unsupported pixel format falls back to the current one */
+	if (pix->pixelformat != V4L2_PIX_FMT_ET61X251 &&
+	    pix->pixelformat != V4L2_PIX_FMT_SBGGR8)
+		pix->pixelformat = pfmt->pixelformat;
+	pix->priv = pfmt->priv; /* bpp */
+	pix->colorspace = pfmt->colorspace;
+	pix->bytesperline = (pix->pixelformat == V4L2_PIX_FMT_ET61X251)
+	                    ? 0 : (pix->width * pix->priv) / 8;
+	pix->sizeimage = pix->height * ((pix->width * pix->priv) / 8);
+	pix->field = V4L2_FIELD_NONE;
+
+	/* VIDIOC_TRY_FMT only reports the adjusted format */
+	if (cmd == VIDIOC_TRY_FMT) {
+		if (copy_to_user(arg, &format, sizeof(format)))
+			return -EFAULT;
+		return 0;
+	}
+
+	/* with force_munmap, refuse to proceed while buffers are mapped */
+	if (cam->module_param.force_munmap)
+		for (i = 0; i < cam->nbuffers; i++)
+			if (cam->frame[i].vma_use_count) {
+				DBG(3, "VIDIOC_S_FMT failed. "
+				       "Unmap the buffers first.");
+				return -EINVAL;
+			}
+
+	if (cam->stream == STREAM_ON)
+		if ((err = et61x251_stream_interrupt(cam)))
+			return err;
+
+	if (copy_to_user(arg, &format, sizeof(format))) {
+		cam->stream = stream;
+		return -EFAULT;
+	}
+
+	if (cam->module_param.force_munmap || cam->io == IO_READ)
+		et61x251_release_buffers(cam);
+
+	/* the individual results are summed: any failure makes err non-zero */
+	err += et61x251_set_pix_format(cam, pix);
+	err += et61x251_set_crop(cam, &rect);
+	if (s->set_pix_format)
+		err += s->set_pix_format(cam, pix);
+	if (s->set_crop)
+		err += s->set_crop(cam, &rect);
+	err += et61x251_set_scale(cam, scale);
+
+	if (err) { /* atomic, no rollback in ioctl() */
+		cam->state |= DEV_MISCONFIGURED;
+		DBG(1, "VIDIOC_S_FMT failed because of hardware problems. To "
+		       "use the camera, close and open /dev/video%d again.",
+		    cam->v4ldev->minor);
+		return -EIO;
+	}
+
+	memcpy(pfmt, pix, sizeof(*pix));
+	memcpy(&(s->_rect), &rect, sizeof(rect));
+
+	/* request again the same number of buffers released above */
+	if ((cam->module_param.force_munmap  || cam->io == IO_READ) &&
+	    nbuffers != et61x251_request_buffers(cam, nbuffers, cam->io)) {
+		cam->state |= DEV_MISCONFIGURED;
+		DBG(1, "VIDIOC_S_FMT failed because of not enough memory. To "
+		       "use the camera, close and open /dev/video%d again.",
+		    cam->v4ldev->minor);
+		return -ENOMEM;
+	}
+
+	if (cam->io == IO_READ)
+		et61x251_empty_framequeues(cam);
+	else if (cam->module_param.force_munmap)
+		et61x251_requeue_outqueue(cam);
+
+	/* restore the stream state saved on entry */
+	cam->stream = stream;
+
+	return 0;
+}
+
+
+/*
+ * VIDIOC_G_JPEGCOMP handler: copy the current compression settings of the
+ * device to the user. Returns 0 or -EFAULT.
+ */
+static int
+et61x251_vidioc_g_jpegcomp(struct et61x251_device* cam, void __user * arg)
+{
+	return copy_to_user(arg, &cam->compression, sizeof(cam->compression))
+	       ? -EFAULT : 0;
+}
+
+
+/*
+ * VIDIOC_S_JPEGCOMP handler: set the compression quality (0 or 1 only).
+ * A running stream is interrupted while the hardware is programmed and the
+ * stream state saved on entry is restored on success. On hardware failure
+ * the device is flagged DEV_MISCONFIGURED and -EIO is returned.
+ */
+static int
+et61x251_vidioc_s_jpegcomp(struct et61x251_device* cam, void __user * arg)
+{
+	const enum et61x251_stream_state saved_stream = cam->stream;
+	struct v4l2_jpegcompression jc;
+	int err;
+
+	if (copy_from_user(&jc, arg, sizeof(jc)))
+		return -EFAULT;
+
+	if (!(jc.quality == 0 || jc.quality == 1))
+		return -EINVAL;
+
+	if (cam->stream == STREAM_ON) {
+		err = et61x251_stream_interrupt(cam);
+		if (err)
+			return err;
+	}
+
+	if (et61x251_set_compression(cam, &jc)) {
+		/* atomic, no rollback in ioctl() */
+		cam->state |= DEV_MISCONFIGURED;
+		DBG(1, "VIDIOC_S_JPEGCOMP failed because of hardware "
+		       "problems. To use the camera, close and open "
+		       "/dev/video%d again.", cam->v4ldev->minor);
+		return -EIO;
+	}
+
+	cam->compression.quality = jc.quality;
+	cam->stream = saved_stream;
+
+	return 0;
+}
+
+
+/*
+ * VIDIOC_REQBUFS handler: (re)allocate the memory-mapped frame buffers.
+ *
+ * Fails with -EINVAL when the read method is already in use or when any of
+ * the previous buffers is still mapped. A running stream is interrupted,
+ * the queues are emptied and the old buffers released before the new ones
+ * are requested. The number of buffers actually obtained is returned in
+ * rb.count; a zero count leaves the I/O method unselected.
+ */
+static int
+et61x251_vidioc_reqbufs(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_requestbuffers rb;
+	u32 n;
+	int err;
+
+	if (copy_from_user(&rb, arg, sizeof(rb)))
+		return -EFAULT;
+
+	if (!(rb.type == V4L2_BUF_TYPE_VIDEO_CAPTURE &&
+	      rb.memory == V4L2_MEMORY_MMAP))
+		return -EINVAL;
+
+	if (cam->io == IO_READ) {
+		DBG(3, "Close and open the device again to choose the mmap "
+		       "I/O method");
+		return -EINVAL;
+	}
+
+	for (n = 0; n < cam->nbuffers; n++) {
+		if (!cam->frame[n].vma_use_count)
+			continue;
+		DBG(3, "VIDIOC_REQBUFS failed. "
+		       "Previous buffers are still mapped.");
+		return -EINVAL;
+	}
+
+	if (cam->stream == STREAM_ON) {
+		err = et61x251_stream_interrupt(cam);
+		if (err)
+			return err;
+	}
+
+	et61x251_empty_framequeues(cam);
+	et61x251_release_buffers(cam);
+
+	if (rb.count)
+		rb.count = et61x251_request_buffers(cam, rb.count, IO_MMAP);
+
+	if (copy_to_user(arg, &rb, sizeof(rb))) {
+		/* the caller never learns about the buffers: drop them */
+		et61x251_release_buffers(cam);
+		cam->io = IO_NONE;
+		return -EFAULT;
+	}
+
+	cam->io = rb.count ? IO_MMAP : IO_NONE;
+
+	return 0;
+}
+
+
+/*
+ * VIDIOC_QUERYBUF handler: return the description of a memory-mapped frame
+ * buffer together with its MAPPED / DONE / QUEUED status flags.
+ * Returns 0, -EINVAL for a bad type or index or when the mmap method is not
+ * active, or -EFAULT.
+ */
+static int
+et61x251_vidioc_querybuf(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_buffer b;
+	struct et61x251_frame_t* f;
+
+	if (copy_from_user(&b, arg, sizeof(b)))
+		return -EFAULT;
+
+	if (cam->io != IO_MMAP || b.type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
+	    b.index >= cam->nbuffers)
+		return -EINVAL;
+
+	memcpy(&b, &cam->frame[b.index].buf, sizeof(b));
+
+	/* the index is taken from the buffer description just copied */
+	f = &cam->frame[b.index];
+
+	if (f->vma_use_count)
+		b.flags |= V4L2_BUF_FLAG_MAPPED;
+
+	if (f->state == F_DONE)
+		b.flags |= V4L2_BUF_FLAG_DONE;
+	else if (f->state != F_UNUSED)
+		b.flags |= V4L2_BUF_FLAG_QUEUED;
+
+	return copy_to_user(arg, &b, sizeof(b)) ? -EFAULT : 0;
+}
+
+
+/*
+ * VIDIOC_QBUF handler: append an unused frame buffer to the input queue.
+ * Returns 0, -EINVAL for a bad type or index, when the mmap method is not
+ * active or when the buffer is not in the unused state, or -EFAULT.
+ */
+static int
+et61x251_vidioc_qbuf(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_buffer b;
+	struct et61x251_frame_t* f;
+	unsigned long irq_flags;
+
+	if (copy_from_user(&b, arg, sizeof(b)))
+		return -EFAULT;
+
+	if (cam->io != IO_MMAP || b.type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
+	    b.index >= cam->nbuffers)
+		return -EINVAL;
+
+	f = &cam->frame[b.index];
+
+	if (f->state != F_UNUSED)
+		return -EINVAL;
+
+	f->state = F_QUEUED;
+
+	spin_lock_irqsave(&cam->queue_lock, irq_flags);
+	list_add_tail(&f->frame, &cam->inqueue);
+	spin_unlock_irqrestore(&cam->queue_lock, irq_flags);
+
+	PDBGG("Frame #%lu queued", (unsigned long)b.index);
+
+	return 0;
+}
+
+
+/*
+ * VIDIOC_DQBUF handler: remove the first frame from the output queue, mark
+ * it unused and return its buffer description to the user.
+ *
+ * With an empty output queue the call fails with -EINVAL when the stream is
+ * off and with -EAGAIN for non-blocking files; otherwise it sleeps until a
+ * frame is available or the device is disconnected or misconfigured.
+ */
+static int
+et61x251_vidioc_dqbuf(struct et61x251_device* cam, struct file* filp,
+                      void __user * arg)
+{
+	struct v4l2_buffer b;
+	struct et61x251_frame_t* done;
+	unsigned long irq_flags;
+	int err;
+
+	if (copy_from_user(&b, arg, sizeof(b)))
+		return -EFAULT;
+
+	if (cam->io != IO_MMAP || b.type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;
+
+	if (list_empty(&cam->outqueue)) {
+		if (cam->stream == STREAM_OFF)
+			return -EINVAL;
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		err = wait_event_interruptible(cam->wait_frame,
+		          !list_empty(&cam->outqueue) ||
+		          (cam->state & (DEV_DISCONNECTED |
+		                         DEV_MISCONFIGURED)));
+		if (err)
+			return err;
+		if (cam->state & DEV_DISCONNECTED)
+			return -ENODEV;
+		if (cam->state & DEV_MISCONFIGURED)
+			return -EIO;
+	}
+
+	spin_lock_irqsave(&cam->queue_lock, irq_flags);
+	done = list_entry(cam->outqueue.next, struct et61x251_frame_t, frame);
+	list_del(&done->frame);
+	spin_unlock_irqrestore(&cam->queue_lock, irq_flags);
+
+	done->state = F_UNUSED;
+
+	memcpy(&b, &done->buf, sizeof(b));
+	if (done->vma_use_count)
+		b.flags |= V4L2_BUF_FLAG_MAPPED;
+
+	if (copy_to_user(arg, &b, sizeof(b)))
+		return -EFAULT;
+
+	PDBGG("Frame #%lu dequeued", (unsigned long)done->buf.index);
+
+	return 0;
+}
+
+
+/*
+ * VIDIOC_STREAMON handler: switch the stream on. Requires the mmap method,
+ * the video capture buffer type and at least one buffer in the input queue.
+ * Returns 0, -EINVAL or -EFAULT.
+ */
+static int
+et61x251_vidioc_streamon(struct et61x251_device* cam, void __user * arg)
+{
+	int buf_type;
+
+	if (copy_from_user(&buf_type, arg, sizeof(buf_type)))
+		return -EFAULT;
+
+	if (buf_type != V4L2_BUF_TYPE_VIDEO_CAPTURE || cam->io != IO_MMAP ||
+	    list_empty(&cam->inqueue))
+		return -EINVAL;
+
+	cam->stream = STREAM_ON;
+	DBG(3, "Stream on");
+
+	return 0;
+}
+
+
+/*
+ * VIDIOC_STREAMOFF handler: interrupt a running stream and empty the frame
+ * queues. Requires the mmap method and the video capture buffer type.
+ * Returns 0, -EINVAL, -EFAULT or the error of the stream interruption.
+ */
+static int
+et61x251_vidioc_streamoff(struct et61x251_device* cam, void __user * arg)
+{
+	int buf_type;
+	int err;
+
+	if (copy_from_user(&buf_type, arg, sizeof(buf_type)))
+		return -EFAULT;
+
+	if (buf_type != V4L2_BUF_TYPE_VIDEO_CAPTURE || cam->io != IO_MMAP)
+		return -EINVAL;
+
+	if (cam->stream == STREAM_ON) {
+		err = et61x251_stream_interrupt(cam);
+		if (err)
+			return err;
+	}
+
+	et61x251_empty_framequeues(cam);
+	DBG(3, "Stream off");
+
+	return 0;
+}
+
+
+/*
+ * VIDIOC_G_PARM handler: report the number of buffers used by read() and a
+ * zero extended mode; the other fields are returned as received.
+ * Returns 0, -EINVAL for a non-capture buffer type, or -EFAULT.
+ */
+static int
+et61x251_vidioc_g_parm(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_streamparm sp;
+	struct v4l2_captureparm* cp = &sp.parm.capture;
+
+	if (copy_from_user(&sp, arg, sizeof(sp)))
+		return -EFAULT;
+
+	if (sp.type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;
+
+	cp->extendedmode = 0;
+	cp->readbuffers = cam->nreadbuffers;
+
+	return copy_to_user(arg, &sp, sizeof(sp)) ? -EFAULT : 0;
+}
+
+
+/*
+ * VIDIOC_S_PARM handler: set the number of buffers used by read().
+ * A zero request keeps the current number; requests above
+ * ET61X251_MAX_FRAMES are capped. The new value takes effect only after the
+ * adjusted parameters have been copied back to the user.
+ * Returns 0, -EINVAL for a non-capture buffer type, or -EFAULT.
+ */
+static int
+et61x251_vidioc_s_parm(struct et61x251_device* cam, void __user * arg)
+{
+	struct v4l2_streamparm sp;
+	struct v4l2_captureparm* cp = &sp.parm.capture;
+
+	if (copy_from_user(&sp, arg, sizeof(sp)))
+		return -EFAULT;
+
+	if (sp.type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;
+
+	cp->extendedmode = 0;
+
+	if (!cp->readbuffers)
+		cp->readbuffers = cam->nreadbuffers;
+
+	if (cp->readbuffers > ET61X251_MAX_FRAMES)
+		cp->readbuffers = ET61X251_MAX_FRAMES;
+
+	if (copy_to_user(arg, &sp, sizeof(sp)))
+		return -EFAULT;
+
+	cam->nreadbuffers = cp->readbuffers;
+
+	return 0;
+}
+
+
+/*
+ * Dispatch a V4L2 ioctl to its handler.
+ *
+ * VIDIOC_G_INPUT is answered here by storing input number 0 (the only
+ * existing input) into the user integer, instead of reading and validating
+ * whatever value the application left there. VIDIOC_S_INPUT keeps being
+ * validated by et61x251_vidioc_gs_input().
+ *
+ * Returns the handler result, or -EINVAL for unsupported commands.
+ */
+static int et61x251_ioctl_v4l2(struct inode* inode, struct file* filp,
+                               unsigned int cmd, void __user * arg)
+{
+	struct et61x251_device* cam = video_get_drvdata(video_devdata(filp));
+
+	switch (cmd) {
+
+	case VIDIOC_QUERYCAP:
+		return et61x251_vidioc_querycap(cam, arg);
+
+	case VIDIOC_ENUMINPUT:
+		return et61x251_vidioc_enuminput(cam, arg);
+
+	case VIDIOC_G_INPUT: {
+		/* the current input is always number 0 */
+		int index = 0;
+
+		if (copy_to_user(arg, &index, sizeof(index)))
+			return -EFAULT;
+		return 0;
+	}
+
+	case VIDIOC_S_INPUT:
+		return et61x251_vidioc_gs_input(cam, arg);
+
+	case VIDIOC_QUERYCTRL:
+		return et61x251_vidioc_query_ctrl(cam, arg);
+
+	case VIDIOC_G_CTRL:
+		return et61x251_vidioc_g_ctrl(cam, arg);
+
+	case VIDIOC_S_CTRL_OLD:
+	case VIDIOC_S_CTRL:
+		return et61x251_vidioc_s_ctrl(cam, arg);
+
+	case VIDIOC_CROPCAP_OLD:
+	case VIDIOC_CROPCAP:
+		return et61x251_vidioc_cropcap(cam, arg);
+
+	case VIDIOC_G_CROP:
+		return et61x251_vidioc_g_crop(cam, arg);
+
+	case VIDIOC_S_CROP:
+		return et61x251_vidioc_s_crop(cam, arg);
+
+	case VIDIOC_ENUM_FMT:
+		return et61x251_vidioc_enum_fmt(cam, arg);
+
+	case VIDIOC_G_FMT:
+		return et61x251_vidioc_g_fmt(cam, arg);
+
+	case VIDIOC_TRY_FMT:
+	case VIDIOC_S_FMT:
+		return et61x251_vidioc_try_s_fmt(cam, cmd, arg);
+
+	case VIDIOC_G_JPEGCOMP:
+		return et61x251_vidioc_g_jpegcomp(cam, arg);
+
+	case VIDIOC_S_JPEGCOMP:
+		return et61x251_vidioc_s_jpegcomp(cam, arg);
+
+	case VIDIOC_REQBUFS:
+		return et61x251_vidioc_reqbufs(cam, arg);
+
+	case VIDIOC_QUERYBUF:
+		return et61x251_vidioc_querybuf(cam, arg);
+
+	case VIDIOC_QBUF:
+		return et61x251_vidioc_qbuf(cam, arg);
+
+	case VIDIOC_DQBUF:
+		return et61x251_vidioc_dqbuf(cam, filp, arg);
+
+	case VIDIOC_STREAMON:
+		return et61x251_vidioc_streamon(cam, arg);
+
+	case VIDIOC_STREAMOFF:
+		return et61x251_vidioc_streamoff(cam, arg);
+
+	case VIDIOC_G_PARM:
+		return et61x251_vidioc_g_parm(cam, arg);
+
+	case VIDIOC_S_PARM_OLD:
+	case VIDIOC_S_PARM:
+		return et61x251_vidioc_s_parm(cam, arg);
+
+	/* known but unsupported commands */
+	case VIDIOC_G_STD:
+	case VIDIOC_S_STD:
+	case VIDIOC_QUERYSTD:
+	case VIDIOC_ENUMSTD:
+	case VIDIOC_QUERYMENU:
+		return -EINVAL;
+
+	default:
+		return -EINVAL;
+
+	}
+}
+
+
+/*
+ * ioctl() file operation: serialize on cam->fileop_sem, refuse to work on a
+ * disconnected (-ENODEV) or misconfigured (-EIO) device and hand the command
+ * over to the V4L2 dispatcher otherwise.
+ */
+static int et61x251_ioctl(struct inode* inode, struct file* filp,
+                         unsigned int cmd, unsigned long arg)
+{
+	struct et61x251_device* cam = video_get_drvdata(video_devdata(filp));
+	int ret;
+
+	if (down_interruptible(&cam->fileop_sem))
+		return -ERESTARTSYS;
+
+	if (cam->state & DEV_DISCONNECTED) {
+		DBG(1, "Device not present");
+		ret = -ENODEV;
+	} else if (cam->state & DEV_MISCONFIGURED) {
+		DBG(1, "The camera is misconfigured. Close and open it "
+		       "again.");
+		ret = -EIO;
+	} else {
+		V4LDBG(3, "et61x251", cmd);
+		ret = et61x251_ioctl_v4l2(inode, filp, cmd,
+		                          (void __user *)arg);
+	}
+
+	up(&cam->fileop_sem);
+
+	return ret;
+}
+
+
+/* File operations of the video device node; seeking is not supported */
+static struct file_operations et61x251_fops = {
+	.owner = THIS_MODULE,
+	.open =    et61x251_open,
+	.release = et61x251_release,
+	.ioctl =   et61x251_ioctl,
+	.read =    et61x251_read,
+	.poll =    et61x251_poll,
+	.mmap =    et61x251_mmap,
+	.llseek =  no_llseek,
+};
+
+/*****************************************************************************/
+
+/*
+ * USB probe callback.
+ *
+ * There is only a single interface, so nothing needs to be validated.
+ *
+ * Allocates the device structure, its control buffer and the video device,
+ * runs the probe functions listed in et61x251_sensor_table until one of them
+ * succeeds, initializes the camera and registers the V4L2 device node. A
+ * failed initialization is not fatal: the device is flagged
+ * DEV_MISCONFIGURED and the initialization is attempted again on open().
+ *
+ * Returns 0 on success or a negative error code; on failure everything
+ * allocated here is released.
+ */
+static int
+et61x251_usb_probe(struct usb_interface* intf, const struct usb_device_id* id)
+{
+	struct usb_device *udev = interface_to_usbdev(intf);
+	struct et61x251_device* cam;
+	static unsigned int dev_nr = 0;
+	unsigned int i;
+	int err = 0;
+
+	if (!(cam = kzalloc(sizeof(struct et61x251_device), GFP_KERNEL)))
+		return -ENOMEM;
+
+	cam->usbdev = udev;
+
+	if (!(cam->control_buffer = kzalloc(8, GFP_KERNEL))) {
+		DBG(1, "kmalloc() failed");
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	if (!(cam->v4ldev = video_device_alloc())) {
+		DBG(1, "video_device_alloc() failed");
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	init_MUTEX(&cam->dev_sem);
+
+	DBG(2, "ET61X[12]51 PC Camera Controller detected "
+	       "(vid/pid 0x%04X/0x%04X)",id->idVendor, id->idProduct);
+
+	/* try every sensor probe function until one succeeds */
+	for  (i = 0; et61x251_sensor_table[i]; i++) {
+		err = et61x251_sensor_table[i](cam);
+		if (!err)
+			break;
+	}
+
+	if (!err && cam->sensor)
+		DBG(2, "%s image sensor detected", cam->sensor->name);
+	else {
+		DBG(1, "No supported image sensor detected");
+		err = -ENODEV;
+		goto fail;
+	}
+
+	if (et61x251_init(cam)) {
+		DBG(1, "Initialization failed. I will retry on open().");
+		cam->state |= DEV_MISCONFIGURED;
+	}
+
+	strcpy(cam->v4ldev->name, "ET61X[12]51 PC Camera");
+	cam->v4ldev->owner = THIS_MODULE;
+	cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES;
+	cam->v4ldev->hardware = 0;
+	cam->v4ldev->fops = &et61x251_fops;
+	cam->v4ldev->minor = video_nr[dev_nr];
+	cam->v4ldev->release = video_device_release;
+	video_set_drvdata(cam->v4ldev, cam);
+
+	/* dev_sem is held until the setup below is complete */
+	down(&cam->dev_sem);
+
+	err = video_register_device(cam->v4ldev, VFL_TYPE_GRABBER,
+	                            video_nr[dev_nr]);
+	if (err) {
+		DBG(1, "V4L2 device registration failed");
+		if (err == -ENFILE && video_nr[dev_nr] == -1)
+			DBG(1, "Free /dev/videoX node not found");
+		video_nr[dev_nr] = -1;
+		dev_nr = (dev_nr < ET61X251_MAX_DEVICES-1) ? dev_nr+1 : 0;
+		up(&cam->dev_sem);
+		goto fail;
+	}
+
+	DBG(2, "V4L2 device registered as /dev/video%d", cam->v4ldev->minor);
+
+	cam->module_param.force_munmap = force_munmap[dev_nr];
+
+	/* advance to the next module parameter slot, wrapping around */
+	dev_nr = (dev_nr < ET61X251_MAX_DEVICES-1) ? dev_nr+1 : 0;
+
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+	et61x251_create_sysfs(cam);
+	DBG(2, "Optional device control through 'sysfs' interface ready");
+#endif
+
+	usb_set_intfdata(intf, cam);
+
+	up(&cam->dev_sem);
+
+	return 0;
+
+fail:
+	if (cam) {
+		kfree(cam->control_buffer);
+		if (cam->v4ldev)
+			video_device_release(cam->v4ldev);
+		kfree(cam);
+	}
+	return err;
+}
+
+
+/*
+ * USB disconnect callback.
+ *
+ * If the device is not open, its resources and the device structure are
+ * freed here. If it is open, the transfer is stopped, the device is flagged
+ * as disconnected, the sleeping tasks are woken up and the deallocation is
+ * left to release().
+ *
+ * Whether the device is open is sampled once while dev_sem is held: as soon
+ * as dev_sem is dropped a pending release() may free the device structure,
+ * so it must not be dereferenced afterwards in the deferred case.
+ */
+static void et61x251_usb_disconnect(struct usb_interface* intf)
+{
+	struct et61x251_device* cam = usb_get_intfdata(intf);
+	int in_use;
+
+	if (!cam)
+		return;
+
+	down_write(&et61x251_disconnect);
+
+	down(&cam->dev_sem);
+
+	DBG(2, "Disconnecting %s...", cam->v4ldev->name);
+
+	wake_up_interruptible_all(&cam->open);
+
+	in_use = cam->users ? 1 : 0;
+
+	if (in_use) {
+		DBG(2, "Device /dev/video%d is open! Deregistration and "
+		       "memory deallocation are deferred on close.",
+		    cam->v4ldev->minor);
+		cam->state |= DEV_MISCONFIGURED;
+		et61x251_stop_transfer(cam);
+		cam->state |= DEV_DISCONNECTED;
+		wake_up_interruptible(&cam->wait_frame);
+		wake_up_interruptible(&cam->wait_stream);
+	} else {
+		cam->state |= DEV_DISCONNECTED;
+		et61x251_release_resources(cam);
+	}
+
+	up(&cam->dev_sem);
+
+	/* in the deferred case cam belongs to release() from now on */
+	if (!in_use)
+		kfree(cam);
+
+	up_write(&et61x251_disconnect);
+}
+
+
+/* USB driver descriptor registered at module load time */
+static struct usb_driver et61x251_usb_driver = {
+	.name =       "et61x251",
+	.id_table =   et61x251_id_table,
+	.probe =      et61x251_usb_probe,
+	.disconnect = et61x251_usb_disconnect,
+};
+
+/*****************************************************************************/
+
+/*
+ * Module entry point: print the module banner and register the USB driver.
+ * Returns the result of usb_register().
+ */
+static int __init et61x251_module_init(void)
+{
+	int err;
+
+	KDBG(2, ET61X251_MODULE_NAME " v" ET61X251_MODULE_VERSION);
+	KDBG(3, ET61X251_MODULE_AUTHOR);
+
+	err = usb_register(&et61x251_usb_driver);
+	if (err)
+		KDBG(1, "usb_register() failed");
+
+	return err;
+}
+
+
+/* Module exit point: deregister the USB driver. */
+static void __exit et61x251_module_exit(void)
+{
+	usb_deregister(&et61x251_usb_driver);
+}
+
+
+/* Register the module entry and exit points */
+module_init(et61x251_module_init);
+module_exit(et61x251_module_exit);
diff --git a/drivers/usb/media/et61x251_sensor.h b/drivers/usb/media/et61x251_sensor.h
new file mode 100644
index 000000000000..b9df91062fc0
--- /dev/null
+++ b/drivers/usb/media/et61x251_sensor.h
@@ -0,0 +1,115 @@
+/***************************************************************************
+ * API for image sensors connected to ET61X[12]51 PC Camera Controllers    *
+ *                                                                         *
+ * Copyright (C) 2006 by Luca Risolia <luca.risolia@studio.unibo.it>       *
+ *                                                                         *
+ * This program is free software; you can redistribute it and/or modify    *
+ * it under the terms of the GNU General Public License as published by    *
+ * the Free Software Foundation; either version 2 of the License, or       *
+ * (at your option) any later version.                                     *
+ *                                                                         *
+ * This program is distributed in the hope that it will be useful,         *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of          *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
+ * GNU General Public License for more details.                            *
+ *                                                                         *
+ * You should have received a copy of the GNU General Public License       *
+ * along with this program; if not, write to the Free Software             *
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.               *
+ ***************************************************************************/
+
+#ifndef _ET61X251_SENSOR_H_
+#define _ET61X251_SENSOR_H_
+
+#include <linux/usb.h>
+#include <linux/videodev.h>
+#include <linux/device.h>
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <asm/types.h>
+
+struct et61x251_device;
+struct et61x251_sensor;
+
+/*****************************************************************************/
+
+extern int et61x251_probe_tas5130d1b(struct et61x251_device* cam);
+/* Defines et61x251_sensor_table[]: NULL-terminated list of probe routines */
+#define ET61X251_SENSOR_TABLE                                                 \
+/* Weak detections must go at the end of the list */                          \
+static int (*et61x251_sensor_table[])(struct et61x251_device*) = {            \
+	&et61x251_probe_tas5130d1b,                                           \
+	NULL,                                                                 \
+};
+/* Called by sensor probe routines to pass in their struct et61x251_sensor */
+extern void
+et61x251_attach_sensor(struct et61x251_device* cam,
+                       struct et61x251_sensor* sensor);
+
+/*****************************************************************************/
+
+extern int et61x251_write_reg(struct et61x251_device*, u8 value, u16 index);
+extern int et61x251_read_reg(struct et61x251_device*, u16 index);
+extern int et61x251_i2c_write(struct et61x251_device*, u8 address, u8 value);
+extern int et61x251_i2c_read(struct et61x251_device*, u8 address);
+extern int et61x251_i2c_try_write(struct et61x251_device*,
+                                  struct et61x251_sensor*, u8 address,
+                                  u8 value);
+extern int et61x251_i2c_try_read(struct et61x251_device*,
+                                 struct et61x251_sensor*, u8 address);
+extern int et61x251_i2c_raw_write(struct et61x251_device*, u8 n, u8 data1,
+                                  u8 data2, u8 data3, u8 data4, u8 data5,
+                                  u8 data6, u8 data7, u8 data8, u8 address);
+
+/*****************************************************************************/
+
+enum et61x251_i2c_sysfs_ops {
+	ET61X251_I2C_READ = 0x01,
+	ET61X251_I2C_WRITE = 0x02,
+};
+
+enum et61x251_i2c_interface {
+	ET61X251_I2C_2WIRES,
+	ET61X251_I2C_3WIRES,
+};
+
+/* Repeat start condition when RSTA is high */
+enum et61x251_i2c_rsta {
+	ET61X251_I2C_RSTA_STOP = 0x00, /* stop then start */
+	ET61X251_I2C_RSTA_REPEAT = 0x01, /* repeat start */
+};
+
+#define ET61X251_MAX_CTRLS (V4L2_CID_LASTP1-V4L2_CID_BASE+10) /* qctrl[] size */
+
+struct et61x251_sensor {
+	char name[32]; /* sensor name, e.g. "TAS5130D1B" */
+
+	enum et61x251_i2c_sysfs_ops sysfs_ops;
+
+	enum et61x251_i2c_interface interface;
+	u8 i2c_slave_id;
+	enum et61x251_i2c_rsta rsta;
+	struct v4l2_rect active_pixel; /* left and top define FVSX and FVSY */
+
+	struct v4l2_queryctrl qctrl[ET61X251_MAX_CTRLS];
+	struct v4l2_cropcap cropcap;
+	struct v4l2_pix_format pix_format;
+	/* Sensor hooks; TAS5130D1B e.g. provides only init and set_ctrl */
+	int (*init)(struct et61x251_device* cam);
+	int (*get_ctrl)(struct et61x251_device* cam,
+	                struct v4l2_control* ctrl);
+	int (*set_ctrl)(struct et61x251_device* cam,
+	                const struct v4l2_control* ctrl);
+	int (*set_crop)(struct et61x251_device* cam,
+	                const struct v4l2_rect* rect);
+	int (*set_pix_format)(struct et61x251_device* cam,
+	                      const struct v4l2_pix_format* pix);
+	/* Read by sensor probe code to check the USB product ID */
+	const struct usb_device* usbdev;
+
+	/* Private */
+	struct v4l2_queryctrl _qctrl[ET61X251_MAX_CTRLS];
+	struct v4l2_rect _rect;
+};
+
+#endif /* _ET61X251_SENSOR_H_ */
diff --git a/drivers/usb/media/et61x251_tas5130d1b.c b/drivers/usb/media/et61x251_tas5130d1b.c
new file mode 100644
index 000000000000..65f1ae9cf2b3
--- /dev/null
+++ b/drivers/usb/media/et61x251_tas5130d1b.c
@@ -0,0 +1,137 @@
+/***************************************************************************
+ * Plug-in for TAS5130D1B image sensor connected to the ET61X[12]51        *
+ * PC Camera Controllers                                                   *
+ *                                                                         *
+ * Copyright (C) 2006 by Luca Risolia <luca.risolia@studio.unibo.it>       *
+ *                                                                         *
+ * This program is free software; you can redistribute it and/or modify    *
+ * it under the terms of the GNU General Public License as published by    *
+ * the Free Software Foundation; either version 2 of the License, or       *
+ * (at your option) any later version.                                     *
+ *                                                                         *
+ * This program is distributed in the hope that it will be useful,         *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of          *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
+ * GNU General Public License for more details.                            *
+ *                                                                         *
+ * You should have received a copy of the GNU General Public License       *
+ * along with this program; if not, write to the Free Software             *
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.               *
+ ***************************************************************************/
+
+#include "et61x251_sensor.h"
+
+
+/* Write the fixed register setup; returns the sum of the write results. */
+static int tas5130d1b_init(struct et61x251_device* cam)
+{
+	static const struct { u8 value; u16 index; } regs[] = {
+		{ 0x14, 0x01 }, { 0x1b, 0x02 },
+		{ 0x02, 0x12 }, { 0x0e, 0x60 },
+		{ 0x80, 0x61 }, { 0xf0, 0x62 },
+		{ 0x03, 0x63 }, { 0x14, 0x64 },
+		{ 0xf4, 0x65 }, { 0x01, 0x66 },
+		{ 0x05, 0x67 }, { 0x8f, 0x68 },
+		{ 0x0f, 0x8d }, { 0x08, 0x8e },
+	};
+	int err = 0;
+	unsigned int i;
+
+	/* Every write is attempted, even after an earlier one has failed */
+	for (i = 0; i < sizeof(regs) / sizeof(regs[0]); i++)
+		err += et61x251_write_reg(cam, regs[i].value, regs[i].index);
+
+	return err;
+}
+
+
+/* Set gain or exposure: -EINVAL for other ids, -EIO if the write fails. */
+static int tas5130d1b_set_ctrl(struct et61x251_device* cam,
+                               const struct v4l2_control* ctrl)
+{
+	u8 byte1, byte2;
+
+	switch (ctrl->id) {
+	case V4L2_CID_GAIN:
+		byte1 = 0x20;
+		byte2 = 0xf6 - ctrl->value;
+		break;
+	case V4L2_CID_EXPOSURE:
+		byte1 = 0x40;
+		byte2 = 0x47 - ctrl->value;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return et61x251_i2c_raw_write(cam, 2, byte1, byte2, 0, 0, 0,
+	                              0, 0, 0, 0) ? -EIO : 0;
+}
+
+
+static struct et61x251_sensor tas5130d1b = {
+	.name = "TAS5130D1B",
+	.interface = ET61X251_I2C_3WIRES,
+	.rsta = ET61X251_I2C_RSTA_STOP, /* stop then start */
+	.active_pixel = { /* left and top define FVSX and FVSY */
+		.left = 106,
+		.top = 13,
+	},
+	.init = &tas5130d1b_init,
+	.qctrl = {
+		{
+			.id = V4L2_CID_GAIN,
+			.type = V4L2_CTRL_TYPE_INTEGER,
+			.name = "global gain",
+			.minimum = 0x00,
+			.maximum = 0xf6, /* set_ctrl sends 0xf6 - value */
+			.step = 0x02,
+			.default_value = 0x0d, /* NOTE(review): off the step grid? */
+			.flags = 0,
+		},
+		{
+			.id = V4L2_CID_EXPOSURE,
+			.type = V4L2_CTRL_TYPE_INTEGER,
+			.name = "exposure",
+			.minimum = 0x00,
+			.maximum = 0x47, /* set_ctrl sends 0x47 - value */
+			.step = 0x01,
+			.default_value = 0x23,
+			.flags = 0,
+		},
+	},
+	.set_ctrl = &tas5130d1b_set_ctrl,
+	.cropcap = {
+		.bounds = {
+			.left = 0,
+			.top = 0,
+			.width = 640,
+			.height = 480,
+		},
+		.defrect = { /* same rectangle as .bounds */
+			.left = 0,
+			.top = 0,
+			.width = 640,
+			.height = 480,
+		},
+	},
+	.pix_format = {
+		.width = 640,
+		.height = 480,
+		.pixelformat = V4L2_PIX_FMT_SBGGR8,
+		.priv = 8,
+	},
+};
+
+
+int et61x251_probe_tas5130d1b(struct et61x251_device* cam)
+{
+	u16 pid;
+
+	/* The sensor has no identifiers, so it is attached unconditionally */
+	et61x251_attach_sensor(cam, &tas5130d1b);
+	pid = le16_to_cpu(tas5130d1b.usbdev->descriptor.idProduct);
+
+	/* Detection is based on the USB product ID alone */
+	return pid == 0x6251 ? 0 : -ENODEV;
+}
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index ce40675324bd..6f6c69777648 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -315,6 +315,7 @@ struct v4l2_pix_format
 #define V4L2_PIX_FMT_SN9C10X  v4l2_fourcc('S','9','1','0') /* SN9C10x compression */
 #define V4L2_PIX_FMT_PWC1     v4l2_fourcc('P','W','C','1') /* pwc older webcam */
 #define V4L2_PIX_FMT_PWC2     v4l2_fourcc('P','W','C','2') /* pwc newer webcam */
+#define V4L2_PIX_FMT_ET61X251 v4l2_fourcc('E','6','2','5') /* ET61X251 compression */
 
 /*
  *	F O R M A T   E N U M E R A T I O N
-- 
cgit v1.2.3-71-gd317


From c02c4bb2058587d3c012ec08268fd93fdc654ae7 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Fri, 20 Jan 2006 14:44:12 -0800
Subject: [PATCH] USB: USB authentication states

Another hook needed for wireless USB:  there are states associated with the
device authentication protocol.  Wireless devices must authenticate using
the host system's keystore.

Note that wired connections could also use this authentication protocol, if
for no other reason than to support the most secure "simple" key exchange
protocols for wireless devices.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb_ch9.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb_ch9.h b/include/linux/usb_ch9.h
index ee21e6bf3867..a2aacfc7af2f 100644
--- a/include/linux/usb_ch9.h
+++ b/include/linux/usb_ch9.h
@@ -535,9 +535,11 @@ enum usb_device_state {
 	 */
 	USB_STATE_NOTATTACHED = 0,
 
-	/* the chapter 9 device states */
+	/* chapter 9 and authentication (wireless) device states */
 	USB_STATE_ATTACHED,
-	USB_STATE_POWERED,
+	USB_STATE_POWERED,			/* wired */
+	USB_STATE_UNAUTHENTICATED,		/* auth */
+	USB_STATE_RECONNECTING,			/* auth */
 	USB_STATE_DEFAULT,			/* limited function */
 	USB_STATE_ADDRESS,
 	USB_STATE_CONFIGURED,			/* most functions */
-- 
cgit v1.2.3-71-gd317


From b6ebb2659065b6e03605e7f0c69449bda382261a Mon Sep 17 00:00:00 2001
From: Jason Gaston <jason.d.gaston@intel.com>
Date: Mon, 9 Jan 2006 10:53:45 -0800
Subject: [PATCH] PCI: irq and pci_ids: patch for Intel ICH8

This patch adds the Intel ICH8 DID's to the irq.c and pci_ids.h files.

Signed-off-by: Jason Gaston <Jason.d.gaston@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/i386/pci/irq.c     | 5 +++++
 include/linux/pci_ids.h | 7 +++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
index e715aa930036..3ca59cad05f3 100644
--- a/arch/i386/pci/irq.c
+++ b/arch/i386/pci/irq.c
@@ -539,6 +539,11 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
 		case PCI_DEVICE_ID_INTEL_ICH7_30:
 		case PCI_DEVICE_ID_INTEL_ICH7_31:
 		case PCI_DEVICE_ID_INTEL_ESB2_0:
+		case PCI_DEVICE_ID_INTEL_ICH8_0:
+		case PCI_DEVICE_ID_INTEL_ICH8_1:
+		case PCI_DEVICE_ID_INTEL_ICH8_2:
+		case PCI_DEVICE_ID_INTEL_ICH8_3:
+		case PCI_DEVICE_ID_INTEL_ICH8_4:
 			r->name = "PIIX/ICH";
 			r->get = pirq_piix_get;
 			r->set = pirq_piix_set;
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4401a7e06057..9eb1983b8787 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2106,6 +2106,13 @@
 #define PCI_DEVICE_ID_INTEL_ICH7_19	0x27dd
 #define PCI_DEVICE_ID_INTEL_ICH7_20	0x27de
 #define PCI_DEVICE_ID_INTEL_ICH7_21	0x27df
+#define PCI_DEVICE_ID_INTEL_ICH8_0	0x2810
+#define PCI_DEVICE_ID_INTEL_ICH8_1	0x2811
+#define PCI_DEVICE_ID_INTEL_ICH8_2	0x2812
+#define PCI_DEVICE_ID_INTEL_ICH8_3	0x2814
+#define PCI_DEVICE_ID_INTEL_ICH8_4	0x2815
+#define PCI_DEVICE_ID_INTEL_ICH8_5	0x283e
+#define PCI_DEVICE_ID_INTEL_ICH8_6	0x2850
 #define PCI_DEVICE_ID_INTEL_82855PM_HB	0x3340
 #define PCI_DEVICE_ID_INTEL_82830_HB	0x3575
 #define PCI_DEVICE_ID_INTEL_82830_CGC	0x3577
-- 
cgit v1.2.3-71-gd317


From f8d65713332cf6306889a3036142a17e01e3447e Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 6 Jan 2006 03:25:37 +0100
Subject: [PATCH] PCI: drivers/pci/pci.c: #if 0 pci_find_ext_capability()

This patch #if 0's the unused global function pci_find_ext_capability().

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/pci.c   | 2 ++
 include/linux/pci.h | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d2a633efa10a..d2d187916643 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -163,6 +163,7 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap)
 	return __pci_bus_find_cap(bus, devfn, hdr_type & 0x7f, cap);
 }
 
+#if 0
 /**
  * pci_find_ext_capability - Find an extended capability
  * @dev: PCI device to query
@@ -210,6 +211,7 @@ int pci_find_ext_capability(struct pci_dev *dev, int cap)
 
 	return 0;
 }
+#endif  /*  0  */
 
 /**
  * pci_find_parent_resource - return resource region of parent bus of given region
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0a44072383ec..fe1a2b02fc55 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -406,7 +406,6 @@ struct pci_dev *pci_find_device_reverse (unsigned int vendor, unsigned int devic
 struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn);
 int pci_find_capability (struct pci_dev *dev, int cap);
 int pci_find_next_capability (struct pci_dev *dev, u8 pos, int cap);
-int pci_find_ext_capability (struct pci_dev *dev, int cap);
 struct pci_bus * pci_find_next_bus(const struct pci_bus *from);
 
 struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from);
@@ -626,7 +625,6 @@ static inline int pci_register_driver(struct pci_driver *drv) { return 0;}
 static inline void pci_unregister_driver(struct pci_driver *drv) { }
 static inline int pci_find_capability (struct pci_dev *dev, int cap) {return 0; }
 static inline int pci_find_next_capability (struct pci_dev *dev, u8 post, int cap) { return 0; }
-static inline int pci_find_ext_capability (struct pci_dev *dev, int cap) {return 0; }
 static inline const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev) { return NULL; }
 
 /* Power management related routines */
-- 
cgit v1.2.3-71-gd317


From 8cea8e9303d45556cb606cc8d9e41f889ff600c0 Mon Sep 17 00:00:00 2001
From: Mark Rustad <MRustad@mac.com>
Date: Thu, 5 Jan 2006 22:47:29 -0800
Subject: [PATCH] PCI: restore 2 missing pci ids

Somewhere between 2.6.14 and 2.6.15-rc3, some PCI ids were apparently
removed.  The ecc.c module, which is not a part of the kernel.org tree, but
included in some distributions, fails to compile.

Signed-off-by: Mark Rustad <mrustad@mac.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 9eb1983b8787..7868a8ed1906 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2065,6 +2065,7 @@
 #define PCI_DEVICE_ID_INTEL_82801EB_5	0x24d5
 #define PCI_DEVICE_ID_INTEL_82801EB_6	0x24d6
 #define PCI_DEVICE_ID_INTEL_82801EB_11	0x24db
+#define PCI_DEVICE_ID_INTEL_82801EB_13	0x24dd
 #define PCI_DEVICE_ID_INTEL_ESB_1	0x25a1
 #define PCI_DEVICE_ID_INTEL_ESB_2	0x25a2
 #define PCI_DEVICE_ID_INTEL_ESB_4	0x25a4
@@ -2156,6 +2157,7 @@
 #define PCI_DEVICE_ID_INTEL_82443GX_2	0x71a2
 #define PCI_DEVICE_ID_INTEL_82372FB_1	0x7601
 #define PCI_DEVICE_ID_INTEL_82454GX	0x84c4
+#define PCI_DEVICE_ID_INTEL_82450GX	0x84c5
 #define PCI_DEVICE_ID_INTEL_82451NX	0x84ca
 #define PCI_DEVICE_ID_INTEL_82454NX     0x84cb
 #define PCI_DEVICE_ID_INTEL_84460GX	0x84ea
-- 
cgit v1.2.3-71-gd317


From 2181c971952ec2af56cd9cc68453f7ad5a0a38d6 Mon Sep 17 00:00:00 2001
From: Grant Coady <gcoady@gmail.com>
Date: Sun, 15 Jan 2006 16:21:27 +1100
Subject: [PATCH] PCI: pci_ids: remove duplicates gathered during merge period

pci_ids.h: remove duplicates.  Compile tested allmodconfig.

Signed-off-by: Grant Coady <gcoady@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci_ids.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 7868a8ed1906..b0b908f583c5 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -394,14 +394,9 @@
 #define PCI_DEVICE_ID_NS_SC1100_SMI	0x0511
 #define PCI_DEVICE_ID_NS_SC1100_XBUS	0x0515
 #define PCI_DEVICE_ID_NS_87410		0xd001
-#define PCI_DEVICE_ID_NS_CS5535_IDE	0x002d
 
 #define PCI_DEVICE_ID_NS_CS5535_HOST_BRIDGE  0x0028
 #define PCI_DEVICE_ID_NS_CS5535_ISA_BRIDGE   0x002b
-#define PCI_DEVICE_ID_NS_CS5535_IDE          0x002d
-#define PCI_DEVICE_ID_NS_CS5535_AUDIO        0x002e
-#define PCI_DEVICE_ID_NS_CS5535_USB          0x002f
-#define PCI_DEVICE_ID_NS_CS5535_VIDEO        0x0030
 
 #define PCI_VENDOR_ID_TSENG		0x100c
 #define PCI_DEVICE_ID_TSENG_W32P_2	0x3202
@@ -511,8 +506,6 @@
 #define PCI_DEVICE_ID_AMD_CS5536_UOC    0x2097
 #define PCI_DEVICE_ID_AMD_CS5536_IDE    0x209A
 
-#define PCI_DEVICE_ID_AMD_CS5536_IDE	0x209A
-
 #define PCI_DEVICE_ID_AMD_LX_VIDEO  0x2081
 #define PCI_DEVICE_ID_AMD_LX_AES    0x2082
 
-- 
cgit v1.2.3-71-gd317


From bd3f8f2b12bcf4ea25c89b84adeaafad232662c8 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Tue, 31 Jan 2006 19:10:23 -0800
Subject: [PATCH] Make sure to always check upper bits of tv_nsec in
 timespec_valid.

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/time.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 614dd8465839..7b4dc36532bb 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -48,7 +48,7 @@ extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
  * Returns true if the timespec is norm, false if denorm:
  */
 #define timespec_valid(ts) \
-	(((ts)->tv_sec >= 0) && (((unsigned) (ts)->tv_nsec) < NSEC_PER_SEC))
+	(((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
 
 /*
  * 64-bit nanosec type. Large enough to span 292+ years in nanosecond
-- 
cgit v1.2.3-71-gd317


From 3a2ca64496cc1c9aeab1076e06d092b3ec74a43d Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 1 Feb 2006 03:04:33 -0800
Subject: [PATCH] prototypes for *at functions & typo fix

Here's the follow-up patch which introduces the prototypes for the new
syscalls.  There was also a typo in one of the new symbols.

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/ia32_unistd.h |  2 +-
 include/linux/syscalls.h         | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h
index e87cd83a0e86..9afc0c7d3661 100644
--- a/include/asm-x86_64/ia32_unistd.h
+++ b/include/asm-x86_64/ia32_unistd.h
@@ -300,7 +300,7 @@
 #define __NR_ia32_inotify_add_watch	292
 #define __NR_ia32_inotify_rm_watch	293
 #define __NR_ia32_migrate_pages		294
-#define __NR_ia32_opanat		295
+#define __NR_ia32_openat		295
 #define __NR_ia32_mkdirat		296
 #define __NR_ia32_mknodat		297
 #define __NR_ia32_fchownat		298
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e666d6070569..fdbd436b24cc 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -50,6 +50,8 @@ struct timezone;
 struct tms;
 struct utimbuf;
 struct mq_attr;
+struct compat_stat;
+struct compat_timeval;
 
 #include <linux/config.h>
 #include <linux/types.h>
@@ -534,4 +536,35 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
 asmlinkage long sys_spu_create(const char __user *name,
 		unsigned int flags, mode_t mode);
 
+asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode,
+			    unsigned dev);
+asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, int mode);
+asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag);
+asmlinkage long sys_symlinkat(const char __user * oldname,
+			      int newdfd, const char __user * newname);
+asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
+			   int newdfd, const char __user *newname);
+asmlinkage long sys_renameat(int olddfd, const char __user * oldname,
+			     int newdfd, const char __user * newname);
+asmlinkage long sys_futimesat(int dfd, char __user *filename,
+			      struct timeval __user *utimes);
+asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode);
+asmlinkage long sys_fchmodat(int dfd, const char __user * filename,
+			     mode_t mode);
+asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
+			     gid_t group, int flag);
+asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
+			   int mode);
+asmlinkage long sys_newfstatat(int dfd, char __user *filename,
+			       struct stat __user *statbuf, int flag);
+asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf,
+			       int bufsiz);
+asmlinkage long compat_sys_futimesat(int dfd, char __user *filename,
+				     struct compat_timeval __user *t);
+asmlinkage long compat_sys_newfstatat(int dfd, char __user * filename,
+				      struct compat_stat __user *statbuf,
+				      int flag);
+asmlinkage long compat_sys_openat(int dfd, const char __user *filename,
+				   int flags, int mode);
+
 #endif
-- 
cgit v1.2.3-71-gd317


From 9cd684551124e71630ab96d238747051463f5b56 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 1 Feb 2006 03:04:40 -0800
Subject: [PATCH] fuse: fix async read for legacy filesystems

While asynchronous reads mean a performance improvement in most cases, if
the filesystem assumed that reads are synchronous, then async reads may
degrade performance (filesystem may receive reads out of order, which can
confuse its own readahead logic).

With sshfs a 1.5 to 4 times slowdown can be measured.

There's also a need for userspace filesystems to know whether asynchronous
reads are supported by the kernel or not.

To achieve these, negotiate in the INIT request whether async reads will be
used and the maximum readahead value.  Update interface version to 7.6

If userspace uses a version earlier than 7.6, then disable async reads, and
set maximum readahead value to the maximum read size, as done in previous
versions.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/file.c       |  9 +++++++--
 fs/fuse/fuse_i.h     |  3 +++
 fs/fuse/inode.c      | 14 ++++++++++++--
 include/linux/fuse.h | 16 ++++++++++++++--
 4 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a7ef5e716f3c..296351615b00 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -335,9 +335,14 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
 	loff_t pos = page_offset(req->pages[0]);
 	size_t count = req->num_pages << PAGE_CACHE_SHIFT;
 	req->out.page_zeroing = 1;
-	req->end = fuse_readpages_end;
 	fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
-	request_send_background(fc, req);
+	if (fc->async_read) {
+		req->end = fuse_readpages_end;
+		request_send_background(fc, req);
+	} else {
+		request_send(fc, req);
+		fuse_readpages_end(fc, req);
+	}
 }
 
 struct fuse_readpages_data {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 46cf933aa3bf..4a83adfec968 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -272,6 +272,9 @@ struct fuse_conn {
 	    reply, before any other request, and never cleared */
 	unsigned conn_error : 1;
 
+	/** Do readpages asynchronously?  Only set in INIT */
+	unsigned async_read : 1;
+
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index c755a0440a66..879e6fba9480 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -473,6 +473,16 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 	if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
 		fc->conn_error = 1;
 	else {
+		unsigned long ra_pages;
+
+		if (arg->minor >= 6) {
+			ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
+			if (arg->flags & FUSE_ASYNC_READ)
+				fc->async_read = 1;
+		} else
+			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+
+		fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
 		fc->minor = arg->minor;
 		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
 	}
@@ -496,6 +506,8 @@ static void fuse_send_init(struct fuse_conn *fc)
 
 	arg->major = FUSE_KERNEL_VERSION;
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
+	arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
+	arg->flags |= FUSE_ASYNC_READ;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
@@ -552,8 +564,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	fc->user_id = d.user_id;
 	fc->group_id = d.group_id;
 	fc->max_read = d.max_read;
-	if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
-		fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
 
 	/* Used by get_root_inode() */
 	sb->s_fs_info = fc;
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 528959c52f1b..5425b60021e3 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -14,7 +14,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 5
+#define FUSE_KERNEL_MINOR_VERSION 6
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -58,6 +58,9 @@ struct fuse_kstatfs {
 	__u32	spare[6];
 };
 
+/**
+ * Bitmasks for fuse_setattr_in.valid
+ */
 #define FATTR_MODE	(1 << 0)
 #define FATTR_UID	(1 << 1)
 #define FATTR_GID	(1 << 2)
@@ -75,6 +78,11 @@ struct fuse_kstatfs {
 #define FOPEN_DIRECT_IO		(1 << 0)
 #define FOPEN_KEEP_CACHE	(1 << 1)
 
+/**
+ * INIT request/reply flags
+ */
+#define FUSE_ASYNC_READ		(1 << 0)
+
 enum fuse_opcode {
 	FUSE_LOOKUP	   = 1,
 	FUSE_FORGET	   = 2,  /* no reply */
@@ -247,12 +255,16 @@ struct fuse_access_in {
 struct fuse_init_in {
 	__u32	major;
 	__u32	minor;
+	__u32	max_readahead;
+	__u32	flags;
 };
 
 struct fuse_init_out {
 	__u32	major;
 	__u32	minor;
-	__u32	unused[3];
+	__u32	max_readahead;
+	__u32	flags;
+	__u32	unused;
 	__u32	max_write;
 };
 
-- 
cgit v1.2.3-71-gd317


From 3ee247ebce93a526f482d6bc714ce796fa85a81a Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Wed, 1 Feb 2006 03:04:55 -0800
Subject: [PATCH] dm: dm-table warning fix

drivers/md/dm-table.c:500: warning: comparison of distinct pointer types lacks a cast

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/dm-table.c         | 2 +-
 include/linux/device-mapper.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index a6f2dc66c3db..9b1e2f5ca630 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -508,7 +508,7 @@ int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
 		if (q->merge_bvec_fn)
 			rs->max_sectors =
 				min_not_zero(rs->max_sectors,
-					     (unsigned short)(PAGE_SIZE >> 9));
+					     (unsigned int) (PAGE_SIZE >> 9));
 
 		rs->max_phys_segments =
 			min_not_zero(rs->max_phys_segments,
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 83c7d207b80e..51e0e95a421a 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -91,7 +91,7 @@ struct target_type {
 };
 
 struct io_restrictions {
-	unsigned short		max_sectors;
+	unsigned int		max_sectors;
 	unsigned short		max_phys_segments;
 	unsigned short		max_hw_segments;
 	unsigned short		hardsect_size;
-- 
cgit v1.2.3-71-gd317


From ed5a92700d3ce2646cb7763792a5f7ad1bade7e8 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 1 Feb 2006 03:05:00 -0800
Subject: [PATCH] tpm_bios: needs more securityfs_ functions

tpm_bios.c needs securityfs_xyz() functions.

Does include/linux/security.h need stubs for these, or should
char/tpm/Makefile just be modified to say:

ifdef CONFIG_ACPI
ifdef CONFIG_SECURITY
	obj-$(CONFIG_TCG_TPM) += tpm_bios.o
endif
endif

drivers/char/tpm/tpm_bios.c:494: warning: implicit declaration of function 'securityfs_create_dir'
drivers/char/tpm/tpm_bios.c:494: warning: assignment makes pointer from integer without a cast
drivers/char/tpm/tpm_bios.c:499: warning: implicit declaration of function 'securityfs_create_file'
drivers/char/tpm/tpm_bios.c:501: warning: assignment makes pointer from integer without a cast
drivers/char/tpm/tpm_bios.c:508: warning: assignment makes pointer from integer without a cast
drivers/char/tpm/tpm_bios.c:523: warning: implicit declaration of function 'securityfs_remove'
*** Warning: "securityfs_create_file" [drivers/char/tpm/tpm_bios.ko] undefined!
*** Warning: "securityfs_create_dir" [drivers/char/tpm/tpm_bios.ko] undefined!
*** Warning: "securityfs_remove" [drivers/char/tpm/tpm_bios.ko] undefined!

There are also some gcc and sparse warnings that could be fixed.
(see http://www.xenotime.net/linux/doc/build-tpm.out)

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Greg KH <greg@kroah.com>
Cc: Kylene Jo Hall <kjhall@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/security.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index ef753654daa5..bb1da86747c7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2617,6 +2617,25 @@ static inline int security_netlink_recv (struct sk_buff *skb)
 	return cap_netlink_recv (skb);
 }
 
+static inline struct dentry *securityfs_create_dir(const char *name,
+					struct dentry *parent)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline struct dentry *securityfs_create_file(const char *name,
+						mode_t mode,
+						struct dentry *parent,
+						void *data,
+						struct file_operations *fops)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void securityfs_remove(struct dentry *dentry)
+{
+}
+
 #endif	/* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
-- 
cgit v1.2.3-71-gd317


From ff60a5dc4fa584d47022d2533bc5c53b80096fb5 Mon Sep 17 00:00:00 2001
From: "akpm@osdl.org" <akpm@osdl.org>
Date: Wed, 1 Feb 2006 03:05:10 -0800
Subject: [PATCH] hrtimers: fix posix-timer requeue race

From: Steven Rostedt <rostedt@goodmis.org>

CPU0 expires a posix-timer and runs the callback function.  The signal is
queued.

After releasing the posix-timer lock and before returning to hrtimer_run_queue
CPU0 gets interrupted.  CPU1 delivers the queued signal and rearms the timer.
CPU0 comes back to hrtimer_run_queue and sets the timer state to expired.

The next modification of the timer can result in an oops, because the state
information is wrong.

Keep track of state = RUNNING and check if the state has been changed in the
return path of hrtimer_run_queue.  In case the state has been changed, ignore a
restart request and do not touch the state variable.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/hrtimer.h | 1 +
 kernel/hrtimer.c        | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 089bfb1fa01a..c657f3d4924a 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -40,6 +40,7 @@ enum hrtimer_restart {
 enum hrtimer_state {
 	HRTIMER_INACTIVE,	/* Timer is inactive */
 	HRTIMER_EXPIRED,		/* Timer is expired */
+	HRTIMER_RUNNING,		/* Timer is running the callback function */
 	HRTIMER_PENDING,		/* Timer is pending */
 };
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f1c4155b49ac..f580dd9db286 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -550,6 +550,7 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
 		fn = timer->function;
 		data = timer->data;
 		set_curr_timer(base, timer);
+		timer->state = HRTIMER_RUNNING;
 		__remove_hrtimer(timer, base);
 		spin_unlock_irq(&base->lock);
 
@@ -565,6 +566,10 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
 
 		spin_lock_irq(&base->lock);
 
+		/* Another CPU has added back the timer */
+		if (timer->state != HRTIMER_RUNNING)
+			continue;
+
 		if (restart == HRTIMER_RESTART)
 			enqueue_hrtimer(timer, base);
 		else
-- 
cgit v1.2.3-71-gd317


From 7978672c4d9a1e6a6081de3a9d9ba5e5b24904a0 Mon Sep 17 00:00:00 2001
From: George Anzinger <george@wildturkeyranch.net>
Date: Wed, 1 Feb 2006 03:05:11 -0800
Subject: [PATCH] hrtimers: cleanups and simplifications

Clean up the interface to hrtimers by changing the init code to pass the mode
as well as the clock.  This allows the init code to select the correct base and
eliminates extra timer re-init code in posix-timers.  We also simplify the
restart interface used by nanosleep.

Signed-off-by: George Anzinger <george@mvista.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/hrtimer.h |  5 ++---
 kernel/fork.c           |  2 +-
 kernel/hrtimer.c        | 59 ++++++++++++++++++++-----------------------------
 kernel/posix-timers.c   | 37 ++++++++-----------------------
 4 files changed, 36 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index c657f3d4924a..6361544bb6ae 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -101,9 +101,8 @@ struct hrtimer_base {
 /* Exported timer functions: */
 
 /* Initialize timers: */
-extern void hrtimer_init(struct hrtimer *timer, const clockid_t which_clock);
-extern void hrtimer_rebase(struct hrtimer *timer, const clockid_t which_clock);
-
+extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
+			 enum hrtimer_mode mode);
 
 /* Basic timer operations: */
 extern int hrtimer_start(struct hrtimer *timer, ktime_t tim,
diff --git a/kernel/fork.c b/kernel/fork.c
index 4ae8cfc1c89c..7f0ab5ee948c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -802,7 +802,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
 
-	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC);
+	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 	sig->real_timer.data = tsk;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f580dd9db286..efff9496b2fa 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -66,6 +66,12 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
 
 /*
  * The timer bases:
+ *
+ * Note: If we want to add new timer bases, we have to skip the two
+ * clock ids captured by the cpu-timers. We do this by holding empty
+ * entries rather than doing math adjustment of the clock ids.
+ * This ensures that we capture erroneous accesses to these clock ids
+ * rather than moving them into the range of valid clock id's.
  */
 
 #define MAX_HRTIMER_BASES 2
@@ -483,29 +489,25 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 }
 
 /**
- * hrtimer_rebase - rebase an initialized hrtimer to a different base
+ * hrtimer_init - initialize a timer to the given clock
  *
- * @timer:	the timer to be rebased
+ * @timer:	the timer to be initialized
  * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
  */
-void hrtimer_rebase(struct hrtimer *timer, const clockid_t clock_id)
+void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
+		  enum hrtimer_mode mode)
 {
 	struct hrtimer_base *bases;
 
+	memset(timer, 0, sizeof(struct hrtimer));
+
 	bases = per_cpu(hrtimer_bases, raw_smp_processor_id());
-	timer->base = &bases[clock_id];
-}
 
-/**
- * hrtimer_init - initialize a timer to the given clock
- *
- * @timer:	the timer to be initialized
- * @clock_id:	the clock to be used
- */
-void hrtimer_init(struct hrtimer *timer, const clockid_t clock_id)
-{
-	memset(timer, 0, sizeof(struct hrtimer));
-	hrtimer_rebase(timer, clock_id);
+	if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS)
+		clock_id = CLOCK_MONOTONIC;
+
+	timer->base = &bases[clock_id];
 }
 
 /**
@@ -643,8 +645,7 @@ schedule_hrtimer_interruptible(struct hrtimer *timer,
 	return schedule_hrtimer(timer, mode);
 }
 
-static long __sched
-nanosleep_restart(struct restart_block *restart, clockid_t clockid)
+static long __sched nanosleep_restart(struct restart_block *restart)
 {
 	struct timespec __user *rmtp;
 	struct timespec tu;
@@ -654,7 +655,7 @@ nanosleep_restart(struct restart_block *restart, clockid_t clockid)
 
 	restart->fn = do_no_restart_syscall;
 
-	hrtimer_init(&timer, clockid);
+	hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS);
 
 	timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
 
@@ -674,16 +675,6 @@ nanosleep_restart(struct restart_block *restart, clockid_t clockid)
 	return -ERESTART_RESTARTBLOCK;
 }
 
-static long __sched nanosleep_restart_mono(struct restart_block *restart)
-{
-	return nanosleep_restart(restart, CLOCK_MONOTONIC);
-}
-
-static long __sched nanosleep_restart_real(struct restart_block *restart)
-{
-	return nanosleep_restart(restart, CLOCK_REALTIME);
-}
-
 long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		       const enum hrtimer_mode mode, const clockid_t clockid)
 {
@@ -692,7 +683,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	struct timespec tu;
 	ktime_t rem;
 
-	hrtimer_init(&timer, clockid);
+	hrtimer_init(&timer, clockid, mode);
 
 	timer.expires = timespec_to_ktime(*rqtp);
 
@@ -700,7 +691,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	if (rem.tv64 <= 0)
 		return 0;
 
-	/* Absolute timers do not update the rmtp value: */
+	/* Absolute timers do not update the rmtp value and restart: */
 	if (mode == HRTIMER_ABS)
 		return -ERESTARTNOHAND;
 
@@ -710,11 +701,11 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 		return -EFAULT;
 
 	restart = &current_thread_info()->restart_block;
-	restart->fn = (clockid == CLOCK_MONOTONIC) ?
-		nanosleep_restart_mono : nanosleep_restart_real;
+	restart->fn = nanosleep_restart;
 	restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF;
 	restart->arg1 = timer.expires.tv64 >> 32;
 	restart->arg2 = (unsigned long) rmtp;
+	restart->arg3 = (unsigned long) timer.base->index;
 
 	return -ERESTART_RESTARTBLOCK;
 }
@@ -741,10 +732,8 @@ static void __devinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	for (i = 0; i < MAX_HRTIMER_BASES; i++) {
+	for (i = 0; i < MAX_HRTIMER_BASES; i++, base++)
 		spin_lock_init(&base->lock);
-		base++;
-	}
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 3b606d361b52..28e72fd0029f 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -194,9 +194,7 @@ static inline int common_clock_set(const clockid_t which_clock,
 
 static int common_timer_create(struct k_itimer *new_timer)
 {
-	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock);
-	new_timer->it.real.timer.data = new_timer;
-	new_timer->it.real.timer.function = posix_timer_fn;
+	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
 	return 0;
 }
 
@@ -693,6 +691,7 @@ common_timer_set(struct k_itimer *timr, int flags,
 		 struct itimerspec *new_setting, struct itimerspec *old_setting)
 {
 	struct hrtimer *timer = &timr->it.real.timer;
+	enum hrtimer_mode mode;
 
 	if (old_setting)
 		common_timer_get(timr, old_setting);
@@ -714,14 +713,10 @@ common_timer_set(struct k_itimer *timr, int flags,
 	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
 		return 0;
 
-	/* Posix madness. Only absolute CLOCK_REALTIME timers
-	 * are affected by clock sets. So we must reiniatilize
-	 * the timer.
-	 */
-	if (timr->it_clock == CLOCK_REALTIME && (flags & TIMER_ABSTIME))
-		hrtimer_rebase(timer, CLOCK_REALTIME);
-	else
-		hrtimer_rebase(timer, CLOCK_MONOTONIC);
+	mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL;
+	hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
+	timr->it.real.timer.data = timr;
+	timr->it.real.timer.function = posix_timer_fn;
 
 	timer->expires = timespec_to_ktime(new_setting->it_value);
 
@@ -732,8 +727,7 @@ common_timer_set(struct k_itimer *timr, int flags,
 	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
 		return 0;
 
-	hrtimer_start(timer, timer->expires, (flags & TIMER_ABSTIME) ?
-		      HRTIMER_ABS : HRTIMER_REL);
+	hrtimer_start(timer, timer->expires, mode);
 	return 0;
 }
 
@@ -948,21 +942,8 @@ sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp)
 static int common_nsleep(const clockid_t which_clock, int flags,
 			 struct timespec *tsave, struct timespec __user *rmtp)
 {
-	int mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL;
-	int clockid = which_clock;
-
-	switch (which_clock) {
-	case CLOCK_REALTIME:
-		/* Posix madness. Only absolute timers on clock realtime
-		   are affected by clock set. */
-		if (mode != HRTIMER_ABS)
-			clockid = CLOCK_MONOTONIC;
-	case CLOCK_MONOTONIC:
-		break;
-	default:
-		return -EINVAL;
-	}
-	return hrtimer_nanosleep(tsave, rmtp, mode, clockid);
+	return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ?
+				 HRTIMER_ABS : HRTIMER_REL, which_clock);
 }
 
 asmlinkage long
-- 
cgit v1.2.3-71-gd317


From 66188fae3bf7f8dd951e2291d2a81888ed1b65de Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 1 Feb 2006 03:05:13 -0800
Subject: [PATCH] hrtimers: add back lost credit lines

At some point we added credits to people who actively helped to bring
k/hr-timers along.  This was lost in the big code revamp.  Add it back.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ktime.h | 6 ++++++
 kernel/hrtimer.c      | 6 ++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 1bd6552cc341..6aca67a569a2 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -10,6 +10,12 @@
  *
  *  Started by: Thomas Gleixner and Ingo Molnar
  *
+ *  Credits:
+ *
+ *  	Roman Zippel provided the ideas and primary code snippets of
+ *  	the ktime_t union and further simplifications of the original
+ *  	code.
+ *
  *  For licencing details see kernel-base/COPYING
  */
 #ifndef _LINUX_KTIME_H
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index efff9496b2fa..2b6e1757aedd 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -21,6 +21,12 @@
  *  Credits:
  *	based on kernel/timer.c
  *
+ *	Help, testing, suggestions, bugfixes, improvements were
+ *	provided by:
+ *
+ *	George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
+ *	et. al.
+ *
  *  For licencing details see kernel-base/COPYING
  */
 
-- 
cgit v1.2.3-71-gd317


From 493f01d1d0699ddafc30067d33fcc18d0b95b624 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 1 Feb 2006 03:05:14 -0800
Subject: [PATCH] kernel/posix-timers.c: remove do_posix_clock_notimer_create()

This function is neither used nor has any real contents.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/posix-timers.h | 1 -
 kernel/posix-timers.c        | 6 ------
 2 files changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 54faf5236da0..95572c434bc9 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -84,7 +84,6 @@ struct k_clock {
 void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock);
 
 /* error handlers for timer_create, nanosleep and settime */
-int do_posix_clock_notimer_create(struct k_itimer *timer);
 int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *,
 			       struct timespec __user *);
 int do_posix_clock_nosettime(const clockid_t, struct timespec *tp);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index aad6f138d5c9..216f574b5ffb 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -875,12 +875,6 @@ int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
 }
 EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
 
-int do_posix_clock_notimer_create(struct k_itimer *timer)
-{
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create);
-
 int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
 			       struct timespec *t, struct timespec __user *r)
 {
-- 
cgit v1.2.3-71-gd317


From f7589f28d7dd4586b4e90ac3b2a180409669053a Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Wed, 1 Feb 2006 03:05:15 -0800
Subject: [PATCH] Define BITS_PER_BYTE

This can make the intent behind some arithmetic expressions clearer.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/types.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/types.h b/include/linux/types.h
index 21b9ce803644..54ae2d59e71b 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -8,6 +8,8 @@
 	(((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
 #define DECLARE_BITMAP(name,bits) \
 	unsigned long name[BITS_TO_LONGS(bits)]
+
+#define BITS_PER_BYTE 8
 #endif
 
 #include <linux/posix_types.h>
-- 
cgit v1.2.3-71-gd317


From c27a0d75b33c030965cc97d3d7f571107a673fb4 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Wed, 1 Feb 2006 03:05:16 -0800
Subject: [PATCH] Introduce __iowrite32_copy

This arch-independent routine copies data to a memory-mapped I/O region,
using 32-bit accesses.  The naming is double-underscored to make it clear
that it does not guarantee write ordering, nor does it perform a memory
barrier afterwards; the kernel doc also explicitly states this.  This style
of access is required by some devices.

This change also introduces include/linux/io.h, at Andrew's suggestion.  It
only has one occupant at the moment, but is a logical destination for
oft-replicated contents of include/asm-*/{io,iomap}.h to migrate to.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/io.h | 25 +++++++++++++++++++++++++
 lib/Makefile       |  2 +-
 lib/iomap_copy.c   | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/io.h
 create mode 100644 lib/iomap_copy.c

(limited to 'include/linux')

diff --git a/include/linux/io.h b/include/linux/io.h
new file mode 100644
index 000000000000..85533ec5aaa1
--- /dev/null
+++ b/include/linux/io.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2006 PathScale, Inc.  All Rights Reserved.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _LINUX_IO_H
+#define _LINUX_IO_H
+
+#include <asm/io.h>
+
+void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
+
+#endif /* _LINUX_IO_H */
diff --git a/lib/Makefile b/lib/Makefile
index 8535f4d7d1c3..648b2c1242fd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -9,7 +9,7 @@ lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
 
 lib-y	+= kobject.o kref.o kobject_uevent.o klist.o
 
-obj-y += sort.o parser.o halfmd4.o
+obj-y += sort.o parser.o halfmd4.o iomap_copy.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c
new file mode 100644
index 000000000000..a6b1e271d53c
--- /dev/null
+++ b/lib/iomap_copy.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2006 PathScale, Inc.  All Rights Reserved.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+
+/**
+ * __iowrite32_copy - copy data to MMIO space, in 32-bit units
+ * @to: destination, in MMIO space (must be 32-bit aligned)
+ * @from: source (must be 32-bit aligned)
+ * @count: number of 32-bit quantities to copy
+ *
+ * Copy data from kernel space to MMIO space, in units of 32 bits at a
+ * time.  Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ */
+void __attribute__((weak)) __iowrite32_copy(void __iomem *to,
+					    const void *from,
+					    size_t count)
+{
+	u32 __iomem *dst = to;
+	const u32 *src = from;
+	const u32 *end = src + count;
+
+	while (src < end)
+		__raw_writel(*src++, dst++);
+}
+EXPORT_SYMBOL_GPL(__iowrite32_copy);
-- 
cgit v1.2.3-71-gd317


From 79046ae07ae21245520ca0aab985ee6678a879f8 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 1 Feb 2006 03:05:26 -0800
Subject: [PATCH] GFP_ZONETYPES: add commentry on how to calculate

GFP_ZONETYPES define using GFP_ZONEMASK and add commentary

Add commentary explaining the optimisation that we can apply to GFP_ZONETYPES
when the leftmost bit is a 'loner', i.e. it can only be set in isolation.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 93a849f742db..88c30f844abf 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -91,6 +91,14 @@ struct per_cpu_pageset {
  * be 8 (2 ** 3) zonelists.  GFP_ZONETYPES defines the number of possible
  * combinations of zone modifiers in "zone modifier space".
  *
+ * As an optimisation any zone modifier bits which are only valid when
+ * no other zone modifier bits are set (loners) should be placed in
+ * the highest order bits of this field.  This allows us to reduce the
+ * extent of the zonelists thus saving space.  For example in the case
+ * of three zone modifier bits, we could require up to eight zonelists.
+ * If the left most zone modifier is a "loner" then the highest valid
+ * zonelist would be four allowing us to allocate only five zonelists.
+ *
  * NOTE! Make sure this matches the zones in <linux/gfp.h>
  */
 #define GFP_ZONEMASK	0x07
-- 
cgit v1.2.3-71-gd317


From ce2ea89ba101d976907128441ba3aca72a8804b9 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Wed, 1 Feb 2006 03:05:27 -0800
Subject: [PATCH] GFP_ZONETYPES: calculate from GFP_ZONEMASK

GFP_ZONETYPES calculate from GFP_ZONEMASK

GFP_ZONETYPES's value is directly related to the value of GFP_ZONEMASK.  It
takes one of two forms depending on whether the top bit of GFP_ZONEMASK is a
'loner'.  Supply both forms, enabling the loner.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 88c30f844abf..ebfc238cc243 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -98,11 +98,14 @@ struct per_cpu_pageset {
  * of three zone modifier bits, we could require up to eight zonelists.
  * If the left most zone modifier is a "loner" then the highest valid
  * zonelist would be four allowing us to allocate only five zonelists.
+ * Use the first form for GFP_ZONETYPES when the left most bit is not
+ * a "loner", otherwise use the second.
  *
  * NOTE! Make sure this matches the zones in <linux/gfp.h>
  */
 #define GFP_ZONEMASK	0x07
-#define GFP_ZONETYPES	5
+/* #define GFP_ZONETYPES       (GFP_ZONEMASK + 1) */           /* Non-loner */
+#define GFP_ZONETYPES  ((GFP_ZONEMASK + 1) / 2 + 1)            /* Loner */
 
 /*
  * On machines where it is needed (eg PCs) we divide physical memory
-- 
cgit v1.2.3-71-gd317


From 2a11ff06d7d12be5d1bbcf592fff649b45ac2388 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Wed, 1 Feb 2006 03:05:33 -0800
Subject: [PATCH] zone_reclaim: configurable off node allocation period.

Currently the zone_reclaim code has a fixed window of 30 seconds of off node
allocations should a local zone have no unused pagecache pages left.  Reclaim
will be attempted again after this timeout period to avoid repeated useless
scans for memory.  This is also useful to establish sufficiently large
off-node allocation chunks to relieve the local node.

It may be beneficial to adjust that time period for some special situations.
For example if memory use was exceeding node capacity one may want to give up
for longer periods of time.  If memory spikes intermittently then one may want
to shorten the time period to reduce the number of off node allocations.

This patch allows just that....

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/sysctl/vm.txt | 12 ++++++++++++
 include/linux/swap.h        |  1 +
 include/linux/sysctl.h      |  3 ++-
 kernel/sysctl.c             |  9 +++++++++
 mm/vmscan.c                 |  4 ++--
 5 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 391dd64363e7..44518c023949 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/vm:
 - block_dump
 - drop-caches
 - zone_reclaim_mode
+- zone_reclaim_interval
 
 ==============================================================
 
@@ -137,4 +138,15 @@ of memory should be used for caching files from disk.
 
 It may be beneficial to switch this on if one wants to do zone
 reclaim regardless of the numa distances in the system.
+================================================================
+
+zone_reclaim_interval:
+
+The time allowed for off node allocations after zone reclaim
+has failed to reclaim enough pages to allow a local allocation.
+
+Time is set in seconds and set by default to 30 seconds.
+
+Reduce the interval if undesired off node allocations occur. However, too
+frequent scans will have a negative impact on off node allocation performance.
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4a99e4a7fbf3..e53fef7051e6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -178,6 +178,7 @@ extern int vm_swappiness;
 
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
+extern int zone_reclaim_interval;
 extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
 #else
 #define zone_reclaim_mode 0
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 8352a7ce5895..32a4139c4ad8 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -182,7 +182,8 @@ enum
 	VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
 	VM_DROP_PAGECACHE=29,	/* int: nuke lots of pagecache */
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
-	VM_ZONE_RECLAIM_MODE=31,/* reclaim local zone memory before going off node */
+	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
+	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
 };
 
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c74f03bc0144..71dd6f62efec 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -881,6 +881,15 @@ static ctl_table vm_table[] = {
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 	},
+	{
+		.ctl_name	= VM_ZONE_RECLAIM_INTERVAL,
+		.procname	= "zone_reclaim_interval",
+		.data		= &zone_reclaim_interval,
+		.maxlen		= sizeof(zone_reclaim_interval),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
 #endif
 	{ .ctl_name = 0 }
 };
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f8b94ea6f722..8760a4abfa1f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1595,7 +1595,7 @@ int zone_reclaim_mode __read_mostly;
 /*
  * Mininum time between zone reclaim scans
  */
-#define ZONE_RECLAIM_INTERVAL 30*HZ
+int zone_reclaim_interval __read_mostly = 30*HZ;
 
 /*
  * Priority for ZONE_RECLAIM. This determines the fraction of pages
@@ -1617,7 +1617,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	int node_id;
 
 	if (time_before(jiffies,
-		zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL))
+		zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval))
 			return 0;
 
 	if (!(gfp_mask & __GFP_WAIT) ||
-- 
cgit v1.2.3-71-gd317


From a48d07afdf18212de22b959715b16793c5a6e57a Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 1 Feb 2006 03:05:38 -0800
Subject: [PATCH] Direct Migration V9: migrate_pages() extension

Add direct migration support with fall back to swap.

Direct migration support on top of the swap based page migration facility.

This allows the direct migration of anonymous pages and the migration of file
backed pages by dropping the associated buffers (requires writeout).

Fall back to swap out if necessary.

The patch is based on lots of patches from the hotplug project but the code
was restructured, documented and simplified as much as possible.

Note that an additional patch that defines the migrate_page() method for
filesystems is necessary in order to avoid writeback for anonymous and file
backed pages.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/vm/page_migration | 129 +++++++++++++++++++++++
 include/linux/rmap.h            |   4 +-
 include/linux/swap.h            |   2 +
 mm/rmap.c                       |  21 ++--
 mm/vmscan.c                     | 226 ++++++++++++++++++++++++++++++++++++++--
 5 files changed, 360 insertions(+), 22 deletions(-)
 create mode 100644 Documentation/vm/page_migration

(limited to 'include/linux')

diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration
new file mode 100644
index 000000000000..c52820fcf500
--- /dev/null
+++ b/Documentation/vm/page_migration
@@ -0,0 +1,129 @@
+Page migration
+--------------
+
+Page migration allows the moving of the physical location of pages between
+nodes in a numa system while the process is running. This means that the
+virtual addresses that the process sees do not change. However, the
+system rearranges the physical location of those pages.
+
+The main intent of page migration is to reduce the latency of memory access
+by moving pages near to the processor where the process accessing that memory
+is running.
+
+Page migration allows a process to manually relocate the node on which its
+pages are located through the MF_MOVE and MF_MOVE_ALL options while setting
+a new memory policy. The pages of a process can also be relocated
+from another process using the sys_migrate_pages() function call. The
+migrate_pages function call takes two sets of nodes and moves pages of a
+process that are located on the from nodes to the destination nodes.
+
+Manual migration is very useful if for example the scheduler has relocated
+a process to a processor on a distant node. A batch scheduler or an
+administrator may detect the situation and move the pages of the process
+nearer to the new processor. At some point in the future we may have
+some mechanism in the scheduler that will automatically move the pages.
+
+Larger installations usually partition the system using cpusets into
+sections of nodes. Paul Jackson has equipped cpusets with the ability to
+move pages when a task is moved to another cpuset. This allows automatic
+control over locality of a process. If a task is moved to a new cpuset
+then also all its pages are moved with it so that the performance of the
+process does not sink dramatically (as is the case today).
+
+Page migration allows the preservation of the relative location of pages
+within a group of nodes for all migration techniques which will preserve a
+particular memory allocation pattern generated even after migrating a
+process. This is necessary in order to preserve the memory latencies.
+Processes will run with similar performance after migration.
+
+Page migration occurs in several steps. First comes a high level
+description for those trying to use migrate_pages(), followed by
+a low level description of how the details work.
+
+A. Use of migrate_pages()
+-------------------------
+
+1. Remove pages from the LRU.
+
+   Lists of pages to be migrated are generated by scanning over
+   pages and moving them into lists. This is done by
+   calling isolate_lru_page() or __isolate_lru_page().
+   Calling isolate_lru_page increases the references to the page
+   so that it cannot vanish under us.
+
+2. Generate a list of newly allocated pages to move the contents
+   of the first list to.
+
+3. The migrate_pages() function is called which attempts
+   to do the migration. It returns the moved pages in the
+   list specified as the third parameter and the failed
+   migrations in the fourth parameter. The first parameter
+   will contain the pages that could still be retried.
+
+4. The leftover pages of various types are returned
+   to the LRU using putback_lru_pages() or otherwise
+   disposed of. The pages will still have the refcount as
+   increased by isolate_lru_page()!
+
+B. Operation of migrate_pages()
+--------------------------------
+
+migrate_pages does several passes over its list of pages. A page is moved
+if all references to a page are removable at the time.
+
+Steps:
+
+1. Lock the page to be migrated
+
+2. Ensure that writeback is complete.
+
+3. Make sure that the page has an assigned swap cache entry if
+   it is an anonymous page. The swap cache reference is necessary
+   to preserve the information contained in the page table maps.
+
+4. Prep the new page that we want to move to. It is locked
+   and set to not being uptodate so that all accesses to the new
+   page immediately lock while we are moving references.
+
+5. All the page table references to the page are either dropped (file backed)
+   or converted to swap references (anonymous pages). This should decrease the
+   reference count.
+
+6. The radix tree lock is taken
+
+7. The refcount of the page is examined and we back out if references remain
+   otherwise we know that we are the only one referencing this page.
+
+8. The radix tree is checked and if it does not contain the pointer to this
+   page then we back out.
+
+9. The mapping is checked. If the mapping is gone then a truncate action may
+   be in progress and we back out.
+
+10. The new page is prepped with some settings from the old page so that accesses
+   to the new page will be discovered to have the correct settings.
+
+11. The radix tree is changed to point to the new page.
+
+12. The reference count of the old page is dropped because the reference has now
+    been removed.
+
+13. The radix tree lock is dropped.
+
+14. The page contents are copied to the new page.
+
+15. The remaining page flags are copied to the new page.
+
+16. The old page flags are cleared to indicate that the page does
+    not use any information anymore.
+
+17. Queued up writeback on the new page is triggered.
+
+18. If swap pte's were generated for the page then remove them again.
+
+19. The locks are dropped from the old and new page.
+
+20. The new page is moved to the LRU.
+
+Christoph Lameter, December 19, 2005.
+
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 9d6fbeef2104..0f1ea2d6ed86 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -91,7 +91,7 @@ static inline void page_dup_rmap(struct page *page)
  * Called from mm/vmscan.c to handle paging out
  */
 int page_referenced(struct page *, int is_locked);
-int try_to_unmap(struct page *);
+int try_to_unmap(struct page *, int ignore_refs);
 
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
@@ -111,7 +111,7 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 #define anon_vma_link(vma)	do {} while (0)
 
 #define page_referenced(page,l) TestClearPageReferenced(page)
-#define try_to_unmap(page)	SWAP_FAIL
+#define try_to_unmap(page, refs) SWAP_FAIL
 
 #endif	/* CONFIG_MMU */
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index e53fef7051e6..d359fc022433 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -191,6 +191,8 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 #ifdef CONFIG_MIGRATION
 extern int isolate_lru_page(struct page *p);
 extern int putback_lru_pages(struct list_head *l);
+extern int migrate_page(struct page *, struct page *);
+extern void migrate_page_copy(struct page *, struct page *);
 extern int migrate_pages(struct list_head *l, struct list_head *t,
 		struct list_head *moved, struct list_head *failed);
 #else
diff --git a/mm/rmap.c b/mm/rmap.c
index d85a99d28c03..13fad5fcdf79 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -52,6 +52,7 @@
 #include <linux/init.h>
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
+#include <linux/module.h>
 
 #include <asm/tlbflush.h>
 
@@ -541,7 +542,8 @@ void page_remove_rmap(struct page *page)
  * Subfunctions of try_to_unmap: try_to_unmap_one called
  * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
  */
-static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
+static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+				int ignore_refs)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -564,7 +566,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 	 * skipped over this mm) then we should reactivate it.
 	 */
 	if ((vma->vm_flags & VM_LOCKED) ||
-			ptep_clear_flush_young(vma, address, pte)) {
+			(ptep_clear_flush_young(vma, address, pte)
+				&& !ignore_refs)) {
 		ret = SWAP_FAIL;
 		goto out_unmap;
 	}
@@ -698,7 +701,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	pte_unmap_unlock(pte - 1, ptl);
 }
 
-static int try_to_unmap_anon(struct page *page)
+static int try_to_unmap_anon(struct page *page, int ignore_refs)
 {
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
@@ -709,7 +712,7 @@ static int try_to_unmap_anon(struct page *page)
 		return ret;
 
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-		ret = try_to_unmap_one(page, vma);
+		ret = try_to_unmap_one(page, vma, ignore_refs);
 		if (ret == SWAP_FAIL || !page_mapped(page))
 			break;
 	}
@@ -726,7 +729,7 @@ static int try_to_unmap_anon(struct page *page)
  *
  * This function is only called from try_to_unmap for object-based pages.
  */
-static int try_to_unmap_file(struct page *page)
+static int try_to_unmap_file(struct page *page, int ignore_refs)
 {
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -740,7 +743,7 @@ static int try_to_unmap_file(struct page *page)
 
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-		ret = try_to_unmap_one(page, vma);
+		ret = try_to_unmap_one(page, vma, ignore_refs);
 		if (ret == SWAP_FAIL || !page_mapped(page))
 			goto out;
 	}
@@ -825,16 +828,16 @@ out:
  * SWAP_AGAIN	- we missed a mapping, try again later
  * SWAP_FAIL	- the page is unswappable
  */
-int try_to_unmap(struct page *page)
+int try_to_unmap(struct page *page, int ignore_refs)
 {
 	int ret;
 
 	BUG_ON(!PageLocked(page));
 
 	if (PageAnon(page))
-		ret = try_to_unmap_anon(page);
+		ret = try_to_unmap_anon(page, ignore_refs);
 	else
-		ret = try_to_unmap_file(page);
+		ret = try_to_unmap_file(page, ignore_refs);
 
 	if (!page_mapped(page))
 		ret = SWAP_SUCCESS;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index aa4b80dbe3ad..8f326ce2b690 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -483,7 +483,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 			if (!sc->may_swap)
 				goto keep_locked;
 
-			switch (try_to_unmap(page)) {
+			switch (try_to_unmap(page, 0)) {
 			case SWAP_FAIL:
 				goto activate_locked;
 			case SWAP_AGAIN:
@@ -623,7 +623,7 @@ static int swap_page(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 	if (page_mapped(page) && mapping)
-		if (try_to_unmap(page) != SWAP_SUCCESS)
+		if (try_to_unmap(page, 0) != SWAP_SUCCESS)
 			goto unlock_retry;
 
 	if (PageDirty(page)) {
@@ -659,6 +659,154 @@ unlock_retry:
 retry:
 	return -EAGAIN;
 }
+
+/*
+ * Page migration was first developed in the context of the memory hotplug
+ * project. The main authors of the migration code are:
+ *
+ * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
+ * Hirokazu Takahashi <taka@valinux.co.jp>
+ * Dave Hansen <haveblue@us.ibm.com>
+ * Christoph Lameter <clameter@sgi.com>
+ */
+
+/*
+ * Remove references for a page and establish the new page with the correct
+ * basic settings to be able to stop accesses to the page.
+ */
+static int migrate_page_remove_references(struct page *newpage,
+				struct page *page, int nr_refs)
+{
+	struct address_space *mapping = page_mapping(page);
+	struct page **radix_pointer;
+
+	/*
+	 * Avoid doing any of the following work if the page count
+	 * indicates that the page is in use or truncate has removed
+	 * the page.
+	 */
+	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
+		return 1;
+
+	/*
+	 * Establish swap ptes for anonymous pages or destroy pte
+	 * maps for files.
+	 *
+	 * In order to reestablish file backed mappings the fault handlers
+	 * will take the radix tree_lock which may then be used to stop
+  	 * processes from accessing this page until the new page is ready.
+	 *
+	 * A process accessing via a swap pte (an anonymous page) will take a
+	 * page_lock on the old page which will block the process until the
+	 * migration attempt is complete. At that time the PageSwapCache bit
+	 * will be examined. If the page was migrated then the PageSwapCache
+	 * bit will be clear and the operation to retrieve the page will be
+	 * retried which will find the new page in the radix tree. Then a new
+	 * direct mapping may be generated based on the radix tree contents.
+	 *
+	 * If the page was not migrated then the PageSwapCache bit
+	 * is still set and the operation may continue.
+	 */
+	try_to_unmap(page, 1);
+
+	/*
+	 * Give up if we were unable to remove all mappings.
+	 */
+	if (page_mapcount(page))
+		return 1;
+
+	write_lock_irq(&mapping->tree_lock);
+
+	radix_pointer = (struct page **)radix_tree_lookup_slot(
+						&mapping->page_tree,
+						page_index(page));
+
+	if (!page_mapping(page) || page_count(page) != nr_refs ||
+			*radix_pointer != page) {
+		write_unlock_irq(&mapping->tree_lock);
+		return 1;
+	}
+
+	/*
+	 * Now we know that no one else is looking at the page.
+	 *
+	 * Certain minimal information about a page must be available
+	 * in order for other subsystems to properly handle the page if they
+	 * find it through the radix tree update before we are finished
+	 * copying the page.
+	 */
+	get_page(newpage);
+	newpage->index = page->index;
+	newpage->mapping = page->mapping;
+	if (PageSwapCache(page)) {
+		SetPageSwapCache(newpage);
+		set_page_private(newpage, page_private(page));
+	}
+
+	*radix_pointer = newpage;
+	__put_page(page);
+	write_unlock_irq(&mapping->tree_lock);
+
+	return 0;
+}
+
+/*
+ * Copy the page to its new location
+ */
+void migrate_page_copy(struct page *newpage, struct page *page)
+{
+	copy_highpage(newpage, page);
+
+	if (PageError(page))
+		SetPageError(newpage);
+	if (PageReferenced(page))
+		SetPageReferenced(newpage);
+	if (PageUptodate(page))
+		SetPageUptodate(newpage);
+	if (PageActive(page))
+		SetPageActive(newpage);
+	if (PageChecked(page))
+		SetPageChecked(newpage);
+	if (PageMappedToDisk(page))
+		SetPageMappedToDisk(newpage);
+
+	if (PageDirty(page)) {
+		clear_page_dirty_for_io(page);
+		set_page_dirty(newpage);
+ 	}
+
+	ClearPageSwapCache(page);
+	ClearPageActive(page);
+	ClearPagePrivate(page);
+	set_page_private(page, 0);
+	page->mapping = NULL;
+
+	/*
+	 * If any waiters have accumulated on the new page then
+	 * wake them up.
+	 */
+	if (PageWriteback(newpage))
+		end_page_writeback(newpage);
+}
+
+/*
+ * Common logic to directly migrate a single page suitable for
+ * pages that do not use PagePrivate.
+ *
+ * Pages are locked upon entry and exit.
+ */
+int migrate_page(struct page *newpage, struct page *page)
+{
+	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
+
+	if (migrate_page_remove_references(newpage, page, 2))
+		return -EAGAIN;
+
+	migrate_page_copy(newpage, page);
+
+	return 0;
+}
+
 /*
  * migrate_pages
  *
@@ -672,11 +820,6 @@ retry:
  * are movable anymore because t has become empty
  * or no retryable pages exist anymore.
  *
- * SIMPLIFIED VERSION: This implementation of migrate_pages
- * is only swapping out pages and never touches the second
- * list. The direct migration patchset
- * extends this function to avoid the use of swap.
- *
  * Return: Number of pages not migrated when "to" ran empty.
  */
 int migrate_pages(struct list_head *from, struct list_head *to,
@@ -697,6 +840,9 @@ redo:
 	retry = 0;
 
 	list_for_each_entry_safe(page, page2, from, lru) {
+		struct page *newpage = NULL;
+		struct address_space *mapping;
+
 		cond_resched();
 
 		rc = 0;
@@ -704,6 +850,9 @@ redo:
 			/* page was freed from under us. So we are done. */
 			goto next;
 
+		if (to && list_empty(to))
+			break;
+
 		/*
 		 * Skip locked pages during the first two passes to give the
 		 * functions holding the lock time to release the page. Later we
@@ -740,12 +889,64 @@ redo:
 			}
 		}
 
+		if (!to) {
+			rc = swap_page(page);
+			goto next;
+		}
+
+		newpage = lru_to_page(to);
+		lock_page(newpage);
+
 		/*
-		 * Page is properly locked and writeback is complete.
+		 * Pages are properly locked and writeback is complete.
 		 * Try to migrate the page.
 		 */
-		rc = swap_page(page);
-		goto next;
+		mapping = page_mapping(page);
+		if (!mapping)
+			goto unlock_both;
+
+		/*
+		 * Trigger writeout if page is dirty
+		 */
+		if (PageDirty(page)) {
+			switch (pageout(page, mapping)) {
+			case PAGE_KEEP:
+			case PAGE_ACTIVATE:
+				goto unlock_both;
+
+			case PAGE_SUCCESS:
+				unlock_page(newpage);
+				goto next;
+
+			case PAGE_CLEAN:
+				; /* try to migrate the page below */
+			}
+                }
+		/*
+		 * If we have no buffer or can release the buffer
+		 * then do a simple migration.
+		 */
+		if (!page_has_buffers(page) ||
+		    try_to_release_page(page, GFP_KERNEL)) {
+			rc = migrate_page(newpage, page);
+			goto unlock_both;
+		}
+
+		/*
+		 * On early passes with mapped pages simply
+		 * retry. There may be a lock held for some
+		 * buffers that may go away. Later
+		 * swap them out.
+		 */
+		if (pass > 4) {
+			unlock_page(newpage);
+			newpage = NULL;
+			rc = swap_page(page);
+			goto next;
+		}
+
+unlock_both:
+		unlock_page(newpage);
 
 unlock_page:
 		unlock_page(page);
@@ -758,7 +959,10 @@ next:
 			list_move(&page->lru, failed);
 			nr_failed++;
 		} else {
-			/* Success */
+			if (newpage) {
+				/* Successful migration. Return page to LRU */
+				move_to_lru(newpage);
+			}
 			list_move(&page->lru, moved);
 		}
 	}
-- 
cgit v1.2.3-71-gd317


From a3351e525e4768c29aa5d22ef59b5b38e0361e53 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 1 Feb 2006 03:05:39 -0800
Subject: [PATCH] Direct Migration V9: remove_from_swap() to remove swap ptes

Add remove_from_swap

remove_from_swap() allows the restoration of the pte entries that existed
before page migration occurred for anonymous pages by walking the reverse
maps.  This reduces swap use and establishes regular pte's without the need
for page faults.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/rmap.h |  1 +
 include/linux/swap.h |  1 +
 mm/rmap.c            | 29 +++++++++++++++++++++++++++++
 mm/swapfile.c        |  9 +++++++++
 mm/vmscan.c          |  9 +++++++++
 5 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 0f1ea2d6ed86..d6b9bcd1384c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -92,6 +92,7 @@ static inline void page_dup_rmap(struct page *page)
  */
 int page_referenced(struct page *, int is_locked);
 int try_to_unmap(struct page *, int ignore_refs);
+void remove_from_swap(struct page *page);
 
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d359fc022433..229b6d04b4b6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -248,6 +248,7 @@ extern int remove_exclusive_swap_page(struct page *);
 struct backing_dev_info;
 
 extern spinlock_t swap_lock;
+extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page);
 
 /* linux/mm/thrash.c */
 extern struct mm_struct * swap_token_mm;
diff --git a/mm/rmap.c b/mm/rmap.c
index 13fad5fcdf79..f4b91d7aa5cf 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -206,6 +206,35 @@ out:
 	return anon_vma;
 }
 
+#ifdef CONFIG_MIGRATION
+/*
+ * Remove an anonymous page from swap replacing the swap pte's
+ * through real pte's pointing to valid pages and then releasing
+ * the page from the swap cache.
+ *
+ * Must hold page lock on page.
+ */
+void remove_from_swap(struct page *page)
+{
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+
+	if (!PageAnon(page) || !PageSwapCache(page))
+		return;
+
+	anon_vma = page_lock_anon_vma(page);
+	if (!anon_vma)
+		return;
+
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
+		remove_vma_swap(vma, page);
+
+	spin_unlock(&anon_vma->lock);
+
+	delete_from_swap_cache(page);
+}
+#endif
+
 /*
  * At what user virtual address is page expected in vma?
  */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 9678182e0eef..1f9cf0d073b8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -554,6 +554,15 @@ static int unuse_mm(struct mm_struct *mm,
 	return 0;
 }
 
+#ifdef CONFIG_MIGRATION
+int remove_vma_swap(struct vm_area_struct *vma, struct page *page)
+{
+	swp_entry_t entry = { .val = page_private(page) };
+
+	return unuse_vma(vma, entry, page);
+}
+#endif
+
 /*
  * Scan swap_map from current position to next entry still in use.
  * Recycle to start on reaching the end, returning 0 when empty.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8f326ce2b690..5e98b86feb74 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -804,6 +804,15 @@ int migrate_page(struct page *newpage, struct page *page)
 
 	migrate_page_copy(newpage, page);
 
+	/*
+	 * Remove auxiliary swap entries and replace
+	 * them with real ptes.
+	 *
+	 * Note that a real pte entry will allow processes that are not
+	 * waiting on the page lock to use the new page via the page tables
+	 * before the new page is unlocked.
+	 */
+	remove_from_swap(newpage);
 	return 0;
 }
 
-- 
cgit v1.2.3-71-gd317


From e965f9630c651fa4249039fd4b80c9392d07a856 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 1 Feb 2006 03:05:41 -0800
Subject: [PATCH] Direct Migration V9: Avoid writeback / page_migrate() method

Migrate a page with buffers without requiring writeback

This introduces a new address space operation migratepage() that may be used
by a filesystem to implement its own version of page migration.

A version is provided that migrates buffers attached to pages.  Some
filesystems (ext2, ext3, xfs) are modified to utilize this feature.

The swapper address space operations are modified so that a regular
migrate_page() will occur for anonymous pages without writeback (migrate_pages
forces every anonymous page to have a swap entry).

Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c                 | 60 +++++++++++++++++++++++++++++++++++++++++++++
 fs/ext2/inode.c             |  2 ++
 fs/ext3/inode.c             |  2 ++
 fs/xfs/linux-2.6/xfs_aops.c |  1 +
 fs/xfs/linux-2.6/xfs_buf.c  |  1 +
 include/linux/fs.h          |  8 ++++++
 include/linux/swap.h        |  5 ++++
 mm/rmap.c                   |  1 +
 mm/swap_state.c             |  1 +
 mm/vmscan.c                 | 20 ++++++++++++++-
 10 files changed, 100 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 3dc712f29d2d..8bcbac87a28c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3049,6 +3049,66 @@ asmlinkage long sys_bdflush(int func, long data)
 	return 0;
 }
 
+/*
+ * Migration function for pages with buffers. This function can only be used
+ * if the underlying filesystem guarantees that no other references to "page"
+ * exist.
+ */
+#ifdef CONFIG_MIGRATION
+int buffer_migrate_page(struct page *newpage, struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct buffer_head *bh, *head;
+
+	if (!mapping)
+		return -EAGAIN;
+
+	if (!page_has_buffers(page))
+		return migrate_page(newpage, page);
+
+	head = page_buffers(page);
+
+	if (migrate_page_remove_references(newpage, page, 3))
+		return -EAGAIN;
+
+	bh = head;
+	do {
+		get_bh(bh);
+		lock_buffer(bh);
+		bh = bh->b_this_page;
+
+	} while (bh != head);
+
+	ClearPagePrivate(page);
+	set_page_private(newpage, page_private(page));
+	set_page_private(page, 0);
+	put_page(page);
+	get_page(newpage);
+
+	bh = head;
+	do {
+		set_bh_page(bh, newpage, bh_offset(bh));
+		bh = bh->b_this_page;
+
+	} while (bh != head);
+
+	SetPagePrivate(newpage);
+
+	migrate_page_copy(newpage, page);
+
+	bh = head;
+	do {
+		unlock_buffer(bh);
+ 		put_bh(bh);
+		bh = bh->b_this_page;
+
+	} while (bh != head);
+
+	return 0;
+}
+EXPORT_SYMBOL(buffer_migrate_page);
+#endif
+
 /*
  * Buffer-head allocation
  */
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e7d3f0522d01..a717837f272e 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -706,6 +706,7 @@ struct address_space_operations ext2_aops = {
 	.bmap			= ext2_bmap,
 	.direct_IO		= ext2_direct_IO,
 	.writepages		= ext2_writepages,
+	.migratepage		= buffer_migrate_page,
 };
 
 struct address_space_operations ext2_aops_xip = {
@@ -723,6 +724,7 @@ struct address_space_operations ext2_nobh_aops = {
 	.bmap			= ext2_bmap,
 	.direct_IO		= ext2_direct_IO,
 	.writepages		= ext2_writepages,
+	.migratepage		= buffer_migrate_page,
 };
 
 /*
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 8824e84f8a56..3fc4238e9703 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1559,6 +1559,7 @@ static struct address_space_operations ext3_ordered_aops = {
 	.invalidatepage	= ext3_invalidatepage,
 	.releasepage	= ext3_releasepage,
 	.direct_IO	= ext3_direct_IO,
+	.migratepage	= buffer_migrate_page,
 };
 
 static struct address_space_operations ext3_writeback_aops = {
@@ -1572,6 +1573,7 @@ static struct address_space_operations ext3_writeback_aops = {
 	.invalidatepage	= ext3_invalidatepage,
 	.releasepage	= ext3_releasepage,
 	.direct_IO	= ext3_direct_IO,
+	.migratepage	= buffer_migrate_page,
 };
 
 static struct address_space_operations ext3_journalled_aops = {
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 120626789406..9892268e3005 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1462,4 +1462,5 @@ struct address_space_operations linvfs_aops = {
 	.commit_write		= generic_commit_write,
 	.bmap			= linvfs_bmap,
 	.direct_IO		= linvfs_direct_IO,
+	.migratepage		= buffer_migrate_page,
 };
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index a36a8e3b703f..bfb4f2917bb6 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1521,6 +1521,7 @@ xfs_mapping_buftarg(
 	struct address_space	*mapping;
 	static struct address_space_operations mapping_aops = {
 		.sync_page = block_sync_page,
+		.migratepage = fail_migrate_page,
 	};
 
 	inode = new_inode(bdev->bd_inode->i_sb);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 84bb449b9b01..e059da947007 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -363,6 +363,8 @@ struct address_space_operations {
 			loff_t offset, unsigned long nr_segs);
 	struct page* (*get_xip_page)(struct address_space *, sector_t,
 			int);
+	/* migrate the contents of a page to the specified target */
+	int (*migratepage) (struct page *, struct page *);
 };
 
 struct backing_dev_info;
@@ -1719,6 +1721,12 @@ extern void simple_release_fs(struct vfsmount **mount, int *count);
 
 extern ssize_t simple_read_from_buffer(void __user *, size_t, loff_t *, const void *, size_t);
 
+#ifdef CONFIG_MIGRATION
+extern int buffer_migrate_page(struct page *, struct page *);
+#else
+#define buffer_migrate_page NULL
+#endif
+
 extern int inode_change_ok(struct inode *, struct iattr *);
 extern int __must_check inode_setattr(struct inode *, struct iattr *);
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 229b6d04b4b6..f3e17d5963c3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -193,13 +193,18 @@ extern int isolate_lru_page(struct page *p);
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct page *, struct page *);
 extern void migrate_page_copy(struct page *, struct page *);
+extern int migrate_page_remove_references(struct page *, struct page *, int);
 extern int migrate_pages(struct list_head *l, struct list_head *t,
 		struct list_head *moved, struct list_head *failed);
+extern int fail_migrate_page(struct page *, struct page *);
 #else
 static inline int isolate_lru_page(struct page *p) { return -ENOSYS; }
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
 static inline int migrate_pages(struct list_head *l, struct list_head *t,
 	struct list_head *moved, struct list_head *failed) { return -ENOSYS; }
+/* Possible settings for the migrate_page() method in address_operations */
+#define migrate_page NULL
+#define fail_migrate_page NULL
 #endif
 
 #ifdef CONFIG_MMU
diff --git a/mm/rmap.c b/mm/rmap.c
index f4b91d7aa5cf..df2c41c2a9a2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -233,6 +233,7 @@ void remove_from_swap(struct page *page)
 
 	delete_from_swap_cache(page);
 }
+EXPORT_SYMBOL(remove_from_swap);
 #endif
 
 /*
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 7b09ac503fec..db8a3d3e1636 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -27,6 +27,7 @@ static struct address_space_operations swap_aops = {
 	.writepage	= swap_writepage,
 	.sync_page	= block_sync_page,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
+	.migratepage	= migrate_page,
 };
 
 static struct backing_dev_info swap_backing_dev_info = {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5e98b86feb74..5a610804cd06 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -614,6 +614,15 @@ int putback_lru_pages(struct list_head *l)
 	return count;
 }
 
+/*
+ * Non migratable page
+ */
+int fail_migrate_page(struct page *newpage, struct page *page)
+{
+	return -EIO;
+}
+EXPORT_SYMBOL(fail_migrate_page);
+
 /*
  * swapout a single page
  * page is locked upon entry, unlocked on exit
@@ -659,6 +668,7 @@ unlock_retry:
 retry:
 	return -EAGAIN;
 }
+EXPORT_SYMBOL(swap_page);
 
 /*
  * Page migration was first developed in the context of the memory hotplug
@@ -674,7 +684,7 @@ retry:
  * Remove references for a page and establish the new page with the correct
  * basic settings to be able to stop accesses to the page.
  */
-static int migrate_page_remove_references(struct page *newpage,
+int migrate_page_remove_references(struct page *newpage,
 				struct page *page, int nr_refs)
 {
 	struct address_space *mapping = page_mapping(page);
@@ -749,6 +759,7 @@ static int migrate_page_remove_references(struct page *newpage,
 
 	return 0;
 }
+EXPORT_SYMBOL(migrate_page_remove_references);
 
 /*
  * Copy the page to its new location
@@ -788,6 +799,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 	if (PageWriteback(newpage))
 		end_page_writeback(newpage);
 }
+EXPORT_SYMBOL(migrate_page_copy);
 
 /*
  * Common logic to directly migrate a single page suitable for
@@ -815,6 +827,7 @@ int migrate_page(struct page *newpage, struct page *page)
 	remove_from_swap(newpage);
 	return 0;
 }
+EXPORT_SYMBOL(migrate_page);
 
 /*
  * migrate_pages
@@ -914,6 +927,11 @@ redo:
 		if (!mapping)
 			goto unlock_both;
 
+		if (mapping->a_ops->migratepage) {
+			rc = mapping->a_ops->migratepage(newpage, page);
+			goto unlock_both;
+                }
+
 		/*
 		 * Trigger writeout if page is dirty
 		 */
-- 
cgit v1.2.3-71-gd317


From 7fd6b1413082c303613fc137aca9a004740cacf0 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 1 Feb 2006 03:05:52 -0800
Subject: [PATCH] slab: fix kzalloc and kstrdup caller report for
 CONFIG_DEBUG_SLAB

Fix kzalloc() and kstrdup() caller report for CONFIG_DEBUG_SLAB.  We must
pass the caller to __cache_alloc() instead of directly doing
__builtin_return_address(0) there; otherwise kzalloc() and kstrdup() are
reported as the allocation site instead of the real one.

Thanks to Valdis Kletnieks for reporting the problem and Steven Rostedt for
the original idea.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/slab.h |  7 +++++++
 mm/slab.c            | 29 ++++++++++++++++++++++++-----
 2 files changed, 31 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 1fb77a9cc148..8cf52939d0ab 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -76,7 +76,14 @@ struct cache_sizes {
 	kmem_cache_t	*cs_dmacachep;
 };
 extern struct cache_sizes malloc_sizes[];
+
+#ifndef CONFIG_DEBUG_SLAB
 extern void *__kmalloc(size_t, gfp_t);
+#else
+extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
+#define __kmalloc(size, flags) \
+    __kmalloc_track_caller(size, flags, __builtin_return_address(0))
+#endif
 
 static inline void *kmalloc(size_t size, gfp_t flags)
 {
diff --git a/mm/slab.c b/mm/slab.c
index 6fbd6a1cdeb4..67527268b01c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2687,7 +2687,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	return objp;
 }
 
-static inline void *__cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+static __always_inline void *
+__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 {
 	unsigned long save_flags;
 	void *objp;
@@ -2698,7 +2699,7 @@ static inline void *__cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	objp = ____cache_alloc(cachep, flags);
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
-					    __builtin_return_address(0));
+					    caller);
 	prefetchw(objp);
 	return objp;
 }
@@ -2927,7 +2928,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
  */
 void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
-	return __cache_alloc(cachep, flags);
+	return __cache_alloc(cachep, flags, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
@@ -3041,7 +3042,8 @@ EXPORT_SYMBOL(kmalloc_node);
  * platforms.  For example, on i386, it means that the memory must come
  * from the first 16MB.
  */
-void *__kmalloc(size_t size, gfp_t flags)
+static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
+					  void *caller)
 {
 	struct kmem_cache *cachep;
 
@@ -3053,10 +3055,27 @@ void *__kmalloc(size_t size, gfp_t flags)
 	cachep = __find_general_cachep(size, flags);
 	if (unlikely(cachep == NULL))
 		return NULL;
-	return __cache_alloc(cachep, flags);
+	return __cache_alloc(cachep, flags, caller);
+}
+
+#ifndef CONFIG_DEBUG_SLAB
+
+void *__kmalloc(size_t size, gfp_t flags)
+{
+	return __do_kmalloc(size, flags, NULL);
 }
 EXPORT_SYMBOL(__kmalloc);
 
+#else
+
+void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
+{
+	return __do_kmalloc(size, flags, caller);
+}
+EXPORT_SYMBOL(__kmalloc_track_caller);
+
+#endif
+
 #ifdef CONFIG_SMP
 /**
  * __alloc_percpu - allocate one copy of the object for every present
-- 
cgit v1.2.3-71-gd317


From d739b42b82773206297db1fc0d96ef895a5d9688 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 1 Feb 2006 03:06:43 -0800
Subject: [PATCH] reiserfs: remove kmalloc wrapper

Remove the kmalloc() wrapper from fs/reiserfs/.  Please note that the reiserfs
/proc entry format changes because the kmalloc statistics are removed.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/dir.c              | 16 +++-----
 fs/reiserfs/fix_node.c         | 50 ++-----------------------
 fs/reiserfs/journal.c          | 84 ++++++++++++++----------------------------
 fs/reiserfs/namei.c            | 16 ++++----
 fs/reiserfs/procfs.c           |  3 +-
 fs/reiserfs/super.c            |  6 ---
 fs/reiserfs/xattr.c            | 11 ++----
 include/linux/reiserfs_fs.h    | 16 --------
 include/linux/reiserfs_fs_sb.h |  1 -
 9 files changed, 49 insertions(+), 154 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 9dd71e807034..d71ac6579289 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -150,18 +150,15 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 				if (d_reclen <= 32) {
 					local_buf = small_buf;
 				} else {
-					local_buf =
-					    reiserfs_kmalloc(d_reclen, GFP_NOFS,
-							     inode->i_sb);
+					local_buf = kmalloc(d_reclen,
+							    GFP_NOFS);
 					if (!local_buf) {
 						pathrelse(&path_to_entry);
 						ret = -ENOMEM;
 						goto out;
 					}
 					if (item_moved(&tmp_ih, &path_to_entry)) {
-						reiserfs_kfree(local_buf,
-							       d_reclen,
-							       inode->i_sb);
+						kfree(local_buf);
 						goto research;
 					}
 				}
@@ -174,15 +171,12 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 				    (dirent, local_buf, d_reclen, d_off, d_ino,
 				     DT_UNKNOWN) < 0) {
 					if (local_buf != small_buf) {
-						reiserfs_kfree(local_buf,
-							       d_reclen,
-							       inode->i_sb);
+						kfree(local_buf);
 					}
 					goto end;
 				}
 				if (local_buf != small_buf) {
-					reiserfs_kfree(local_buf, d_reclen,
-						       inode->i_sb);
+					kfree(local_buf);
 				}
 				// next entry should be looked for with such offset
 				next_pos = deh_offset(deh) + 1;
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 45829889dcdc..aa22588019ec 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -2021,38 +2021,6 @@ static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
 	return CARRY_ON;
 }
 
-#ifdef CONFIG_REISERFS_CHECK
-void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s)
-{
-	void *vp;
-	static size_t malloced;
-
-	vp = kmalloc(size, flags);
-	if (vp) {
-		REISERFS_SB(s)->s_kmallocs += size;
-		if (REISERFS_SB(s)->s_kmallocs > malloced + 200000) {
-			reiserfs_warning(s,
-					 "vs-8301: reiserfs_kmalloc: allocated memory %d",
-					 REISERFS_SB(s)->s_kmallocs);
-			malloced = REISERFS_SB(s)->s_kmallocs;
-		}
-	}
-	return vp;
-}
-
-void reiserfs_kfree(const void *vp, size_t size, struct super_block *s)
-{
-	kfree(vp);
-
-	REISERFS_SB(s)->s_kmallocs -= size;
-	if (REISERFS_SB(s)->s_kmallocs < 0)
-		reiserfs_warning(s,
-				 "vs-8302: reiserfs_kfree: allocated memory %d",
-				 REISERFS_SB(s)->s_kmallocs);
-
-}
-#endif
-
 static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
 {
 	int max_num_of_items;
@@ -2086,7 +2054,7 @@ static int get_mem_for_virtual_node(struct tree_balance *tb)
 		/* we have to allocate more memory for virtual node */
 		if (tb->vn_buf) {
 			/* free memory allocated before */
-			reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
+			kfree(tb->vn_buf);
 			/* this is not needed if kfree is atomic */
 			check_fs = 1;
 		}
@@ -2095,24 +2063,15 @@ static int get_mem_for_virtual_node(struct tree_balance *tb)
 		tb->vn_buf_size = size;
 
 		/* get memory for virtual item */
-		buf =
-		    reiserfs_kmalloc(size, GFP_ATOMIC | __GFP_NOWARN,
-				     tb->tb_sb);
+		buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
 		if (!buf) {
 			/* getting memory with GFP_KERNEL priority may involve
 			   balancing now (due to indirect_to_direct conversion on
 			   dcache shrinking). So, release path and collected
 			   resources here */
 			free_buffers_in_tb(tb);
-			buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb);
+			buf = kmalloc(size, GFP_NOFS);
 			if (!buf) {
-#ifdef CONFIG_REISERFS_CHECK
-				reiserfs_warning(tb->tb_sb,
-						 "vs-8345: get_mem_for_virtual_node: "
-						 "kmalloc failed. reiserfs kmalloced %d bytes",
-						 REISERFS_SB(tb->tb_sb)->
-						 s_kmallocs);
-#endif
 				tb->vn_buf_size = 0;
 			}
 			tb->vn_buf = buf;
@@ -2619,7 +2578,6 @@ void unfix_nodes(struct tree_balance *tb)
 		}
 	}
 
-	if (tb->vn_buf)
-		reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
+	kfree(tb->vn_buf);
 
 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 4491fcf2a0e6..16b526fd20b9 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -152,18 +152,16 @@ static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
 	struct reiserfs_bitmap_node *bn;
 	static int id;
 
-	bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS,
-			      p_s_sb);
+	bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
 	if (!bn) {
 		return NULL;
 	}
-	bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb);
+	bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS);
 	if (!bn->data) {
-		reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+		kfree(bn);
 		return NULL;
 	}
 	bn->id = id++;
-	memset(bn->data, 0, p_s_sb->s_blocksize);
 	INIT_LIST_HEAD(&bn->list);
 	return bn;
 }
@@ -197,8 +195,8 @@ static inline void free_bitmap_node(struct super_block *p_s_sb,
 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
 	journal->j_used_bitmap_nodes--;
 	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
-		reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
-		reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+		kfree(bn->data);
+		kfree(bn);
 	} else {
 		list_add(&bn->list, &journal->j_bitmap_nodes);
 		journal->j_free_bitmap_nodes++;
@@ -276,8 +274,8 @@ static int free_bitmap_nodes(struct super_block *p_s_sb)
 	while (next != &journal->j_bitmap_nodes) {
 		bn = list_entry(next, struct reiserfs_bitmap_node, list);
 		list_del(next);
-		reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
-		reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+		kfree(bn->data);
+		kfree(bn);
 		next = journal->j_bitmap_nodes.next;
 		journal->j_free_bitmap_nodes--;
 	}
@@ -581,7 +579,7 @@ static inline void put_journal_list(struct super_block *s,
 			       jl->j_trans_id, jl->j_refcount);
 	}
 	if (--jl->j_refcount == 0)
-		reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s);
+		kfree(jl);
 }
 
 /*
@@ -1818,8 +1816,7 @@ void remove_journal_hash(struct super_block *sb,
 static void free_journal_ram(struct super_block *p_s_sb)
 {
 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
-	reiserfs_kfree(journal->j_current_jl,
-		       sizeof(struct reiserfs_journal_list), p_s_sb);
+	kfree(journal->j_current_jl);
 	journal->j_num_lists--;
 
 	vfree(journal->j_cnode_free_orig);
@@ -2093,21 +2090,15 @@ static int journal_read_transaction(struct super_block *p_s_sb,
 	}
 	trans_id = get_desc_trans_id(desc);
 	/* now we know we've got a good transaction, and it was inside the valid time ranges */
-	log_blocks =
-	    reiserfs_kmalloc(get_desc_trans_len(desc) *
-			     sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
-	real_blocks =
-	    reiserfs_kmalloc(get_desc_trans_len(desc) *
-			     sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
+	log_blocks = kmalloc(get_desc_trans_len(desc) *
+			     sizeof(struct buffer_head *), GFP_NOFS);
+	real_blocks = kmalloc(get_desc_trans_len(desc) *
+			      sizeof(struct buffer_head *), GFP_NOFS);
 	if (!log_blocks || !real_blocks) {
 		brelse(c_bh);
 		brelse(d_bh);
-		reiserfs_kfree(log_blocks,
-			       get_desc_trans_len(desc) *
-			       sizeof(struct buffer_head *), p_s_sb);
-		reiserfs_kfree(real_blocks,
-			       get_desc_trans_len(desc) *
-			       sizeof(struct buffer_head *), p_s_sb);
+		kfree(log_blocks);
+		kfree(real_blocks);
 		reiserfs_warning(p_s_sb,
 				 "journal-1169: kmalloc failed, unable to mount FS");
 		return -1;
@@ -2145,12 +2136,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
 			brelse_array(real_blocks, i);
 			brelse(c_bh);
 			brelse(d_bh);
-			reiserfs_kfree(log_blocks,
-				       get_desc_trans_len(desc) *
-				       sizeof(struct buffer_head *), p_s_sb);
-			reiserfs_kfree(real_blocks,
-				       get_desc_trans_len(desc) *
-				       sizeof(struct buffer_head *), p_s_sb);
+			kfree(log_blocks);
+			kfree(real_blocks);
 			return -1;
 		}
 	}
@@ -2166,12 +2153,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
 			brelse_array(real_blocks, get_desc_trans_len(desc));
 			brelse(c_bh);
 			brelse(d_bh);
-			reiserfs_kfree(log_blocks,
-				       get_desc_trans_len(desc) *
-				       sizeof(struct buffer_head *), p_s_sb);
-			reiserfs_kfree(real_blocks,
-				       get_desc_trans_len(desc) *
-				       sizeof(struct buffer_head *), p_s_sb);
+			kfree(log_blocks);
+			kfree(real_blocks);
 			return -1;
 		}
 		memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
@@ -2193,12 +2176,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
 				     get_desc_trans_len(desc) - i);
 			brelse(c_bh);
 			brelse(d_bh);
-			reiserfs_kfree(log_blocks,
-				       get_desc_trans_len(desc) *
-				       sizeof(struct buffer_head *), p_s_sb);
-			reiserfs_kfree(real_blocks,
-				       get_desc_trans_len(desc) *
-				       sizeof(struct buffer_head *), p_s_sb);
+			kfree(log_blocks);
+			kfree(real_blocks);
 			return -1;
 		}
 		brelse(real_blocks[i]);
@@ -2217,12 +2196,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
 	journal->j_trans_id = trans_id + 1;
 	brelse(c_bh);
 	brelse(d_bh);
-	reiserfs_kfree(log_blocks,
-		       le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
-		       p_s_sb);
-	reiserfs_kfree(real_blocks,
-		       le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
-		       p_s_sb);
+	kfree(log_blocks);
+	kfree(real_blocks);
 	return 0;
 }
 
@@ -2472,13 +2447,11 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
 {
 	struct reiserfs_journal_list *jl;
       retry:
-	jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS,
-			      s);
+	jl = kzalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS);
 	if (!jl) {
 		yield();
 		goto retry;
 	}
-	memset(jl, 0, sizeof(*jl));
 	INIT_LIST_HEAD(&jl->j_list);
 	INIT_LIST_HEAD(&jl->j_working_list);
 	INIT_LIST_HEAD(&jl->j_tail_bh_list);
@@ -3042,14 +3015,12 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
 		}
 		return th;
 	}
-	th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle),
-			      GFP_NOFS, s);
+	th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
 	if (!th)
 		return NULL;
 	ret = journal_begin(th, s, nblocks);
 	if (ret) {
-		reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle),
-			       s);
+		kfree(th);
 		return NULL;
 	}
 
@@ -3067,8 +3038,7 @@ int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
 		ret = -EIO;
 	if (th->t_refcount == 0) {
 		SB_JOURNAL(s)->j_persistent_trans--;
-		reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle),
-			       s);
+		kfree(th);
 	}
 	return ret;
 }
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 8f8d8d01107c..c8123308e060 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -456,7 +456,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
 	/* get memory for composing the entry */
 	buflen = DEH_SIZE + ROUND_UP(namelen);
 	if (buflen > sizeof(small_buf)) {
-		buffer = reiserfs_kmalloc(buflen, GFP_NOFS, dir->i_sb);
+		buffer = kmalloc(buflen, GFP_NOFS);
 		if (buffer == 0)
 			return -ENOMEM;
 	} else
@@ -490,7 +490,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
 	retval = reiserfs_find_entry(dir, name, namelen, &path, &de);
 	if (retval != NAME_NOT_FOUND) {
 		if (buffer != small_buf)
-			reiserfs_kfree(buffer, buflen, dir->i_sb);
+			kfree(buffer);
 		pathrelse(&path);
 
 		if (retval == IO_ERROR) {
@@ -515,7 +515,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
 		reiserfs_warning(dir->i_sb,
 				 "reiserfs_add_entry: Congratulations! we have got hash function screwed up");
 		if (buffer != small_buf)
-			reiserfs_kfree(buffer, buflen, dir->i_sb);
+			kfree(buffer);
 		pathrelse(&path);
 		return -EBUSY;
 	}
@@ -535,7 +535,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
 					 &entry_key);
 
 			if (buffer != small_buf)
-				reiserfs_kfree(buffer, buflen, dir->i_sb);
+				kfree(buffer);
 			pathrelse(&path);
 			return -EBUSY;
 		}
@@ -546,7 +546,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
 	    reiserfs_paste_into_item(th, &path, &entry_key, dir, buffer,
 				     paste_size);
 	if (buffer != small_buf)
-		reiserfs_kfree(buffer, buflen, dir->i_sb);
+		kfree(buffer);
 	if (retval) {
 		reiserfs_check_path(&path);
 		return retval;
@@ -1065,7 +1065,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
 		goto out_failed;
 	}
 
-	name = reiserfs_kmalloc(item_len, GFP_NOFS, parent_dir->i_sb);
+	name = kmalloc(item_len, GFP_NOFS);
 	if (!name) {
 		drop_new_inode(inode);
 		retval = -ENOMEM;
@@ -1079,14 +1079,14 @@ static int reiserfs_symlink(struct inode *parent_dir,
 	retval = journal_begin(&th, parent_dir->i_sb, jbegin_count);
 	if (retval) {
 		drop_new_inode(inode);
-		reiserfs_kfree(name, item_len, parent_dir->i_sb);
+		kfree(name);
 		goto out_failed;
 	}
 
 	retval =
 	    reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname),
 			       dentry, inode);
-	reiserfs_kfree(name, item_len, parent_dir->i_sb);
+	kfree(name);
 	if (retval) {		/* reiserfs_new_inode iputs for us */
 		goto out_failed;
 	}
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index fc2f43c75df4..ef6caed9336b 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -88,7 +88,6 @@ static int show_super(struct seq_file *m, struct super_block *sb)
 	seq_printf(m, "state: \t%s\n"
 		   "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n"
 		   "gen. counter: \t%i\n"
-		   "s_kmallocs: \t%i\n"
 		   "s_disk_reads: \t%i\n"
 		   "s_disk_writes: \t%i\n"
 		   "s_fix_nodes: \t%i\n"
@@ -128,7 +127,7 @@ static int show_super(struct seq_file *m, struct super_block *sb)
 		   "SMALL_TAILS " : "NO_TAILS ",
 		   replay_only(sb) ? "REPLAY_ONLY " : "",
 		   convert_reiserfs(sb) ? "CONV " : "",
-		   atomic_read(&r->s_generation_counter), SF(s_kmallocs),
+		   atomic_read(&r->s_generation_counter),
 		   SF(s_disk_reads), SF(s_disk_writes), SF(s_fix_nodes),
 		   SF(s_do_balance), SF(s_unneeded_left_neighbor),
 		   SF(s_good_search_by_key_reada), SF(s_bmaps),
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 397d9590c8f2..77891de0e02e 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -472,12 +472,6 @@ static void reiserfs_put_super(struct super_block *s)
 
 	print_statistics(s);
 
-	if (REISERFS_SB(s)->s_kmallocs != 0) {
-		reiserfs_warning(s,
-				 "vs-2004: reiserfs_put_super: allocated memory left %d",
-				 REISERFS_SB(s)->s_kmallocs);
-	}
-
 	if (REISERFS_SB(s)->reserved_blocks != 0) {
 		reiserfs_warning(s,
 				 "green-2005: reiserfs_put_super: reserved blocks left %d",
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index cc061bfd437b..4f0db4e54517 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -368,15 +368,13 @@ static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		if (d_reclen <= 32) {
 			local_buf = small_buf;
 		} else {
-			local_buf =
-			    reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb);
+			local_buf = kmalloc(d_reclen, GFP_NOFS);
 			if (!local_buf) {
 				pathrelse(&path_to_entry);
 				return -ENOMEM;
 			}
 			if (item_moved(&tmp_ih, &path_to_entry)) {
-				reiserfs_kfree(local_buf, d_reclen,
-					       inode->i_sb);
+				kfree(local_buf);
 
 				/* sigh, must retry.  Do this same offset again */
 				next_pos = d_off;
@@ -399,13 +397,12 @@ static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		if (filldir(dirent, local_buf, d_reclen, d_off, d_ino,
 			    DT_UNKNOWN) < 0) {
 			if (local_buf != small_buf) {
-				reiserfs_kfree(local_buf, d_reclen,
-					       inode->i_sb);
+				kfree(local_buf);
 			}
 			goto end;
 		}
 		if (local_buf != small_buf) {
-			reiserfs_kfree(local_buf, d_reclen, inode->i_sb);
+			kfree(local_buf);
 		}
 	}			/* while */
 
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index e276c5ba2bb7..7d51149bd793 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1971,22 +1971,6 @@ extern struct file_operations reiserfs_file_operations;
 extern struct address_space_operations reiserfs_address_space_operations;
 
 /* fix_nodes.c */
-#ifdef CONFIG_REISERFS_CHECK
-void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s);
-void reiserfs_kfree(const void *vp, size_t size, struct super_block *s);
-#else
-static inline void *reiserfs_kmalloc(size_t size, int flags,
-				     struct super_block *s)
-{
-	return kmalloc(size, flags);
-}
-
-static inline void reiserfs_kfree(const void *vp, size_t size,
-				  struct super_block *s)
-{
-	kfree(vp);
-}
-#endif
 
 int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb,
 	      struct item_head *p_s_ins_ih, const void *);
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 3e68592e52e9..31b4c0bd4fa0 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -382,7 +382,6 @@ struct reiserfs_sb_info {
 					   on-disk FS format */
 
 	/* session statistics */
-	int s_kmallocs;
 	int s_disk_reads;
 	int s_disk_writes;
 	int s_fix_nodes;
-- 
cgit v1.2.3-71-gd317


From c87d0c07ea198db1ce451421904edd60b7d385ee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 1 Feb 2006 03:06:45 -0800
Subject: [PATCH] reiserfs: remove reiserfs_permission_locked

This function is completely unused since the xattr permission checking
changes.  Remove it and fold __reiserfs_permission into
reiserfs_permission.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/xattr.c            | 30 ++++++++----------------------
 include/linux/reiserfs_xattr.h |  2 --
 2 files changed, 8 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4f0db4e54517..2f085845f670 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -1319,9 +1319,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
 	return err;
 }
 
-static int
-__reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd,
-		      int need_lock)
+int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
 	umode_t mode = inode->i_mode;
 
@@ -1357,15 +1355,14 @@ __reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd,
 		if (!(mode & S_IRWXG))
 			goto check_groups;
 
-		if (need_lock) {
-			reiserfs_read_lock_xattr_i(inode);
-			reiserfs_read_lock_xattrs(inode->i_sb);
-		}
+		reiserfs_read_lock_xattr_i(inode);
+		reiserfs_read_lock_xattrs(inode->i_sb);
+
 		acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
-		if (need_lock) {
-			reiserfs_read_unlock_xattrs(inode->i_sb);
-			reiserfs_read_unlock_xattr_i(inode);
-		}
+
+		reiserfs_read_unlock_xattrs(inode->i_sb);
+		reiserfs_read_unlock_xattr_i(inode);
+
 		if (IS_ERR(acl)) {
 			if (PTR_ERR(acl) == -ENODATA)
 				goto check_groups;
@@ -1414,14 +1411,3 @@ __reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd,
 
 	return -EACCES;
 }
-
-int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
-{
-	return __reiserfs_permission(inode, mask, nd, 1);
-}
-
-int
-reiserfs_permission_locked(struct inode *inode, int mask, struct nameidata *nd)
-{
-	return __reiserfs_permission(inode, mask, nd, 0);
-}
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index c84354e8374c..87280eb6083d 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -43,8 +43,6 @@ int reiserfs_delete_xattrs(struct inode *inode);
 int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
 int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
 int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd);
-int reiserfs_permission_locked(struct inode *inode, int mask,
-			       struct nameidata *nd);
 
 int reiserfs_xattr_del(struct inode *, const char *);
 int reiserfs_xattr_get(const struct inode *, const char *, void *, size_t);
-- 
cgit v1.2.3-71-gd317


From 16fb24252a8170799e7adf14d8fc31b817fcaf53 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 1 Feb 2006 12:18:22 -0500
Subject: NLM: Fix arguments to NLM_CANCEL call

 The OpenGroup docs state that the arguments "block", "exclusive" and
 "alock" must exactly match the arguments for the lock call that we are
 trying to cancel.
 Currently, "block" is always set to false, which is wrong.

 See bug# 5956 on bugzilla.kernel.org.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c         | 7 ++++---
 include/linux/lockd/lockd.h | 1 -
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 145524039577..b8ecfa1168f3 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -28,6 +28,7 @@ static int	nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_unlock(struct nlm_rqst *, struct file_lock *);
 static int	nlm_stat_to_errno(u32 stat);
 static void	nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host);
+static int	nlmclnt_cancel(struct nlm_host *, int , struct file_lock *);
 
 static const struct rpc_call_ops nlmclnt_unlock_ops;
 static const struct rpc_call_ops nlmclnt_cancel_ops;
@@ -598,7 +599,7 @@ out_unblock:
 	nlmclnt_finish_block(req);
 	/* Cancel the blocked request if it is still pending */
 	if (resp->status == NLM_LCK_BLOCKED)
-		nlmclnt_cancel(host, fl);
+		nlmclnt_cancel(host, req->a_args.block, fl);
 out:
 	nlmclnt_release_lockargs(req);
 	return status;
@@ -728,8 +729,7 @@ static const struct rpc_call_ops nlmclnt_unlock_ops = {
  * We always use an async RPC call for this in order not to hang a
  * process that has been Ctrl-C'ed.
  */
-int
-nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl)
+static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl)
 {
 	struct nlm_rqst	*req;
 	unsigned long	flags;
@@ -750,6 +750,7 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl)
 	req->a_flags = RPC_TASK_ASYNC;
 
 	nlmclnt_setlockargs(req, fl);
+	req->a_args.block = block;
 
 	status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 	if (status < 0) {
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 95c8fea293ba..afe9a8f5c5ae 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -148,7 +148,6 @@ struct nlm_rqst * nlmclnt_alloc_call(void);
 int		  nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
 void		  nlmclnt_finish_block(struct nlm_rqst *req);
 long		  nlmclnt_block(struct nlm_rqst *req, long timeout);
-int		  nlmclnt_cancel(struct nlm_host *, struct file_lock *);
 u32		  nlmclnt_grant(struct nlm_lock *);
 void		  nlmclnt_recovery(struct nlm_host *, u32);
 int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
-- 
cgit v1.2.3-71-gd317


From aaaa99423b4b1f9cfd33ea5643d9274c25f62491 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 1 Feb 2006 12:18:25 -0500
Subject: NLM: Ensure that nlmclnt_cancel_callback() doesn't loop forever

 If the server returns NLM_LCK_DENIED_NOLOCKS, we currently retry the
 entire NLM_CANCEL request. This may end up looping forever unless the
 server changes its mind (why would it do that, though?).

 Ensure that we limit the number of retries (to 3).

 See bug# 5957 in bugzilla.kernel.org.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c         | 4 ++++
 include/linux/lockd/lockd.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index b8ecfa1168f3..220058d8616d 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -22,6 +22,7 @@
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 #define NLMCLNT_GRACE_WAIT	(5*HZ)
 #define NLMCLNT_POLL_TIMEOUT	(30*HZ)
+#define NLMCLNT_MAX_RETRIES	3
 
 static int	nlmclnt_test(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
@@ -802,6 +803,9 @@ die:
 	return;
 
 retry_cancel:
+	/* Don't ever retry more than 3 times */
+	if (req->a_retries++ >= NLMCLNT_MAX_RETRIES)
+		goto die;
 	nlm_rebind_host(req->a_host);
 	rpc_restart_call(task);
 	rpc_delay(task, 30 * HZ);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index afe9a8f5c5ae..920766cea79c 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -84,6 +84,7 @@ struct nlm_rqst {
 	struct nlm_args		a_args;		/* arguments */
 	struct nlm_res		a_res;		/* result */
 	struct nlm_wait *	a_block;
+	unsigned int		a_retries;	/* Retry count */
 	char			a_owner[NLMCLNT_OHSIZE];
 };
 
-- 
cgit v1.2.3-71-gd317


From 8a3177604b729ec3b80e43790ee978863ac7551b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 1 Feb 2006 12:18:36 -0500
Subject: SUNRPC: Fix a lock recursion in the auth_gss downcall

 When we look up a new cred in the auth_gss downcall so that we can stuff
 the credcache, we do not want that lookup to queue up an upcall in order
 to initialise it. Doing an upcall here is not only redundant, but since we
 are already holding the inode->i_mutex, it will also trigger a lock recursion.

 This patch allows rpcauth cache searches to indicate that they can cope
 with uninitialised credentials.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h    |  5 +++++
 net/sunrpc/auth.c              | 17 ++++++++++-------
 net/sunrpc/auth_gss/auth_gss.c | 25 +++++++++++++++++++------
 net/sunrpc/auth_unix.c         |  6 +++---
 4 files changed, 37 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index b68c11a2d6dd..bfc5fb279539 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -50,6 +50,7 @@ struct rpc_cred {
 };
 #define RPCAUTH_CRED_LOCKED	0x0001
 #define RPCAUTH_CRED_UPTODATE	0x0002
+#define RPCAUTH_CRED_NEW	0x0004
 
 #define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
 
@@ -87,6 +88,10 @@ struct rpc_auth {
 						 * uid/gid, fs[ug]id, gids)
 						 */
 
+/* Flags for rpcauth_lookupcred() */
+#define RPCAUTH_LOOKUP_NEW		0x01	/* Accept an uninitialised cred */
+#define RPCAUTH_LOOKUP_ROOTCREDS	0x02	/* This really ought to go! */
+
 /*
  * Client authentication ops
  */
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 9ac1b8c26c01..1ca89c36da7a 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -184,7 +184,7 @@ rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free)
  */
 struct rpc_cred *
 rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
-		int taskflags)
+		int flags)
 {
 	struct rpc_cred_cache *cache = auth->au_credcache;
 	HLIST_HEAD(free);
@@ -193,7 +193,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 			*cred = NULL;
 	int		nr = 0;
 
-	if (!(taskflags & RPC_TASK_ROOTCREDS))
+	if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS))
 		nr = acred->uid & RPC_CREDCACHE_MASK;
 retry:
 	spin_lock(&rpc_credcache_lock);
@@ -202,7 +202,7 @@ retry:
 	hlist_for_each_safe(pos, next, &cache->hashtable[nr]) {
 		struct rpc_cred *entry;
 	       	entry = hlist_entry(pos, struct rpc_cred, cr_hash);
-		if (entry->cr_ops->crmatch(acred, entry, taskflags)) {
+		if (entry->cr_ops->crmatch(acred, entry, flags)) {
 			hlist_del(&entry->cr_hash);
 			cred = entry;
 			break;
@@ -224,7 +224,7 @@ retry:
 	rpcauth_destroy_credlist(&free);
 
 	if (!cred) {
-		new = auth->au_ops->crcreate(auth, acred, taskflags);
+		new = auth->au_ops->crcreate(auth, acred, flags);
 		if (!IS_ERR(new)) {
 #ifdef RPC_DEBUG
 			new->cr_magic = RPCAUTH_CRED_MAGIC;
@@ -238,7 +238,7 @@ retry:
 }
 
 struct rpc_cred *
-rpcauth_lookupcred(struct rpc_auth *auth, int taskflags)
+rpcauth_lookupcred(struct rpc_auth *auth, int flags)
 {
 	struct auth_cred acred = {
 		.uid = current->fsuid,
@@ -250,7 +250,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int taskflags)
 	dprintk("RPC:     looking up %s cred\n",
 		auth->au_ops->au_name);
 	get_group_info(acred.group_info);
-	ret = auth->au_ops->lookup_cred(auth, &acred, taskflags);
+	ret = auth->au_ops->lookup_cred(auth, &acred, flags);
 	put_group_info(acred.group_info);
 	return ret;
 }
@@ -265,11 +265,14 @@ rpcauth_bindcred(struct rpc_task *task)
 		.group_info = current->group_info,
 	};
 	struct rpc_cred *ret;
+	int flags = 0;
 
 	dprintk("RPC: %4d looking up %s cred\n",
 		task->tk_pid, task->tk_auth->au_ops->au_name);
 	get_group_info(acred.group_info);
-	ret = auth->au_ops->lookup_cred(auth, &acred, task->tk_flags);
+	if (task->tk_flags & RPC_TASK_ROOTCREDS)
+		flags |= RPCAUTH_LOOKUP_ROOTCREDS;
+	ret = auth->au_ops->lookup_cred(auth, &acred, flags);
 	if (!IS_ERR(ret))
 		task->tk_msg.rpc_cred = ret;
 	else
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 8d782282ec19..03affcbf6292 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -158,6 +158,7 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
 	old = gss_cred->gc_ctx;
 	gss_cred->gc_ctx = ctx;
 	cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
+	cred->cr_flags &= ~RPCAUTH_CRED_NEW;
 	write_unlock(&gss_ctx_lock);
 	if (old)
 		gss_put_ctx(old);
@@ -580,7 +581,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	} else {
 		struct auth_cred acred = { .uid = uid };
 		spin_unlock(&gss_auth->lock);
-		cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, 0);
+		cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW);
 		if (IS_ERR(cred)) {
 			err = PTR_ERR(cred);
 			goto err_put_ctx;
@@ -758,13 +759,13 @@ gss_destroy_cred(struct rpc_cred *rc)
  * Lookup RPCSEC_GSS cred for the current process
  */
 static struct rpc_cred *
-gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
+gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 {
-	return rpcauth_lookup_credcache(auth, acred, taskflags);
+	return rpcauth_lookup_credcache(auth, acred, flags);
 }
 
 static struct rpc_cred *
-gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
+gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 {
 	struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth);
 	struct gss_cred	*cred = NULL;
@@ -785,13 +786,17 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
 	 */
 	cred->gc_flags = 0;
 	cred->gc_base.cr_ops = &gss_credops;
+	cred->gc_base.cr_flags = RPCAUTH_CRED_NEW;
 	cred->gc_service = gss_auth->service;
+	/* Is the caller prepared to initialise the credential? */
+	if (flags & RPCAUTH_LOOKUP_NEW)
+		goto out;
 	do {
 		err = gss_create_upcall(gss_auth, cred);
 	} while (err == -EAGAIN);
 	if (err < 0)
 		goto out_err;
-
+out:
 	return &cred->gc_base;
 
 out_err:
@@ -801,13 +806,21 @@ out_err:
 }
 
 static int
-gss_match(struct auth_cred *acred, struct rpc_cred *rc, int taskflags)
+gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
 {
 	struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
 
+	/*
+	 * If the searchflags have set RPCAUTH_LOOKUP_NEW, then
+	 * we don't really care if the credential has expired or not,
+	 * since the caller should be prepared to reinitialise it.
+	 */
+	if ((flags & RPCAUTH_LOOKUP_NEW) && (rc->cr_flags & RPCAUTH_CRED_NEW))
+		goto out;
 	/* Don't match with creds that have expired. */
 	if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry))
 		return 0;
+out:
 	return (rc->cr_uid == acred->uid);
 }
 
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 1b3ed4fd1987..df14b6bfbf10 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -75,7 +75,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 
 	atomic_set(&cred->uc_count, 1);
 	cred->uc_flags = RPCAUTH_CRED_UPTODATE;
-	if (flags & RPC_TASK_ROOTCREDS) {
+	if (flags & RPCAUTH_LOOKUP_ROOTCREDS) {
 		cred->uc_uid = 0;
 		cred->uc_gid = 0;
 		cred->uc_gids[0] = NOGROUP;
@@ -108,12 +108,12 @@ unx_destroy_cred(struct rpc_cred *cred)
  * request root creds (e.g. for NFS swapping).
  */
 static int
-unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int taskflags)
+unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
 {
 	struct unx_cred	*cred = (struct unx_cred *) rcred;
 	int		i;
 
-	if (!(taskflags & RPC_TASK_ROOTCREDS)) {
+	if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) {
 		int groups;
 
 		if (cred->uc_uid != acred->uid
-- 
cgit v1.2.3-71-gd317


From fba3bad488a2eec2d76c067edb7a5ff92ef42431 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 1 Feb 2006 12:19:27 -0500
Subject: SUNRPC: Move upcall out of auth->au_ops->crcreate()

 This fixes a bug whereby if two processes try to look up the same auth_gss
 credential, they may end up creating two creds, and triggering two upcalls
 because the upcall is performed before the credential is added to the
 credcache.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h    |  1 +
 net/sunrpc/auth.c              |  8 ++++++++
 net/sunrpc/auth_gss/auth_gss.c | 23 ++++++++++++++---------
 3 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index bfc5fb279539..2647798b72c7 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -110,6 +110,7 @@ struct rpc_authops {
 
 struct rpc_credops {
 	const char *		cr_name;	/* Name of the auth flavour */
+	int			(*cr_init)(struct rpc_auth *, struct rpc_cred *);
 	void			(*crdestroy)(struct rpc_cred *);
 
 	int			(*crmatch)(struct auth_cred *, struct rpc_cred *, int);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 1ca89c36da7a..8d6f1a176b15 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -232,6 +232,14 @@ retry:
 			goto retry;
 		} else
 			cred = new;
+	} else if ((cred->cr_flags & RPCAUTH_CRED_NEW)
+			&& cred->cr_ops->cr_init != NULL
+			&& !(flags & RPCAUTH_LOOKUP_NEW)) {
+		int res = cred->cr_ops->cr_init(auth, cred);
+		if (res < 0) {
+			put_rpccred(cred);
+			cred = ERR_PTR(res);
+		}
 	}
 
 	return (struct rpc_cred *) cred;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 03affcbf6292..bb46efd92e57 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -788,15 +788,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 	cred->gc_base.cr_ops = &gss_credops;
 	cred->gc_base.cr_flags = RPCAUTH_CRED_NEW;
 	cred->gc_service = gss_auth->service;
-	/* Is the caller prepared to initialise the credential? */
-	if (flags & RPCAUTH_LOOKUP_NEW)
-		goto out;
-	do {
-		err = gss_create_upcall(gss_auth, cred);
-	} while (err == -EAGAIN);
-	if (err < 0)
-		goto out_err;
-out:
 	return &cred->gc_base;
 
 out_err:
@@ -805,6 +796,19 @@ out_err:
 	return ERR_PTR(err);
 }
 
+static int
+gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred)
+{
+	struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+	struct gss_cred *gss_cred = container_of(cred,struct gss_cred, gc_base);
+	int err;
+
+	do {
+		err = gss_create_upcall(gss_auth, gss_cred);
+	} while (err == -EAGAIN);
+	return err;
+}
+
 static int
 gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
 {
@@ -1254,6 +1258,7 @@ static struct rpc_authops authgss_ops = {
 static struct rpc_credops gss_credops = {
 	.cr_name	= "AUTH_GSS",
 	.crdestroy	= gss_destroy_cred,
+	.cr_init	= gss_cred_init,
 	.crmatch	= gss_match,
 	.crmarshal	= gss_marshal,
 	.crrefresh	= gss_refresh,
-- 
cgit v1.2.3-71-gd317


From 00b464debf0038b1628996065f0be564ccfbfd86 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 1 Feb 2006 12:49:28 -0500
Subject: SUNRPC: Remove obsolete rpcauth #defines

 RPCAUTH_CRED_LOCKED, and RPC_AUTH_PROC_CREDS are unused. Kill them.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 2647798b72c7..be4772ed43c0 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -48,9 +48,8 @@ struct rpc_cred {
 
 	/* per-flavor data */
 };
-#define RPCAUTH_CRED_LOCKED	0x0001
+#define RPCAUTH_CRED_NEW	0x0001
 #define RPCAUTH_CRED_UPTODATE	0x0002
-#define RPCAUTH_CRED_NEW	0x0004
 
 #define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
 
@@ -84,9 +83,6 @@ struct rpc_auth {
 	struct rpc_cred_cache *	au_credcache;
 	/* per-flavor data */
 };
-#define RPC_AUTH_PROC_CREDS	0x0010		/* process creds (including
-						 * uid/gid, fs[ug]id, gids)
-						 */
 
 /* Flags for rpcauth_lookupcred() */
 #define RPCAUTH_LOOKUP_NEW		0x01	/* Accept an uninitialised cred */
-- 
cgit v1.2.3-71-gd317


From 9ad11ab48b1ad618bf47076e9e579f267f5306c2 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 2 Feb 2006 16:11:51 +1100
Subject: [PATCH] compat: fix compat_sys_openat and friends

Most of the 64 bit architectures will zero extend the first argument to
compat_sys_{openat,newfstatat,futimesat} which will fail if the 32 bit
syscall was passed AT_FDCWD (which is a small negative number).  Declare
the first argument to be an unsigned int which will force the correct
sign extension when the internal functions are called in each case.

Also, do some small white space cleanups in fs/compat.c.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c              | 12 ++++++------
 include/linux/syscalls.h |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/compat.c b/fs/compat.c
index cc58a20df57a..70c5af4cc270 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -73,17 +73,17 @@ asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __
 	return do_utimes(AT_FDCWD, filename, t ? tv : NULL);
 }
 
-asmlinkage long compat_sys_futimesat(int dfd, char __user *filename, struct compat_timeval __user *t)
+asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, struct compat_timeval __user *t)
 {
 	struct timeval tv[2];
 
-	if (t) { 
+	if (t) {
 		if (get_user(tv[0].tv_sec, &t[0].tv_sec) ||
 		    get_user(tv[0].tv_usec, &t[0].tv_usec) ||
 		    get_user(tv[1].tv_sec, &t[1].tv_sec) ||
 		    get_user(tv[1].tv_usec, &t[1].tv_usec))
-			return -EFAULT; 
-	} 
+			return -EFAULT;
+	}
 	return do_utimes(dfd, filename, t ? tv : NULL);
 }
 
@@ -114,7 +114,7 @@ asmlinkage long compat_sys_newlstat(char __user * filename,
 	return error;
 }
 
-asmlinkage long compat_sys_newfstatat(int dfd, char __user *filename,
+asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename,
 		struct compat_stat __user *statbuf, int flag)
 {
 	struct kstat stat;
@@ -1326,7 +1326,7 @@ compat_sys_open(const char __user *filename, int flags, int mode)
  * O_LARGEFILE flag.
  */
 asmlinkage long
-compat_sys_openat(int dfd, const char __user *filename, int flags, int mode)
+compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int mode)
 {
 	return do_sys_open(dfd, filename, flags, mode);
 }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index fdbd436b24cc..3877209d23c3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -559,12 +559,12 @@ asmlinkage long sys_newfstatat(int dfd, char __user *filename,
 			       struct stat __user *statbuf, int flag);
 asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf,
 			       int bufsiz);
-asmlinkage long compat_sys_futimesat(int dfd, char __user *filename,
+asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename,
 				     struct compat_timeval __user *t);
-asmlinkage long compat_sys_newfstatat(int dfd, char __user * filename,
+asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename,
 				      struct compat_stat __user *statbuf,
 				      int flag);
-asmlinkage long compat_sys_openat(int dfd, const char __user *filename,
+asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
 				   int flags, int mode);
 
 #endif
-- 
cgit v1.2.3-71-gd317


From e92251762d02a46177d4105d1744041e3f8bc465 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Thu, 2 Feb 2006 12:23:12 +0000
Subject: [MMC] Add MMC command type flags

Some hosts need to know the command type, so pass it via a set of
flags in cmd->flags.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/mmc/au1xmmc.c        | 59 ++++++++++++++++++++++----------------------
 drivers/mmc/mmc.c            | 28 ++++++++++-----------
 drivers/mmc/mmc_block.c      |  8 +++---
 drivers/mmc/mmci.c           | 11 +++------
 drivers/mmc/pxamci.c         |  9 ++++---
 drivers/mmc/wbsd.c           |  8 +++---
 include/linux/mmc/mmc.h      | 35 +++++++++++++++++---------
 include/linux/mmc/protocol.h |  2 +-
 8 files changed, 84 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/au1xmmc.c b/drivers/mmc/au1xmmc.c
index aaf04638054e..227c39a7c1b4 100644
--- a/drivers/mmc/au1xmmc.c
+++ b/drivers/mmc/au1xmmc.c
@@ -194,7 +194,7 @@ static int au1xmmc_send_command(struct au1xmmc_host *host, int wait,
 
 	u32 mmccmd = (cmd->opcode << SD_CMD_CI_SHIFT);
 
-	switch(cmd->flags) {
+	switch (mmc_resp_type(cmd)) {
 	case MMC_RSP_R1:
 		mmccmd |= SD_CMD_RT_1;
 		break;
@@ -483,34 +483,35 @@ static void au1xmmc_cmd_complete(struct au1xmmc_host *host, u32 status)
 	cmd = mrq->cmd;
 	cmd->error = MMC_ERR_NONE;
 
-	if ((cmd->flags & MMC_RSP_MASK) == MMC_RSP_SHORT) {
-
-		/* Techincally, we should be getting all 48 bits of the response
-		 * (SD_RESP1 + SD_RESP2), but because our response omits the CRC,
-		 * our data ends up being shifted 8 bits to the right.  In this case,
-		 * that means that the OSR data starts at bit 31, so we can just
-		 * read RESP0 and return that
-		 */
-
-		cmd->resp[0] = au_readl(host->iobase + SD_RESP0);
-	}
-	else if ((cmd->flags & MMC_RSP_MASK) == MMC_RSP_LONG) {
-		u32 r[4];
-		int i;
-
-		r[0] = au_readl(host->iobase + SD_RESP3);
-		r[1] = au_readl(host->iobase + SD_RESP2);
-		r[2] = au_readl(host->iobase + SD_RESP1);
-		r[3] = au_readl(host->iobase + SD_RESP0);
-
-		/* The CRC is omitted from the response, so really we only got
-		 * 120 bytes, but the engine expects 128 bits, so we have to shift
-		 * things up
-		 */
-
-		for(i = 0; i < 4; i++) {
-			cmd->resp[i] = (r[i] & 0x00FFFFFF) << 8;
-			if (i != 3) cmd->resp[i] |= (r[i + 1] & 0xFF000000) >> 24;
+	if (cmd->flags & MMC_RSP_PRESENT) {
+		if (cmd->flags & MMC_RSP_136) {
+			u32 r[4];
+			int i;
+
+			r[0] = au_readl(host->iobase + SD_RESP3);
+			r[1] = au_readl(host->iobase + SD_RESP2);
+			r[2] = au_readl(host->iobase + SD_RESP1);
+			r[3] = au_readl(host->iobase + SD_RESP0);
+
+			/* The CRC is omitted from the response, so really
+			 * we only got 120 bits, but the engine expects
+			 * 128 bits, so we have to shift things up
+			 */
+
+			for(i = 0; i < 4; i++) {
+				cmd->resp[i] = (r[i] & 0x00FFFFFF) << 8;
+				if (i != 3)
+					cmd->resp[i] |= (r[i + 1] & 0xFF000000) >> 24;
+			}
+		} else {
+			/* Technically, we should be getting all 48 bits of
+			 * the response (SD_RESP1 + SD_RESP2), but because
+			 * our response omits the CRC, our data ends up
+			 * being shifted 8 bits to the right.  In this case,
+			 * that means that the OSR data starts at bit 31,
+			 * so we can just read RESP0 and return that
+			 */
+			cmd->resp[0] = au_readl(host->iobase + SD_RESP0);
 		}
 	}
 
diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index bfca5c176e88..1888060c5e0c 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -211,7 +211,7 @@ int mmc_wait_for_app_cmd(struct mmc_host *host, unsigned int rca,
 
 		appcmd.opcode = MMC_APP_CMD;
 		appcmd.arg = rca << 16;
-		appcmd.flags = MMC_RSP_R1;
+		appcmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 		appcmd.retries = 0;
 		memset(appcmd.resp, 0, sizeof(appcmd.resp));
 		appcmd.data = NULL;
@@ -331,7 +331,7 @@ static int mmc_select_card(struct mmc_host *host, struct mmc_card *card)
 
 	cmd.opcode = MMC_SELECT_CARD;
 	cmd.arg = card->rca << 16;
-	cmd.flags = MMC_RSP_R1;
+	cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 
 	err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES);
 	if (err != MMC_ERR_NONE)
@@ -358,7 +358,7 @@ static int mmc_select_card(struct mmc_host *host, struct mmc_card *card)
 			struct mmc_command cmd;
 			cmd.opcode = SD_APP_SET_BUS_WIDTH;
 			cmd.arg = SD_BUS_WIDTH_4;
-			cmd.flags = MMC_RSP_R1;
+			cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 
 			err = mmc_wait_for_app_cmd(host, card->rca, &cmd,
 				CMD_RETRIES);
@@ -386,7 +386,7 @@ static void mmc_deselect_cards(struct mmc_host *host)
 
 		cmd.opcode = MMC_SELECT_CARD;
 		cmd.arg = 0;
-		cmd.flags = MMC_RSP_NONE;
+		cmd.flags = MMC_RSP_NONE | MMC_CMD_AC;
 
 		mmc_wait_for_cmd(host, &cmd, 0);
 	}
@@ -677,7 +677,7 @@ static void mmc_idle_cards(struct mmc_host *host)
 
 	cmd.opcode = MMC_GO_IDLE_STATE;
 	cmd.arg = 0;
-	cmd.flags = MMC_RSP_NONE;
+	cmd.flags = MMC_RSP_NONE | MMC_CMD_BC;
 
 	mmc_wait_for_cmd(host, &cmd, 0);
 
@@ -738,7 +738,7 @@ static int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr)
 
 	cmd.opcode = MMC_SEND_OP_COND;
 	cmd.arg = ocr;
-	cmd.flags = MMC_RSP_R3;
+	cmd.flags = MMC_RSP_R3 | MMC_CMD_BCR;
 
 	for (i = 100; i; i--) {
 		err = mmc_wait_for_cmd(host, &cmd, 0);
@@ -766,7 +766,7 @@ static int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr)
 
 	cmd.opcode = SD_APP_OP_COND;
 	cmd.arg = ocr;
-	cmd.flags = MMC_RSP_R3;
+	cmd.flags = MMC_RSP_R3 | MMC_CMD_BCR;
 
 	for (i = 100; i; i--) {
 		err = mmc_wait_for_app_cmd(host, 0, &cmd, CMD_RETRIES);
@@ -805,7 +805,7 @@ static void mmc_discover_cards(struct mmc_host *host)
 
 		cmd.opcode = MMC_ALL_SEND_CID;
 		cmd.arg = 0;
-		cmd.flags = MMC_RSP_R2;
+		cmd.flags = MMC_RSP_R2 | MMC_CMD_BCR;
 
 		err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES);
 		if (err == MMC_ERR_TIMEOUT) {
@@ -835,7 +835,7 @@ static void mmc_discover_cards(struct mmc_host *host)
 
 			cmd.opcode = SD_SEND_RELATIVE_ADDR;
 			cmd.arg = 0;
-			cmd.flags = MMC_RSP_R6;
+			cmd.flags = MMC_RSP_R6 | MMC_CMD_BCR;
 
 			err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES);
 			if (err != MMC_ERR_NONE)
@@ -856,7 +856,7 @@ static void mmc_discover_cards(struct mmc_host *host)
 		} else {
 			cmd.opcode = MMC_SET_RELATIVE_ADDR;
 			cmd.arg = card->rca << 16;
-			cmd.flags = MMC_RSP_R1;
+			cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 
 			err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES);
 			if (err != MMC_ERR_NONE)
@@ -878,7 +878,7 @@ static void mmc_read_csds(struct mmc_host *host)
 
 		cmd.opcode = MMC_SEND_CSD;
 		cmd.arg = card->rca << 16;
-		cmd.flags = MMC_RSP_R2;
+		cmd.flags = MMC_RSP_R2 | MMC_CMD_AC;
 
 		err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES);
 		if (err != MMC_ERR_NONE) {
@@ -920,7 +920,7 @@ static void mmc_read_scrs(struct mmc_host *host)
 
 		cmd.opcode = MMC_APP_CMD;
 		cmd.arg = card->rca << 16;
-		cmd.flags = MMC_RSP_R1;
+		cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 
 		err = mmc_wait_for_cmd(host, &cmd, 0);
 		if ((err != MMC_ERR_NONE) || !(cmd.resp[0] & R1_APP_CMD)) {
@@ -932,7 +932,7 @@ static void mmc_read_scrs(struct mmc_host *host)
 
 		cmd.opcode = SD_APP_SEND_SCR;
 		cmd.arg = 0;
-		cmd.flags = MMC_RSP_R1;
+		cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
 
 		memset(&data, 0, sizeof(struct mmc_data));
 
@@ -1003,7 +1003,7 @@ static void mmc_check_cards(struct mmc_host *host)
 
 		cmd.opcode = MMC_SEND_STATUS;
 		cmd.arg = card->rca << 16;
-		cmd.flags = MMC_RSP_R1;
+		cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 
 		err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES);
 		if (err == MMC_ERR_NONE)
diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c
index 5b014c370e80..8eb2a2ede64b 100644
--- a/drivers/mmc/mmc_block.c
+++ b/drivers/mmc/mmc_block.c
@@ -171,14 +171,14 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		brq.mrq.data = &brq.data;
 
 		brq.cmd.arg = req->sector << 9;
-		brq.cmd.flags = MMC_RSP_R1;
+		brq.cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
 		brq.data.timeout_ns = card->csd.tacc_ns * 10;
 		brq.data.timeout_clks = card->csd.tacc_clks * 10;
 		brq.data.blksz_bits = md->block_bits;
 		brq.data.blocks = req->nr_sectors >> (md->block_bits - 9);
 		brq.stop.opcode = MMC_STOP_TRANSMISSION;
 		brq.stop.arg = 0;
-		brq.stop.flags = MMC_RSP_R1B;
+		brq.stop.flags = MMC_RSP_R1B | MMC_CMD_AC;
 
 		if (rq_data_dir(req) == READ) {
 			brq.cmd.opcode = brq.data.blocks > 1 ? MMC_READ_MULTIPLE_BLOCK : MMC_READ_SINGLE_BLOCK;
@@ -223,7 +223,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 
 			cmd.opcode = MMC_SEND_STATUS;
 			cmd.arg = card->rca << 16;
-			cmd.flags = MMC_RSP_R1;
+			cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 			err = mmc_wait_for_cmd(card->host, &cmd, 5);
 			if (err) {
 				printk(KERN_ERR "%s: error %d requesting status\n",
@@ -430,7 +430,7 @@ mmc_blk_set_blksize(struct mmc_blk_data *md, struct mmc_card *card)
 	mmc_card_claim_host(card);
 	cmd.opcode = MMC_SET_BLOCKLEN;
 	cmd.arg = 1 << md->block_bits;
-	cmd.flags = MMC_RSP_R1;
+	cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 	err = mmc_wait_for_cmd(card->host, &cmd, 5);
 	mmc_card_release_host(card);
 
diff --git a/drivers/mmc/mmci.c b/drivers/mmc/mmci.c
index 634ef53e85a5..37ee7f8dc82f 100644
--- a/drivers/mmc/mmci.c
+++ b/drivers/mmc/mmci.c
@@ -124,15 +124,10 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
 	}
 
 	c |= cmd->opcode | MCI_CPSM_ENABLE;
-	switch (cmd->flags & MMC_RSP_MASK) {
-	case MMC_RSP_NONE:
-	default:
-		break;
-	case MMC_RSP_LONG:
-		c |= MCI_CPSM_LONGRSP;
-	case MMC_RSP_SHORT:
+	if (cmd->flags & MMC_RSP_PRESENT) {
+		if (cmd->flags & MMC_RSP_136)
+			c |= MCI_CPSM_LONGRSP;
 		c |= MCI_CPSM_RESPONSE;
-		break;
 	}
 	if (/*interrupt*/0)
 		c |= MCI_CPSM_INTERRUPT;
diff --git a/drivers/mmc/pxamci.c b/drivers/mmc/pxamci.c
index ee8f8a0420d1..285d7d068097 100644
--- a/drivers/mmc/pxamci.c
+++ b/drivers/mmc/pxamci.c
@@ -178,14 +178,15 @@ static void pxamci_start_cmd(struct pxamci_host *host, struct mmc_command *cmd,
 	if (cmd->flags & MMC_RSP_BUSY)
 		cmdat |= CMDAT_BUSY;
 
-	switch (cmd->flags & (MMC_RSP_MASK | MMC_RSP_CRC)) {
-	case MMC_RSP_SHORT | MMC_RSP_CRC:
+#define RSP_TYPE(x)	((x) & ~(MMC_RSP_BUSY|MMC_RSP_OPCODE))
+	switch (RSP_TYPE(mmc_resp_type(cmd))) {
+	case RSP_TYPE(MMC_RSP_R1): /* r1, r1b, r6 */
 		cmdat |= CMDAT_RESP_SHORT;
 		break;
-	case MMC_RSP_SHORT:
+	case RSP_TYPE(MMC_RSP_R3):
 		cmdat |= CMDAT_RESP_R3;
 		break;
-	case MMC_RSP_LONG | MMC_RSP_CRC:
+	case RSP_TYPE(MMC_RSP_R2):
 		cmdat |= CMDAT_RESP_R2;
 		break;
 	default:
diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c
index f25757625361..3be397d436fa 100644
--- a/drivers/mmc/wbsd.c
+++ b/drivers/mmc/wbsd.c
@@ -459,7 +459,7 @@ static void wbsd_send_command(struct wbsd_host *host, struct mmc_command *cmd)
 	/*
 	 * Do we expect a reply?
 	 */
-	if ((cmd->flags & MMC_RSP_MASK) != MMC_RSP_NONE) {
+	if (cmd->flags & MMC_RSP_PRESENT) {
 		/*
 		 * Read back status.
 		 */
@@ -476,10 +476,10 @@ static void wbsd_send_command(struct wbsd_host *host, struct mmc_command *cmd)
 			cmd->error = MMC_ERR_BADCRC;
 		/* All ok */
 		else {
-			if ((cmd->flags & MMC_RSP_MASK) == MMC_RSP_SHORT)
-				wbsd_get_short_reply(host, cmd);
-			else
+			if (cmd->flags & MMC_RSP_136)
 				wbsd_get_long_reply(host, cmd);
+			else
+				wbsd_get_short_reply(host, cmd);
 		}
 	}
 
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index ccd3e13de1e8..f38872abc126 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -21,24 +21,35 @@ struct mmc_command {
 	u32			arg;
 	u32			resp[4];
 	unsigned int		flags;		/* expected response type */
-#define MMC_RSP_NONE	(0 << 0)
-#define MMC_RSP_SHORT	(1 << 0)
-#define MMC_RSP_LONG	(2 << 0)
-#define MMC_RSP_MASK	(3 << 0)
-#define MMC_RSP_CRC	(1 << 3)		/* expect valid crc */
-#define MMC_RSP_BUSY	(1 << 4)		/* card may send busy */
-#define MMC_RSP_OPCODE	(1 << 5)		/* response contains opcode */
+#define MMC_RSP_PRESENT	(1 << 0)
+#define MMC_RSP_136	(1 << 1)		/* 136 bit response */
+#define MMC_RSP_CRC	(1 << 2)		/* expect valid crc */
+#define MMC_RSP_BUSY	(1 << 3)		/* card may send busy */
+#define MMC_RSP_OPCODE	(1 << 4)		/* response contains opcode */
+#define MMC_CMD_MASK	(3 << 5)		/* command type */
+#define MMC_CMD_AC	(0 << 5)
+#define MMC_CMD_ADTC	(1 << 5)
+#define MMC_CMD_BC	(2 << 5)
+#define MMC_CMD_BCR	(3 << 5)
 
 /*
  * These are the response types, and correspond to valid bit
  * patterns of the above flags.  One additional valid pattern
  * is all zeros, which means we don't expect a response.
  */
-#define MMC_RSP_R1	(MMC_RSP_SHORT|MMC_RSP_CRC|MMC_RSP_OPCODE)
-#define MMC_RSP_R1B	(MMC_RSP_SHORT|MMC_RSP_CRC|MMC_RSP_OPCODE|MMC_RSP_BUSY)
-#define MMC_RSP_R2	(MMC_RSP_LONG|MMC_RSP_CRC)
-#define MMC_RSP_R3	(MMC_RSP_SHORT)
-#define MMC_RSP_R6	(MMC_RSP_SHORT|MMC_RSP_CRC)
+#define MMC_RSP_NONE	(0)
+#define MMC_RSP_R1	(MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE)
+#define MMC_RSP_R1B	(MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE|MMC_RSP_BUSY)
+#define MMC_RSP_R2	(MMC_RSP_PRESENT|MMC_RSP_136|MMC_RSP_CRC)
+#define MMC_RSP_R3	(MMC_RSP_PRESENT)
+#define MMC_RSP_R6	(MMC_RSP_PRESENT|MMC_RSP_CRC)
+
+#define mmc_resp_type(cmd)	((cmd)->flags & (MMC_RSP_PRESENT|MMC_RSP_136|MMC_RSP_CRC|MMC_RSP_BUSY|MMC_RSP_OPCODE))
+
+/*
+ * Command type of cmd: one of MMC_CMD_AC, _ADTC, _BC or _BCR.
+ */
+#define mmc_cmd_type(cmd)	((cmd)->flags & MMC_CMD_MASK)
 
 	unsigned int		retries;	/* max number of retries */
 	unsigned int		error;		/* command error */
diff --git a/include/linux/mmc/protocol.h b/include/linux/mmc/protocol.h
index a14dc306545b..81c3f77f652c 100644
--- a/include/linux/mmc/protocol.h
+++ b/include/linux/mmc/protocol.h
@@ -79,7 +79,7 @@
 /* SD commands                           type  argument     response */
   /* class 8 */
 /* This is basically the same command as for MMC with some quirks. */
-#define SD_SEND_RELATIVE_ADDR     3   /* ac                      R6  */
+#define SD_SEND_RELATIVE_ADDR     3   /* bcr                     R6  */
 
   /* Application commands */
 #define SD_APP_SET_BUS_WIDTH      6   /* ac   [1:0] bus width    R1  */
-- 
cgit v1.2.3-71-gd317


From 3ec9c59449744dcc390d593a017d30671546fd9e Mon Sep 17 00:00:00 2001
From: Andrey Panin <pazke@donpac.ru>
Date: Thu, 2 Feb 2006 20:15:09 +0000
Subject: [SERIAL] SIIG 8-port serial boards support

This patch adds support for SIIG 8-port boards. These boards have 4 ports in
separate BARs and another 4 ports in a single BAR. Because of this unusual
port arrangement, these cards need a special setup function. Fortunately, no other
SIIG cards have more than 4 ports, so this setup function can be used for them
too.

Signed-off-by: Andrey Panin <pazke@donpac.ru>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/8250_pci.c | 25 ++++++++++++++++++++++++-
 include/linux/pci_ids.h   |  3 +++
 2 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 2a912153321e..bb9ec28ccc2b 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -439,6 +439,20 @@ static int pci_siig_init(struct pci_dev *dev)
 	return -ENODEV;
 }
 
+static int pci_siig_setup(struct serial_private *priv,
+			  struct pciserial_board *board,
+			  struct uart_port *port, int idx)
+{
+	unsigned int bar = FL_GET_BASE(board->flags) + idx, offset = 0;
+
+	if (idx > 3) {
+		bar = 4;
+		offset = (idx - 4) * 8;
+	}
+
+	return setup_port(priv, port, bar, offset, 0);
+}
+
 /*
  * Timedia has an explosion of boards, and to avoid the PCI table from
  * growing *huge*, we use this function to collapse some 70 entries
@@ -748,7 +762,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
 		.init		= pci_siig_init,
-		.setup		= pci_default_setup,
+		.setup		= pci_siig_setup,
 	},
 	/*
 	 * Titan cards
@@ -2141,6 +2155,15 @@ static struct pci_device_id serial_pci_tbl[] = {
 	{	PCI_VENDOR_ID_SIIG, PCI_DEVICE_ID_SIIG_4S_20x_850,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_bt_4_921600 },
+	{	PCI_VENDOR_ID_SIIG, PCI_DEVICE_ID_SIIG_8S_20x_550,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b0_bt_8_921600 },
+	{	PCI_VENDOR_ID_SIIG, PCI_DEVICE_ID_SIIG_8S_20x_650,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b0_bt_8_921600 },
+	{	PCI_VENDOR_ID_SIIG, PCI_DEVICE_ID_SIIG_8S_20x_850,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b0_bt_8_921600 },
 
 	/*
 	 * Computone devices submitted by Doug McNash dmcnash@computone.com
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b0b908f583c5..92a619ba163f 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1670,6 +1670,9 @@
 #define PCI_DEVICE_ID_SIIG_2S1P_20x_550	0x2060
 #define PCI_DEVICE_ID_SIIG_2S1P_20x_650	0x2061
 #define PCI_DEVICE_ID_SIIG_2S1P_20x_850	0x2062
+#define PCI_DEVICE_ID_SIIG_8S_20x_550	0x2080
+#define PCI_DEVICE_ID_SIIG_8S_20x_650	0x2081
+#define PCI_DEVICE_ID_SIIG_8S_20x_850	0x2082
 #define PCI_SUBDEVICE_ID_SIIG_QUARTET_SERIAL	0x2050
 
 #define PCI_VENDOR_ID_RADISYS		0x1331
-- 
cgit v1.2.3-71-gd317


From c0c1633bdb844ec5b4ac15c97ab8927d80550c42 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 3 Feb 2006 03:03:44 -0800
Subject: [PATCH] Fix build failure in recent pm_prepare_* changes.

kernel/power/power.h:49: error: static declaration of 'pm_prepare_console' follows non-static declaration
include/linux/suspend.h:46: error: previous declaration of 'pm_prepare_console' was here
kernel/power/power.h:50: error: static declaration of 'pm_restore_console' follows non-static declaration
include/linux/suspend.h:47: error: previous declaration of 'pm_restore_console' was here

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 5dc94e777fab..43bcd13eb1ec 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -42,10 +42,6 @@ extern void mark_free_pages(struct zone *zone);
 #ifdef CONFIG_PM
 /* kernel/power/swsusp.c */
 extern int software_suspend(void);
-
-extern int pm_prepare_console(void);
-extern void pm_restore_console(void);
-
 #else
 static inline int software_suspend(void)
 {
-- 
cgit v1.2.3-71-gd317


From f434baf4c6ae4a392b7c34843825af0894c89db2 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <mita@miraclelinux.com>
Date: Fri, 3 Feb 2006 03:03:46 -0800
Subject: [PATCH] fix generic_fls64()

Noticed by Rune Torgersen.

Fix generic_fls64().  tcp_cubic is using fls64().

Signed-off-by: Akinobu Mita <mita@miraclelinux.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bitops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 6a2a19f14bb2..208650b1ad3a 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -81,7 +81,7 @@ static inline int generic_fls64(__u64 x)
 {
 	__u32 h = x >> 32;
 	if (h)
-		return fls(x) + 32;
+		return fls(h) + 32;
 	return fls(x);
 }
 
-- 
cgit v1.2.3-71-gd317


From 490d6ab170c94a7a59870f528d765131b8963e79 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Fri, 3 Feb 2006 03:03:56 -0800
Subject: [PATCH] list.h: don't evaluate macro args multiple times

I noticed that list.h init functions were evaluating macro arguments
multiple times and thought it might be nice to protect the unsuspecting
caller.  Converting the macros to inline functions seems to reduce code
size, too.  A i386 defconfig build with gcc 3.3.3 from fc4:

   text    data     bss     dec     hex filename
3573148  565664  188828 4327640  4208d8 vmlinux.before
3572177  565664  188828 4326669  42050d vmlinux

add/remove: 0/0 grow/shrink: 11/144 up/down: 88/-1016 (-928)

There was no difference in checkstack output.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/list.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 945daa1f13dd..47208bd99f9e 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -34,9 +34,11 @@ struct list_head {
 #define LIST_HEAD(name) \
 	struct list_head name = LIST_HEAD_INIT(name)
 
-#define INIT_LIST_HEAD(ptr) do { \
-	(ptr)->next = (ptr); (ptr)->prev = (ptr); \
-} while (0)
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
 
 /*
  * Insert a new entry between two known consecutive entries.
@@ -534,7 +536,11 @@ struct hlist_node {
 #define HLIST_HEAD_INIT { .first = NULL }
 #define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
 #define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
-#define INIT_HLIST_NODE(ptr) ((ptr)->next = NULL, (ptr)->pprev = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
 
 static inline int hlist_unhashed(const struct hlist_node *h)
 {
-- 
cgit v1.2.3-71-gd317


From e295cfcb2907ae4c5df57f5d4ada1ce6f3ae4657 Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Fri, 3 Feb 2006 03:04:04 -0800
Subject: [PATCH] ufs: fix oops with `ufs1' type

The "rm" command on a file system of type "ufs1" causes the system to hang.
This is, in fact, not as bad as it seems, because further along the same
kernel control path there are 3-4 places which may cause an oops.

So the first patch fixes the oopses, and the second patch fixes the kernel hang.

The oops appears because the cylinder group summary info is read partly
wrongly, so that accessing the summary info of any group other than the first
causes an oops.

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ufs/super.c            | 10 ++++++----
 include/linux/ufs_fs.h    |  3 +--
 include/linux/ufs_fs_sb.h |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index d4aacee593ff..e9055ef7f5ac 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -388,7 +388,8 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
 /*
  * Read on-disk structures associated with cylinder groups
  */
-static int ufs_read_cylinder_structures (struct super_block *sb) {
+static int ufs_read_cylinder_structures (struct super_block *sb)
+{
 	struct ufs_sb_info * sbi = UFS_SB(sb);
 	struct ufs_sb_private_info * uspi;
 	struct ufs_super_block *usb;
@@ -415,6 +416,7 @@ static int ufs_read_cylinder_structures (struct super_block *sb) {
 	base = space = kmalloc(size, GFP_KERNEL);
 	if (!base)
 		goto failed; 
+	sbi->s_csp = (struct ufs_csum *)space;
 	for (i = 0; i < blks; i += uspi->s_fpb) {
 		size = uspi->s_bsize;
 		if (i + uspi->s_fpb > blks)
@@ -430,7 +432,6 @@ static int ufs_read_cylinder_structures (struct super_block *sb) {
 			goto failed;
 
 		ubh_ubhcpymem (space, ubh, size);
-		sbi->s_csp[ufs_fragstoblks(i)]=(struct ufs_csum *)space;
 
 		space += size;
 		ubh_brelse (ubh);
@@ -486,7 +487,8 @@ failed:
  * Put on-disk structures associated with cylinder groups and 
  * write them back to disk
  */
-static void ufs_put_cylinder_structures (struct super_block *sb) {
+static void ufs_put_cylinder_structures (struct super_block *sb)
+{
 	struct ufs_sb_info * sbi = UFS_SB(sb);
 	struct ufs_sb_private_info * uspi;
 	struct ufs_buffer_head * ubh;
@@ -499,7 +501,7 @@ static void ufs_put_cylinder_structures (struct super_block *sb) {
 
 	size = uspi->s_cssize;
 	blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
-	base = space = (char*) sbi->s_csp[0];
+	base = space = (char*) sbi->s_csp;
 	for (i = 0; i < blks; i += uspi->s_fpb) {
 		size = uspi->s_bsize;
 		if (i + uspi->s_fpb > blks)
diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h
index 7a6babeca256..f26118ea1c58 100644
--- a/include/linux/ufs_fs.h
+++ b/include/linux/ufs_fs.h
@@ -502,8 +502,7 @@ struct ufs_super_block {
 /*
  * Convert cylinder group to base address of its global summary info.
  */
-#define fs_cs(indx) \
-	s_csp[(indx) >> uspi->s_csshift][(indx) & ~uspi->s_csmask]
+#define fs_cs(indx) s_csp[(indx)]
 
 /*
  * Cylinder group block for a file system.
diff --git a/include/linux/ufs_fs_sb.h b/include/linux/ufs_fs_sb.h
index c1be4c226486..8ff13c160f3d 100644
--- a/include/linux/ufs_fs_sb.h
+++ b/include/linux/ufs_fs_sb.h
@@ -25,7 +25,7 @@ struct ufs_csum;
 
 struct ufs_sb_info {
 	struct ufs_sb_private_info * s_uspi;	
-	struct ufs_csum	* s_csp[UFS_MAXCSBUFS];
+	struct ufs_csum	* s_csp;
 	unsigned s_bytesex;
 	unsigned s_flags;
 	struct buffer_head ** s_ucg;
-- 
cgit v1.2.3-71-gd317


From 09114eb8c53d2d3b2ff9523e011cb68b2e245dce Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Fri, 3 Feb 2006 03:04:06 -0800
Subject: [PATCH] ufs: fix hang during `rm'

This fixes the code like this:

	bh = sb_find_get_block (sb, tmp + j);
	if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *p)) {
		retry = 1;
		brelse (bh);
		goto next1;
	}
	bforget (bh);

sb_find_get_block() ordinarily returns a buffer_head with b_count>=2, and
this code assumes that a buffer with "b_count>1" is in use, so this caused
an infinite loop.

(akpm: that is-the-buffer-busy code is incomprehensible.  Good riddance.  Use
of block_truncate_page() seems sane).

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ufs/inode.c         |  2 +-
 fs/ufs/truncate.c      | 72 +++++++++++---------------------------------------
 include/linux/ufs_fs.h |  1 +
 3 files changed, 18 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e0c04e36a051..3c3f62ce2ad9 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -376,7 +376,7 @@ out:
  * This function gets the block which contains the fragment.
  */
 
-static int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create)
+int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create)
 {
 	struct super_block * sb = inode->i_sb;
 	struct ufs_sb_private_info * uspi = UFS_SB(sb)->s_uspi;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 61d2e35012a4..02e86291ef8a 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -29,6 +29,11 @@
  * Idea from Pierre del Perugia <delperug@gla.ecoledoc.ibp.fr>
  */
 
+/*
+ * Modified to avoid infinite loop in 2006 by
+ * Evgeniy Dushistov <dushistov@mail.ru>
+ */
+
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/ufs_fs.h>
@@ -65,19 +70,16 @@
 #define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
 #define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
 
-#define DATA_BUFFER_USED(bh) \
-	(atomic_read(&bh->b_count)>1 || buffer_locked(bh))
 
 static int ufs_trunc_direct (struct inode * inode)
 {
 	struct ufs_inode_info *ufsi = UFS_I(inode);
 	struct super_block * sb;
 	struct ufs_sb_private_info * uspi;
-	struct buffer_head * bh;
 	__fs32 * p;
 	unsigned frag1, frag2, frag3, frag4, block1, block2;
 	unsigned frag_to_free, free_count;
-	unsigned i, j, tmp;
+	unsigned i, tmp;
 	int retry;
 	
 	UFSD(("ENTER\n"))
@@ -117,15 +119,7 @@ static int ufs_trunc_direct (struct inode * inode)
 		ufs_panic (sb, "ufs_trunc_direct", "internal error");
 	frag1 = ufs_fragnum (frag1);
 	frag2 = ufs_fragnum (frag2);
-	for (j = frag1; j < frag2; j++) {
-		bh = sb_find_get_block (sb, tmp + j);
-		if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *p)) {
-			retry = 1;
-			brelse (bh);
-			goto next1;
-		}
-		bforget (bh);
-	}
+
 	inode->i_blocks -= (frag2-frag1) << uspi->s_nspfshift;
 	mark_inode_dirty(inode);
 	ufs_free_fragments (inode, tmp + frag1, frag2 - frag1);
@@ -140,15 +134,7 @@ next1:
 		tmp = fs32_to_cpu(sb, *p);
 		if (!tmp)
 			continue;
-		for (j = 0; j < uspi->s_fpb; j++) {
-			bh = sb_find_get_block(sb, tmp + j);
-			if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *p)) {
-				retry = 1;
-				brelse (bh);
-				goto next2;
-			}
-			bforget (bh);
-		}
+
 		*p = 0;
 		inode->i_blocks -= uspi->s_nspb;
 		mark_inode_dirty(inode);
@@ -162,7 +148,6 @@ next1:
 			frag_to_free = tmp;
 			free_count = uspi->s_fpb;
 		}
-next2:;
 	}
 	
 	if (free_count > 0)
@@ -179,15 +164,7 @@ next2:;
 	if (!tmp )
 		ufs_panic(sb, "ufs_truncate_direct", "internal error");
 	frag4 = ufs_fragnum (frag4);
-	for (j = 0; j < frag4; j++) {
-		bh = sb_find_get_block (sb, tmp + j);
-		if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *p)) {
-			retry = 1;
-			brelse (bh);
-			goto next1;
-		}
-		bforget (bh);
-	}
+
 	*p = 0;
 	inode->i_blocks -= frag4 << uspi->s_nspfshift;
 	mark_inode_dirty(inode);
@@ -204,9 +181,8 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
 	struct super_block * sb;
 	struct ufs_sb_private_info * uspi;
 	struct ufs_buffer_head * ind_ubh;
-	struct buffer_head * bh;
 	__fs32 * ind;
-	unsigned indirect_block, i, j, tmp;
+	unsigned indirect_block, i, tmp;
 	unsigned frag_to_free, free_count;
 	int retry;
 
@@ -238,15 +214,7 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
 		tmp = fs32_to_cpu(sb, *ind);
 		if (!tmp)
 			continue;
-		for (j = 0; j < uspi->s_fpb; j++) {
-			bh = sb_find_get_block(sb, tmp + j);
-			if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *ind)) {
-				retry = 1;
-				brelse (bh);
-				goto next;
-			}
-			bforget (bh);
-		}	
+
 		*ind = 0;
 		ubh_mark_buffer_dirty(ind_ubh);
 		if (free_count == 0) {
@@ -261,7 +229,6 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
 		}
 		inode->i_blocks -= uspi->s_nspb;
 		mark_inode_dirty(inode);
-next:;
 	}
 
 	if (free_count > 0) {
@@ -430,9 +397,7 @@ void ufs_truncate (struct inode * inode)
 	struct ufs_inode_info *ufsi = UFS_I(inode);
 	struct super_block * sb;
 	struct ufs_sb_private_info * uspi;
-	struct buffer_head * bh;
-	unsigned offset;
-	int err, retry;
+	int retry;
 	
 	UFSD(("ENTER\n"))
 	sb = inode->i_sb;
@@ -442,6 +407,9 @@ void ufs_truncate (struct inode * inode)
 		return;
 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 		return;
+
+	block_truncate_page(inode->i_mapping,	inode->i_size, ufs_getfrag_block);
+
 	lock_kernel();
 	while (1) {
 		retry = ufs_trunc_direct(inode);
@@ -457,15 +425,7 @@ void ufs_truncate (struct inode * inode)
 		blk_run_address_space(inode->i_mapping);
 		yield();
 	}
-	offset = inode->i_size & uspi->s_fshift;
-	if (offset) {
-		bh = ufs_bread (inode, inode->i_size >> uspi->s_fshift, 0, &err);
-		if (bh) {
-			memset (bh->b_data + offset, 0, uspi->s_fsize - offset);
-			mark_buffer_dirty (bh);
-			brelse (bh);
-		}
-	}
+
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
 	ufsi->i_lastfrag = DIRECT_FRAGMENT;
 	unlock_kernel();
diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h
index f26118ea1c58..74aaf298b40d 100644
--- a/include/linux/ufs_fs.h
+++ b/include/linux/ufs_fs.h
@@ -912,6 +912,7 @@ extern int ufs_sync_inode (struct inode *);
 extern void ufs_delete_inode (struct inode *);
 extern struct buffer_head * ufs_getfrag (struct inode *, unsigned, int, int *);
 extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
+extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create);
 
 /* namei.c */
 extern struct file_operations ufs_dir_operations;
-- 
cgit v1.2.3-71-gd317


From 47ba87e0b1269698801310bfd1716b0538282405 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <marcelo.tosatti@cyclades.com>
Date: Fri, 3 Feb 2006 03:04:06 -0800
Subject: [PATCH] make "struct d_cookie" depend on CONFIG_PROFILING

Shrinks "struct dentry" from 128 bytes to 124 on x86, allowing 31 objects
per slab instead of 30.

Cc: John Levon <levon@movementarian.org>
Cc: Philippe Elie <phil.el@wanadoo.fr>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c            | 2 ++
 include/linux/dcache.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 86bdb93789c6..a173bba32666 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -743,7 +743,9 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
 	dentry->d_mounted = 0;
+#ifdef CONFIG_PROFILING
 	dentry->d_cookie = NULL;
+#endif
 	INIT_HLIST_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a3ed5e059d47..a3f09947940e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -108,7 +108,9 @@ struct dentry {
 	struct dentry_operations *d_op;
 	struct super_block *d_sb;	/* The root of the dentry tree */
 	void *d_fsdata;			/* fs-specific data */
+#ifdef CONFIG_PROFILING
 	struct dcookie_struct *d_cookie; /* cookie, if any */
+#endif
 	int d_mounted;
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
 };
-- 
cgit v1.2.3-71-gd317


From 9d923a0603b5909a6758cfaec503955775cb7ab1 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Fri, 3 Feb 2006 03:04:12 -0800
Subject: [PATCH] ufs: fix char vs. __s8 clash in ufs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix this warning:

fs/ufs/super.c: In function ‘ufs_fill_super’:
fs/ufs/super.c:858: warning: case label value exceeds maximum value for type

which happens because __s8 != char.  These macros are used for struct
ufs_super_block.fs_clean which is declared as __s8.

Signed-off-by: Andreas Schwab <schwab@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ufs_fs.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h
index 74aaf298b40d..b0ffe4356e5a 100644
--- a/include/linux/ufs_fs.h
+++ b/include/linux/ufs_fs.h
@@ -148,11 +148,11 @@ typedef __u16 __bitwise __fs16;
 #define UFS_USEEFT  ((__u16)65535)
 
 #define UFS_FSOK      0x7c269d38
-#define UFS_FSACTIVE  ((char)0x00)
-#define UFS_FSCLEAN   ((char)0x01)
-#define UFS_FSSTABLE  ((char)0x02)
-#define UFS_FSOSF1    ((char)0x03)	/* is this correct for DEC OSF/1? */
-#define UFS_FSBAD     ((char)0xff)
+#define UFS_FSACTIVE  ((__s8)0x00)
+#define UFS_FSCLEAN   ((__s8)0x01)
+#define UFS_FSSTABLE  ((__s8)0x02)
+#define UFS_FSOSF1    ((__s8)0x03)	/* is this correct for DEC OSF/1? */
+#define UFS_FSBAD     ((__s8)0xff)
 
 /* From here to next blank line, s_flags for ufs_sb_info */
 /* directory entry encoding */
-- 
cgit v1.2.3-71-gd317


From 8e75f744289f0a1c38b669e39a489af460640881 Mon Sep 17 00:00:00 2001
From: Arnaud Giersch <arnaud.giersch@free.fr>
Date: Fri, 3 Feb 2006 03:04:16 -0800
Subject: [PATCH] parport: add parallel port support for SGI O2

Add support for the built-in parallel port on SGI O2 (a.k.a.  IP32).
Define a new configuration option: PARPORT_IP32.  The module is named
parport_ip32.

Hardware support for SPP, EPP and ECP modes along with DMA support when
available are currently implemented.

Signed-off-by: Arnaud Giersch <arnaud.giersch@free.fr>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/parport/Kconfig        |    9 +
 drivers/parport/Makefile       |    1 +
 drivers/parport/parport_ip32.c | 2253 ++++++++++++++++++++++++++++++++++++++++
 include/linux/parport.h        |    6 +
 4 files changed, 2269 insertions(+)
 create mode 100644 drivers/parport/parport_ip32.c

(limited to 'include/linux')

diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig
index f605dea57224..f63c387976cf 100644
--- a/drivers/parport/Kconfig
+++ b/drivers/parport/Kconfig
@@ -90,6 +90,15 @@ config PARPORT_ARC
 	depends on ARM && PARPORT
 	select PARPORT_NOT_PC
 
+config PARPORT_IP32
+	tristate "SGI IP32 builtin port (EXPERIMENTAL)"
+	depends on SGI_IP32 && PARPORT && EXPERIMENTAL
+	select PARPORT_NOT_PC
+	help
+	  Say Y here if you need support for the parallel port on
+	  SGI O2 machines. This code is also available as a module (say M),
+	  called parport_ip32.  If in doubt, saying N is the safe plan.
+
 config PARPORT_AMIGA
 	tristate "Amiga builtin port"
 	depends on AMIGA && PARPORT
diff --git a/drivers/parport/Makefile b/drivers/parport/Makefile
index 5372212bb9d9..a19de35f8de2 100644
--- a/drivers/parport/Makefile
+++ b/drivers/parport/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_PARPORT_MFC3)	+= parport_mfc3.o
 obj-$(CONFIG_PARPORT_ATARI)	+= parport_atari.o
 obj-$(CONFIG_PARPORT_SUNBPP)	+= parport_sunbpp.o
 obj-$(CONFIG_PARPORT_GSC)	+= parport_gsc.o
+obj-$(CONFIG_PARPORT_IP32)	+= parport_ip32.o
diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c
new file mode 100644
index 000000000000..46e06e596d73
--- /dev/null
+++ b/drivers/parport/parport_ip32.c
@@ -0,0 +1,2253 @@
+/* Low-level parallel port routines for built-in port on SGI IP32
+ *
+ * Author: Arnaud Giersch <arnaud.giersch@free.fr>
+ *
+ * Based on parport_pc.c by
+ *	Phil Blundell, Tim Waugh, Jose Renau, David Campbell,
+ *	Andrea Arcangeli, et al.
+ *
+ * Thanks to Ilya A. Volynets-Evenbakh for his help.
+ *
+ * Copyright (C) 2005, 2006 Arnaud Giersch.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/* Current status:
+ *
+ *	Basic SPP and PS2 modes are supported.
+ *	Support for parallel port IRQ is present.
+ *	Hardware SPP (a.k.a. compatibility), EPP, and ECP modes are
+ *	supported.
+ *	SPP/ECP FIFO can be driven in PIO or DMA mode.  PIO mode can work with
+ *	or without interrupt support.
+ *
+ *	Hardware ECP mode is not fully implemented (ecp_read_data and
+ *	ecp_write_addr are actually missing).
+ *
+ * To do:
+ *
+ *	Fully implement ECP mode.
+ *	EPP and ECP modes need to be tested.  I currently do not own any
+ *	peripheral supporting these extended modes, and cannot test them.
+ *	If DMA mode works well, decide if support for PIO FIFO modes should be
+ *	dropped.
+ *	Use the io{read,write} family functions when they become available in
+ *	the linux-mips.org tree.  Note: the MIPS specific functions readsb()
+ *	and writesb() are to be translated by ioread8_rep() and iowrite8_rep()
+ *	respectively.
+ */
+
+/* The built-in parallel port on the SGI O2 workstation (a.k.a. IP32) is an
+ * IEEE 1284 parallel port driven by a Texas Instruments TL16PIR552PH chip[1].
+ * This chip supports SPP, bidirectional, EPP and ECP modes.  It has a 16 byte
+ * FIFO buffer and supports DMA transfers.
+ *
+ * [1] http://focus.ti.com/docs/prod/folders/print/tl16pir552.html
+ *
+ * Theoretically, we could simply use the parport_pc module.  It is however
+ * not so simple.  The parport_pc code assumes that the parallel port
+ * registers are port-mapped.  On the O2, they are memory-mapped.
+ * Furthermore, each register is replicated on 256 consecutive addresses (as
+ * it is for the built-in serial ports on the same chip).
+ */
+
+/*--- Some configuration defines ---------------------------------------*/
+
+/* DEBUG_PARPORT_IP32
+ *	0	disable debug
+ *	1	standard level: pr_debug1 is enabled
+ *	2	parport_ip32_dump_state is enabled
+ *	>=3	verbose level: pr_debug is enabled
+ */
+#if !defined(DEBUG_PARPORT_IP32)
+#	define DEBUG_PARPORT_IP32  0	/* 0 (disabled) for production */
+#endif
+
+/*----------------------------------------------------------------------*/
+
+/* Setup DEBUG macros.  This is done before any includes, just in case we
+ * activate pr_debug() with DEBUG_PARPORT_IP32 >= 3.
+ */
+#if DEBUG_PARPORT_IP32 == 1
+#	warning DEBUG_PARPORT_IP32 == 1
+#elif DEBUG_PARPORT_IP32 == 2
+#	warning DEBUG_PARPORT_IP32 == 2
+#elif DEBUG_PARPORT_IP32 >= 3
+#	warning DEBUG_PARPORT_IP32 >= 3
+#	if !defined(DEBUG)
+#		define DEBUG /* enable pr_debug() in kernel.h */
+#	endif
+#endif
+
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/parport.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <asm/io.h>
+#include <asm/ip32/ip32_ints.h>
+#include <asm/ip32/mace.h>
+
+/*--- Global variables -------------------------------------------------*/
+
+/* Verbose probing on by default for debugging. */
+#if DEBUG_PARPORT_IP32 >= 1
+#	define DEFAULT_VERBOSE_PROBING	1
+#else
+#	define DEFAULT_VERBOSE_PROBING	0
+#endif
+
+/* Default prefix for printk */
+#define PPIP32 "parport_ip32: "
+
+/*
+ * These are the module parameters:
+ * @features:		bit mask of features to enable/disable
+ *			(all enabled by default)
+ * @verbose_probing:	log chit-chat during initialization
+ */
+#define PARPORT_IP32_ENABLE_IRQ	(1U << 0)
+#define PARPORT_IP32_ENABLE_DMA	(1U << 1)
+#define PARPORT_IP32_ENABLE_SPP	(1U << 2)
+#define PARPORT_IP32_ENABLE_EPP	(1U << 3)
+#define PARPORT_IP32_ENABLE_ECP	(1U << 4)
+static unsigned int features =	~0U;
+static int verbose_probing =	DEFAULT_VERBOSE_PROBING;
+
+/* We do not support more than one port. */
+static struct parport *this_port = NULL;
+
+/* Timing constants for FIFO modes.  */
+#define FIFO_NFAULT_TIMEOUT	100	/* milliseconds */
+#define FIFO_POLLING_INTERVAL	50	/* microseconds */
+
+/*--- I/O register definitions -----------------------------------------*/
+
+/**
+ * struct parport_ip32_regs - virtual addresses of parallel port registers
+ * @data:	Data Register
+ * @dsr:	Device Status Register
+ * @dcr:	Device Control Register
+ * @eppAddr:	EPP Address Register
+ * @eppData0:	EPP Data Register 0
+ * @eppData1:	EPP Data Register 1
+ * @eppData2:	EPP Data Register 2
+ * @eppData3:	EPP Data Register 3
+ * @ecpAFifo:	ECP Address FIFO
+ * @fifo:	General FIFO register.  The same address is used for:
+ *		- cFifo, the Parallel Port DATA FIFO
+ *		- ecpDFifo, the ECP Data FIFO
+ *		- tFifo, the ECP Test FIFO
+ * @cnfgA:	Configuration Register A
+ * @cnfgB:	Configuration Register B
+ * @ecr:	Extended Control Register
+ */
+struct parport_ip32_regs {
+	void __iomem *data;
+	void __iomem *dsr;
+	void __iomem *dcr;
+	void __iomem *eppAddr;
+	void __iomem *eppData0;
+	void __iomem *eppData1;
+	void __iomem *eppData2;
+	void __iomem *eppData3;
+	void __iomem *ecpAFifo;
+	void __iomem *fifo;
+	void __iomem *cnfgA;
+	void __iomem *cnfgB;
+	void __iomem *ecr;
+};
+
+/* Device Status Register */
+#define DSR_nBUSY		(1U << 7)	/* PARPORT_STATUS_BUSY */
+#define DSR_nACK		(1U << 6)	/* PARPORT_STATUS_ACK */
+#define DSR_PERROR		(1U << 5)	/* PARPORT_STATUS_PAPEROUT */
+#define DSR_SELECT		(1U << 4)	/* PARPORT_STATUS_SELECT */
+#define DSR_nFAULT		(1U << 3)	/* PARPORT_STATUS_ERROR */
+#define DSR_nPRINT		(1U << 2)	/* specific to TL16PIR552 */
+/* #define DSR_reserved		(1U << 1) */
+#define DSR_TIMEOUT		(1U << 0)	/* EPP timeout */
+
+/* Device Control Register */
+/* #define DCR_reserved		(1U << 7) | (1U <<  6) */
+#define DCR_DIR			(1U << 5)	/* direction */
+#define DCR_IRQ			(1U << 4)	/* interrupt on nAck */
+#define DCR_SELECT		(1U << 3)	/* PARPORT_CONTROL_SELECT */
+#define DCR_nINIT		(1U << 2)	/* PARPORT_CONTROL_INIT */
+#define DCR_AUTOFD		(1U << 1)	/* PARPORT_CONTROL_AUTOFD */
+#define DCR_STROBE		(1U << 0)	/* PARPORT_CONTROL_STROBE */
+
+/* ECP Configuration Register A */
+#define CNFGA_IRQ		(1U << 7)
+#define CNFGA_ID_MASK		((1U << 6) | (1U << 5) | (1U << 4))
+#define CNFGA_ID_SHIFT		4
+#define CNFGA_ID_16		(00U << CNFGA_ID_SHIFT)
+#define CNFGA_ID_8		(01U << CNFGA_ID_SHIFT)
+#define CNFGA_ID_32		(02U << CNFGA_ID_SHIFT)
+/* #define CNFGA_reserved	(1U << 3) */
+#define CNFGA_nBYTEINTRANS	(1U << 2)
+#define CNFGA_PWORDLEFT		((1U << 1) | (1U << 0))
+
+/* ECP Configuration Register B */
+#define CNFGB_COMPRESS		(1U << 7)
+#define CNFGB_INTRVAL		(1U << 6)
+#define CNFGB_IRQ_MASK		((1U << 5) | (1U << 4) | (1U << 3))
+#define CNFGB_IRQ_SHIFT		3
+#define CNFGB_DMA_MASK		((1U << 2) | (1U << 1) | (1U << 0))
+#define CNFGB_DMA_SHIFT		0
+
+/* Extended Control Register */
+#define ECR_MODE_MASK		((1U << 7) | (1U << 6) | (1U << 5))
+#define ECR_MODE_SHIFT		5
+#define ECR_MODE_SPP		(00U << ECR_MODE_SHIFT)
+#define ECR_MODE_PS2		(01U << ECR_MODE_SHIFT)
+#define ECR_MODE_PPF		(02U << ECR_MODE_SHIFT)
+#define ECR_MODE_ECP		(03U << ECR_MODE_SHIFT)
+#define ECR_MODE_EPP		(04U << ECR_MODE_SHIFT)
+/* #define ECR_MODE_reserved	(05U << ECR_MODE_SHIFT) */
+#define ECR_MODE_TST		(06U << ECR_MODE_SHIFT)
+#define ECR_MODE_CFG		(07U << ECR_MODE_SHIFT)
+#define ECR_nERRINTR		(1U << 4)
+#define ECR_DMAEN		(1U << 3)
+#define ECR_SERVINTR		(1U << 2)
+#define ECR_F_FULL		(1U << 1)
+#define ECR_F_EMPTY		(1U << 0)
+
+/*--- Private data -----------------------------------------------------*/
+
+/**
+ * enum parport_ip32_irq_mode - operation mode of interrupt handler
+ * @PARPORT_IP32_IRQ_FWD:	forward interrupt to the upper parport layer
+ * @PARPORT_IP32_IRQ_HERE:	interrupt is handled locally
+ */
+enum parport_ip32_irq_mode { PARPORT_IP32_IRQ_FWD, PARPORT_IP32_IRQ_HERE };
+
+/**
+ * struct parport_ip32_private - private stuff for &struct parport
+ * @regs:		register addresses
+ * @dcr_cache:		cached contents of DCR
+ * @dcr_writable:	bit mask of writable DCR bits
+ * @pword:		number of bytes per PWord
+ * @fifo_depth:		number of PWords that FIFO will hold
+ * @readIntrThreshold:	minimum number of PWords we can read
+ *			if we get an interrupt
+ * @writeIntrThreshold:	minimum number of PWords we can write
+ *			if we get an interrupt
+ * @irq_mode:		operation mode of interrupt handler for this port
+ * @irq_complete:	completion used to wait for an interrupt to occur
+ */
+struct parport_ip32_private {
+	struct parport_ip32_regs	regs;
+	unsigned int			dcr_cache;
+	unsigned int			dcr_writable;
+	unsigned int			pword;
+	unsigned int			fifo_depth;
+	unsigned int			readIntrThreshold;
+	unsigned int			writeIntrThreshold;
+	enum parport_ip32_irq_mode	irq_mode;
+	struct completion		irq_complete;
+};
+
+/*--- Debug code -------------------------------------------------------*/
+
+/*
+ * pr_debug1 - print debug messages
+ *
+ * This is like pr_debug(), but is defined for %DEBUG_PARPORT_IP32 >= 1
+ */
+#if DEBUG_PARPORT_IP32 >= 1
+#	define pr_debug1(...)	printk(KERN_DEBUG __VA_ARGS__)
+#else /* DEBUG_PARPORT_IP32 < 1 */
+#	define pr_debug1(...)	do { } while (0)
+#endif
+
+/*
+ * pr_trace, pr_trace1 - trace function calls
+ * @p:		pointer to &struct parport
+ * @fmt:	printk format string
+ * @...:	parameters for format string
+ *
+ * Macros used to trace function calls.  The given string is formatted after
+ * function name.  pr_trace() uses pr_debug(), and pr_trace1() uses
+ * pr_debug1().  __pr_trace() is the low-level macro and is not to be used
+ * directly.
+ */
+#define __pr_trace(pr, p, fmt, ...)					\
+	pr("%s: %s" fmt "\n",						\
+	   ({ const struct parport *__p = (p);				\
+		   __p ? __p->name : "parport_ip32"; }),		\
+	   __func__ , ##__VA_ARGS__)
+#define pr_trace(p, fmt, ...)	__pr_trace(pr_debug, p, fmt , ##__VA_ARGS__)
+#define pr_trace1(p, fmt, ...)	__pr_trace(pr_debug1, p, fmt , ##__VA_ARGS__)
+
+/*
+ * __pr_probe, pr_probe - print message if @verbose_probing is true
+ * @p:		pointer to &struct parport
+ * @fmt:	printk format string
+ * @...:	parameters for format string
+ *
+ * For new lines, use pr_probe().  Use __pr_probe() for continued lines.
+ */
+#define __pr_probe(...)							\
+	do { if (verbose_probing) printk(__VA_ARGS__); } while (0)
+#define pr_probe(p, fmt, ...)						\
+	__pr_probe(KERN_INFO PPIP32 "0x%lx: " fmt, (p)->base , ##__VA_ARGS__)
+
+/*
+ * parport_ip32_dump_state - print register status of parport
+ * @p:		pointer to &struct parport
+ * @str:	string to add in message
+ * @show_ecp_config:	shall we dump ECP configuration registers too?
+ *
+ * This function is only here for debugging purposes, and should be used with
+ * care.  Reading the parallel port registers may have undesired side effects.
+ * Especially if @show_ecp_config is true, the parallel port is reset.
+ * This function is only defined if %DEBUG_PARPORT_IP32 >= 2.
+ */
+#if DEBUG_PARPORT_IP32 >= 2
+static void parport_ip32_dump_state(struct parport *p, char *str,
+				    unsigned int show_ecp_config)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	unsigned int i;
+
+	printk(KERN_DEBUG PPIP32 "%s: state (%s):\n", p->name, str);
+	{
+		static const char ecr_modes[8][4] = {"SPP", "PS2", "PPF",
+						     "ECP", "EPP", "???",
+						     "TST", "CFG"};
+		unsigned int ecr = readb(priv->regs.ecr);
+		printk(KERN_DEBUG PPIP32 "    ecr=0x%02x", ecr);
+		printk(" %s",
+		       ecr_modes[(ecr & ECR_MODE_MASK) >> ECR_MODE_SHIFT]);
+		if (ecr & ECR_nERRINTR)
+			printk(",nErrIntrEn");
+		if (ecr & ECR_DMAEN)
+			printk(",dmaEn");
+		if (ecr & ECR_SERVINTR)
+			printk(",serviceIntr");
+		if (ecr & ECR_F_FULL)
+			printk(",f_full");
+		if (ecr & ECR_F_EMPTY)
+			printk(",f_empty");
+		printk("\n");
+	}
+	if (show_ecp_config) {
+		unsigned int oecr, cnfgA, cnfgB;
+		oecr = readb(priv->regs.ecr);
+		writeb(ECR_MODE_PS2, priv->regs.ecr);
+		writeb(ECR_MODE_CFG, priv->regs.ecr);
+		cnfgA = readb(priv->regs.cnfgA);
+		cnfgB = readb(priv->regs.cnfgB);
+		writeb(ECR_MODE_PS2, priv->regs.ecr);
+		writeb(oecr, priv->regs.ecr);
+		printk(KERN_DEBUG PPIP32 "    cnfgA=0x%02x", cnfgA);
+		printk(" ISA-%s", (cnfgA & CNFGA_IRQ) ? "Level" : "Pulses");
+		switch (cnfgA & CNFGA_ID_MASK) {
+		case CNFGA_ID_8:
+			printk(",8 bits");
+			break;
+		case CNFGA_ID_16:
+			printk(",16 bits");
+			break;
+		case CNFGA_ID_32:
+			printk(",32 bits");
+			break;
+		default:
+			printk(",unknown ID");
+			break;
+		}
+		if (!(cnfgA & CNFGA_nBYTEINTRANS))
+			printk(",ByteInTrans");
+		if ((cnfgA & CNFGA_ID_MASK) != CNFGA_ID_8)
+			printk(",%d byte%s left", cnfgA & CNFGA_PWORDLEFT,
+			       ((cnfgA & CNFGA_PWORDLEFT) > 1) ? "s" : "");
+		printk("\n");
+		printk(KERN_DEBUG PPIP32 "    cnfgB=0x%02x", cnfgB);
+		printk(" irq=%u,dma=%u",
+		       (cnfgB & CNFGB_IRQ_MASK) >> CNFGB_IRQ_SHIFT,
+		       (cnfgB & CNFGB_DMA_MASK) >> CNFGB_DMA_SHIFT);
+		printk(",intrValue=%d", !!(cnfgB & CNFGB_INTRVAL));
+		if (cnfgB & CNFGB_COMPRESS)
+			printk(",compress");
+		printk("\n");
+	}
+	for (i = 0; i < 2; i++) {
+		unsigned int dcr = i ? priv->dcr_cache : readb(priv->regs.dcr);
+		printk(KERN_DEBUG PPIP32 "    dcr(%s)=0x%02x",
+		       i ? "soft" : "hard", dcr);
+		printk(" %s", (dcr & DCR_DIR) ? "rev" : "fwd");
+		if (dcr & DCR_IRQ)
+			printk(",ackIntEn");
+		if (!(dcr & DCR_SELECT))
+			printk(",nSelectIn");
+		if (dcr & DCR_nINIT)
+			printk(",nInit");
+		if (!(dcr & DCR_AUTOFD))
+			printk(",nAutoFD");
+		if (!(dcr & DCR_STROBE))
+			printk(",nStrobe");
+		printk("\n");
+	}
+#define sep (f++ ? ',' : ' ')
+	{
+		unsigned int f = 0;
+		unsigned int dsr = readb(priv->regs.dsr);
+		printk(KERN_DEBUG PPIP32 "    dsr=0x%02x", dsr);
+		if (!(dsr & DSR_nBUSY))
+			printk("%cBusy", sep);
+		if (dsr & DSR_nACK)
+			printk("%cnAck", sep);
+		if (dsr & DSR_PERROR)
+			printk("%cPError", sep);
+		if (dsr & DSR_SELECT)
+			printk("%cSelect", sep);
+		if (dsr & DSR_nFAULT)
+			printk("%cnFault", sep);
+		if (!(dsr & DSR_nPRINT))
+			printk("%c(Print)", sep);
+		if (dsr & DSR_TIMEOUT)
+			printk("%cTimeout", sep);
+		printk("\n");
+	}
+#undef sep
+}
+#else /* DEBUG_PARPORT_IP32 < 2 */
+#define parport_ip32_dump_state(...)	do { } while (0)
+#endif
+
+/*
+ * CHECK_EXTRA_BITS - track and log extra bits
+ * @p:		pointer to &struct parport (its name is printed in the log)
+ * @b:		byte to inspect; evaluated once, its source text is logged too
+ * @m:		bit mask of authorized bits; evaluated once
+ *
+ * Logs, via pr_debug1(), when @b has bits set that are not set in @m.  Used
+ * by the DCR write and frob helpers below.  It expands to an empty statement
+ * unless %DEBUG_PARPORT_IP32 >= 1.
+ */
+#if DEBUG_PARPORT_IP32 >= 1
+#define CHECK_EXTRA_BITS(p, b, m)					\
+	do {								\
+		unsigned int __b = (b), __m = (m);			\
+		if (__b & ~__m)						\
+			pr_debug1(PPIP32 "%s: extra bits in %s(%s): "	\
+				  "0x%02x/0x%02x\n",			\
+				  (p)->name, __func__, #b, __b, __m);	\
+	} while (0)
+#else /* DEBUG_PARPORT_IP32 < 1 */
+#define CHECK_EXTRA_BITS(...)	do { } while (0)
+#endif
+
+/*--- IP32 parallel port DMA operations --------------------------------*/
+
+/**
+ * struct parport_ip32_dma_data - private data needed for DMA operation
+ * @dir:	DMA direction (from or to device)
+ * @buf:	DMA address of the buffer, as returned by dma_map_single()
+ * @len:	buffer length, as passed to dma_map_single()
+ * @next:	DMA address of the next bytes to put in a DMA context
+ * @left:	number of bytes not yet put in a DMA context
+ * @ctx:	next context to write (0: context_a; 1: context_b)
+ * @irq_on:	are the DMA IRQs currently enabled?
+ * @lock:	spinlock to protect access to the structure
+ */
+struct parport_ip32_dma_data {
+	enum dma_data_direction		dir;
+	dma_addr_t			buf;
+	dma_addr_t			next;
+	size_t				len;
+	size_t				left;
+	unsigned int			ctx;
+	unsigned int			irq_on;
+	spinlock_t			lock;
+};
+static struct parport_ip32_dma_data parport_ip32_dma;
+
+/**
+ * parport_ip32_dma_setup_context - setup next DMA context
+ * @limit:	maximum data size for the context
+ *
+ * Queues up to @limit bytes in the next context (A and B alternate).  The
+ * caller must choose @limit so that the alignment constraints are met.
+ */
+static void parport_ip32_dma_setup_context(unsigned int limit)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&parport_ip32_dma.lock, flags);
+	if (parport_ip32_dma.left > 0) {
+		/* Note: ctxreg is "volatile" here only because
+		 * mace->perif.ctrl.parport.context_a and context_b are
+		 * "volatile".  */
+		volatile u64 __iomem *ctxreg = (parport_ip32_dma.ctx == 0) ?
+			&mace->perif.ctrl.parport.context_a :
+			&mace->perif.ctrl.parport.context_b;
+		u64 count;
+		u64 ctxval;
+		if (parport_ip32_dma.left <= limit) {
+			count = parport_ip32_dma.left;
+			ctxval = MACEPAR_CONTEXT_LASTFLAG; /* final chunk */
+		} else {
+			count = limit;
+			ctxval = 0;
+		}
+
+		pr_trace(NULL,
+			 "(%u): 0x%04x:0x%04x, %u -> %u%s",
+			 limit,
+			 (unsigned int)parport_ip32_dma.buf,
+			 (unsigned int)parport_ip32_dma.next,
+			 (unsigned int)count,
+			 parport_ip32_dma.ctx, ctxval ? "*" : "");
+
+		ctxval |= parport_ip32_dma.next &
+			MACEPAR_CONTEXT_BASEADDR_MASK;
+		ctxval |= ((count - 1) << MACEPAR_CONTEXT_DATALEN_SHIFT) &
+			MACEPAR_CONTEXT_DATALEN_MASK;
+		writeq(ctxval, ctxreg);
+		parport_ip32_dma.next += count;
+		parport_ip32_dma.left -= count;
+		parport_ip32_dma.ctx ^= 1U;	/* other context next time */
+	}
+	/* If there is nothing more to send, disable IRQs to avoid
+	 * facing an IRQ storm which can lock the machine.  Disable them
+	 * only once (irq_on remembers that it was done). */
+	if (parport_ip32_dma.left == 0 && parport_ip32_dma.irq_on) {
+		pr_debug(PPIP32 "IRQ off (ctx)\n");
+		disable_irq_nosync(MACEISA_PAR_CTXA_IRQ);
+		disable_irq_nosync(MACEISA_PAR_CTXB_IRQ);
+		parport_ip32_dma.irq_on = 0;
+	}
+	spin_unlock_irqrestore(&parport_ip32_dma.lock, flags);
+}
+
+/**
+ * parport_ip32_dma_interrupt - DMA interrupt handler; queues the next context
+ * @irq:	interrupt number (only used for tracing)
+ * @dev_id:	unused
+ * @regs:	pointer to &struct pt_regs (unused)
+ */
+static irqreturn_t parport_ip32_dma_interrupt(int irq, void *dev_id,
+					      struct pt_regs *regs)
+{
+	if (parport_ip32_dma.left)
+		pr_trace(NULL, "(%d): ctx=%d", irq, parport_ip32_dma.ctx);
+	parport_ip32_dma_setup_context(MACEPAR_CONTEXT_DATA_BOUND);
+	return IRQ_HANDLED;
+}
+
+#if DEBUG_PARPORT_IP32	/* handler only built when debugging is enabled */
+static irqreturn_t parport_ip32_merr_interrupt(int irq, void *dev_id,
+					       struct pt_regs *regs)
+{
+	pr_trace1(NULL, "(%d)", irq);	/* just trace the IRQ number */
+	return IRQ_HANDLED;
+}
+#endif
+
+/**
+ * parport_ip32_dma_start - begins a DMA transfer
+ * @dir:	DMA direction: only DMA_TO_DEVICE is accepted (BUG otherwise)
+ * @addr:	pointer to data buffer
+ * @count:	buffer size
+ *
+ * Always returns 0.  Calls to parport_ip32_dma_start() and
+ * parport_ip32_dma_stop() must be correctly balanced.
+ */
+static int parport_ip32_dma_start(enum dma_data_direction dir,
+				  void *addr, size_t count)
+{
+	unsigned int limit;
+	u64 ctrl;
+
+	pr_trace(NULL, "(%d, %lu)", dir, (unsigned long)count);
+
+	/* FIXME - add support for DMA_FROM_DEVICE.  In this case, buffer must
+	 * be 64 bytes aligned. */
+	BUG_ON(dir != DMA_TO_DEVICE);
+
+	/* Reset DMA controller */
+	ctrl = MACEPAR_CTLSTAT_RESET;
+	writeq(ctrl, &mace->perif.ctrl.parport.cntlstat);
+
+	/* DMA IRQs should normally be enabled; warn and fix up if not */
+	if (!parport_ip32_dma.irq_on) {
+		WARN_ON(1);
+		enable_irq(MACEISA_PAR_CTXA_IRQ);
+		enable_irq(MACEISA_PAR_CTXB_IRQ);
+		parport_ip32_dma.irq_on = 1;
+	}
+
+	/* Prepare DMA pointers.  NOTE(review): mapping result is not checked */
+	parport_ip32_dma.dir = dir;
+	parport_ip32_dma.buf = dma_map_single(NULL, addr, count, dir);
+	parport_ip32_dma.len = count;
+	parport_ip32_dma.next = parport_ip32_dma.buf;
+	parport_ip32_dma.left = parport_ip32_dma.len;
+	parport_ip32_dma.ctx = 0;
+
+	/* Setup DMA direction and first two contexts */
+	ctrl = (dir == DMA_TO_DEVICE) ? 0 : MACEPAR_CTLSTAT_DIRECTION;
+	writeq(ctrl, &mace->perif.ctrl.parport.cntlstat);
+	/* First transfer stops at the next MACEPAR_CONTEXT_DATA_BOUND limit */
+	limit = MACEPAR_CONTEXT_DATA_BOUND -
+		(parport_ip32_dma.next & (MACEPAR_CONTEXT_DATA_BOUND - 1));
+	parport_ip32_dma_setup_context(limit);
+	parport_ip32_dma_setup_context(MACEPAR_CONTEXT_DATA_BOUND);
+
+	/* Real start of DMA transfer */
+	ctrl |= MACEPAR_CTLSTAT_ENABLE;
+	writeq(ctrl, &mace->perif.ctrl.parport.cntlstat);
+
+	return 0;
+}
+
+/**
+ * parport_ip32_dma_stop - ends a running DMA transfer
+ *
+ * Updates parport_ip32_dma.left and unmaps the buffer.  Calls to
+ * parport_ip32_dma_start() and parport_ip32_dma_stop() must be balanced.
+ */
+static void parport_ip32_dma_stop(void)
+{
+	u64 ctx_a;
+	u64 ctx_b;
+	u64 ctrl;
+	u64 diag;
+	size_t res[2];	/* {[0] = res_a, [1] = res_b} */
+
+	pr_trace(NULL, "()");
+
+	/* Disable IRQs, unless it was already done */
+	spin_lock_irq(&parport_ip32_dma.lock);
+	if (parport_ip32_dma.irq_on) {
+		pr_debug(PPIP32 "IRQ off (stop)\n");
+		disable_irq_nosync(MACEISA_PAR_CTXA_IRQ);
+		disable_irq_nosync(MACEISA_PAR_CTXB_IRQ);
+		parport_ip32_dma.irq_on = 0;
+	}
+	spin_unlock_irq(&parport_ip32_dma.lock);
+	/* Force IRQ synchronization, even if the IRQs were disabled
+	 * elsewhere. */
+	synchronize_irq(MACEISA_PAR_CTXA_IRQ);
+	synchronize_irq(MACEISA_PAR_CTXB_IRQ);
+
+	/* Stop DMA transfer */
+	ctrl = readq(&mace->perif.ctrl.parport.cntlstat);
+	ctrl &= ~MACEPAR_CTLSTAT_ENABLE;
+	writeq(ctrl, &mace->perif.ctrl.parport.cntlstat);
+
+	/* Adjust residue (parport_ip32_dma.left) with what is still queued */
+	ctx_a = readq(&mace->perif.ctrl.parport.context_a);
+	ctx_b = readq(&mace->perif.ctrl.parport.context_b);
+	ctrl = readq(&mace->perif.ctrl.parport.cntlstat);
+	diag = readq(&mace->perif.ctrl.parport.diagnostic);
+	res[0] = (ctrl & MACEPAR_CTLSTAT_CTXA_VALID) ?
+		1 + ((ctx_a & MACEPAR_CONTEXT_DATALEN_MASK) >>
+		     MACEPAR_CONTEXT_DATALEN_SHIFT) :
+		0;
+	res[1] = (ctrl & MACEPAR_CTLSTAT_CTXB_VALID) ?
+		1 + ((ctx_b & MACEPAR_CONTEXT_DATALEN_MASK) >>
+		     MACEPAR_CONTEXT_DATALEN_SHIFT) :
+		0;
+	if (diag & MACEPAR_DIAG_DMACTIVE)	/* override the one in use */
+		res[(diag & MACEPAR_DIAG_CTXINUSE) != 0] =
+			1 + ((diag & MACEPAR_DIAG_CTRMASK) >>
+			     MACEPAR_DIAG_CTRSHIFT);
+	parport_ip32_dma.left += res[0] + res[1];
+
+	/* Reset DMA controller, and re-enable IRQs */
+	ctrl = MACEPAR_CTLSTAT_RESET;
+	writeq(ctrl, &mace->perif.ctrl.parport.cntlstat);
+	pr_debug(PPIP32 "IRQ on (stop)\n");
+	enable_irq(MACEISA_PAR_CTXA_IRQ);
+	enable_irq(MACEISA_PAR_CTXB_IRQ);
+	parport_ip32_dma.irq_on = 1;
+
+	dma_unmap_single(NULL, parport_ip32_dma.buf, parport_ip32_dma.len,
+			 parport_ip32_dma.dir);
+}
+
+/**
+ * parport_ip32_dma_get_residue - get residue from last DMA transfer
+ *
+ * Returns parport_ip32_dma.left, as adjusted by parport_ip32_dma_stop().
+ */
+static inline size_t parport_ip32_dma_get_residue(void)
+{
+	return parport_ip32_dma.left;
+}
+
+/**
+ * parport_ip32_dma_register - initialize DMA engine
+ *
+ * Returns zero for success, or the error returned by request_irq().
+ */
+static int parport_ip32_dma_register(void)
+{
+	int err;
+
+	spin_lock_init(&parport_ip32_dma.lock);
+	parport_ip32_dma.irq_on = 1;
+
+	/* Reset DMA controller */
+	writeq(MACEPAR_CTLSTAT_RESET, &mace->perif.ctrl.parport.cntlstat);
+
+	/* Request IRQs; the same handler serves contexts A and B */
+	err = request_irq(MACEISA_PAR_CTXA_IRQ, parport_ip32_dma_interrupt,
+			  0, "parport_ip32", NULL);
+	if (err)
+		goto fail_a;
+	err = request_irq(MACEISA_PAR_CTXB_IRQ, parport_ip32_dma_interrupt,
+			  0, "parport_ip32", NULL);
+	if (err)
+		goto fail_b;
+#if DEBUG_PARPORT_IP32
+	/* FIXME - what is this IRQ for? */
+	err = request_irq(MACEISA_PAR_MERR_IRQ, parport_ip32_merr_interrupt,
+			  0, "parport_ip32", NULL);
+	if (err)
+		goto fail_merr;
+#endif
+	return 0;
+
+#if DEBUG_PARPORT_IP32
+fail_merr:	/* each label frees the IRQs requested before the failure */
+	free_irq(MACEISA_PAR_CTXB_IRQ, NULL);
+#endif
+fail_b:
+	free_irq(MACEISA_PAR_CTXA_IRQ, NULL);
+fail_a:
+	return err;
+}
+
+/**
+ * parport_ip32_dma_unregister - free the IRQs used by the DMA engine
+ */
+static void parport_ip32_dma_unregister(void)
+{
+#if DEBUG_PARPORT_IP32
+	free_irq(MACEISA_PAR_MERR_IRQ, NULL);	/* debug-only IRQ */
+#endif
+	free_irq(MACEISA_PAR_CTXB_IRQ, NULL);
+	free_irq(MACEISA_PAR_CTXA_IRQ, NULL);
+}
+
+/*--- Interrupt handlers and associates --------------------------------*/
+
+/**
+ * parport_ip32_wakeup - wakes up code waiting for an interrupt
+ * @p:		pointer to &struct parport
+ */
+static inline void parport_ip32_wakeup(struct parport *p)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	complete(&priv->irq_complete);	/* waiters block on this completion */
+}
+
+/**
+ * parport_ip32_interrupt - interrupt handler
+ * @irq:	interrupt number
+ * @dev_id:	pointer to &struct parport
+ * @regs:	pointer to &struct pt_regs
+ *
+ * Depending on the current IRQ mode, the interrupt is either forwarded to the
+ * upper parport layer (%PARPORT_IP32_IRQ_FWD) or used to wake up a waiter.
+ */
+static irqreturn_t parport_ip32_interrupt(int irq, void *dev_id,
+					  struct pt_regs *regs)
+{
+	struct parport * const p = dev_id;
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	const enum parport_ip32_irq_mode mode = priv->irq_mode;
+
+	if (mode == PARPORT_IP32_IRQ_FWD) {
+		/* Hand the interrupt over to the generic parport code */
+		parport_generic_irq(irq, p, regs);
+	} else if (mode == PARPORT_IP32_IRQ_HERE) {
+		/* Wake up whoever waits on priv->irq_complete */
+		parport_ip32_wakeup(p);
+	}
+	return IRQ_HANDLED;
+}
+
+/*--- Some utility function to manipulate ECR register -----------------*/
+
+/**
+ * parport_ip32_read_econtrol - read contents of the ECR register
+ * @p:		pointer to &struct parport
+ */
+static inline unsigned int parport_ip32_read_econtrol(struct parport *p)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	return readb(priv->regs.ecr);	/* read from hardware, no soft copy */
+}
+
+/**
+ * parport_ip32_write_econtrol - write new contents to the ECR register
+ * @p:		pointer to &struct parport
+ * @c:		new value to write (passed unmodified to writeb())
+ */
+static inline void parport_ip32_write_econtrol(struct parport *p,
+					       unsigned int c)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	writeb(c, priv->regs.ecr);
+}
+
+/**
+ * parport_ip32_frob_econtrol - change bits from the ECR register
+ * @p:		pointer to &struct parport
+ * @mask:	bit mask of bits to change
+ * @val:	new value for changed bits
+ *
+ * Read from the ECR, mask out the bits in @mask, exclusive-or with the bits
+ * in @val, and write the result to the ECR (a plain read-modify-write).
+ */
+static inline void parport_ip32_frob_econtrol(struct parport *p,
+					      unsigned int mask,
+					      unsigned int val)
+{
+	unsigned int c;
+	c = (parport_ip32_read_econtrol(p) & ~mask) ^ val;
+	parport_ip32_write_econtrol(p, c);
+}
+
+/**
+ * parport_ip32_set_mode - change mode of ECP port
+ * @p:		pointer to &struct parport
+ * @mode:	new mode to write in ECR
+ *
+ * Writes @mode to the ECR together with %ECR_nERRINTR and %ECR_SERVINTR.  If
+ * neither the old nor the new mode is SPP or PS2, PS2 mode is written first.
+ */
+static void parport_ip32_set_mode(struct parport *p, unsigned int mode)
+{
+	const unsigned int intr_bits = ECR_nERRINTR | ECR_SERVINTR;
+	unsigned int omode;
+
+	mode &= ECR_MODE_MASK;
+	omode = parport_ip32_read_econtrol(p) & ECR_MODE_MASK;
+
+	if (mode != ECR_MODE_SPP && mode != ECR_MODE_PS2
+	    && omode != ECR_MODE_SPP && omode != ECR_MODE_PS2) {
+		/* Neither end is SPP or PS2: step through PS2 mode first */
+		parport_ip32_write_econtrol(p, ECR_MODE_PS2 | intr_bits);
+	}
+	parport_ip32_write_econtrol(p, mode | intr_bits);
+}
+
+/*--- Basic functions needed for parport -------------------------------*/
+
+/**
+ * parport_ip32_read_data - return current contents of the DATA register
+ * @p:		pointer to &struct parport
+ */
+static inline unsigned char parport_ip32_read_data(struct parport *p)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	return readb(priv->regs.data);	/* one byte, read from hardware */
+}
+
+/**
+ * parport_ip32_write_data - set new contents for the DATA register
+ * @p:		pointer to &struct parport
+ * @d:		new value to write (passed unmodified to writeb())
+ */
+static inline void parport_ip32_write_data(struct parport *p, unsigned char d)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	writeb(d, priv->regs.data);
+}
+
+/**
+ * parport_ip32_read_status - return current contents of the DSR register
+ * @p:		pointer to &struct parport
+ */
+static inline unsigned char parport_ip32_read_status(struct parport *p)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	return readb(priv->regs.dsr);	/* one byte, read from hardware */
+}
+
+/**
+ * __parport_ip32_read_control - return cached contents of the DCR register
+ * @p:		pointer to &struct parport (the hardware is not accessed)
+ */
+static inline unsigned int __parport_ip32_read_control(struct parport *p)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	return priv->dcr_cache; /* use soft copy */
+}
+
+/**
+ * __parport_ip32_write_control - set new contents for the DCR register
+ * @p:		pointer to &struct parport
+ * @c:		new value to write; bits outside priv->dcr_writable are dropped
+ */
+static inline void __parport_ip32_write_control(struct parport *p,
+						unsigned int c)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	CHECK_EXTRA_BITS(p, c, priv->dcr_writable);
+	c &= priv->dcr_writable; /* only writable bits */
+	writeb(c, priv->regs.dcr);
+	priv->dcr_cache = c;		/* update soft copy */
+}
+
+/**
+ * __parport_ip32_frob_control - change bits from the DCR register
+ * @p:		pointer to &struct parport
+ * @mask:	bit mask of bits to change
+ * @val:	new value for changed bits
+ *
+ * The cached contents of the DCR are read (not the hardware register), the
+ * bits in @mask are masked out, the result is exclusive-ored with the bits
+ * in @val and written back through __parport_ip32_write_control().
+ */
+static inline void __parport_ip32_frob_control(struct parport *p,
+					       unsigned int mask,
+					       unsigned int val)
+{
+	unsigned int c;
+	c = (__parport_ip32_read_control(p) & ~mask) ^ val;
+	__parport_ip32_write_control(p, c);
+}
+
+/**
+ * parport_ip32_read_control - return cached contents of the DCR register
+ * @p:		pointer to &struct parport
+ *
+ * The return value is masked so as to only return the value of %DCR_STROBE,
+ * %DCR_AUTOFD, %DCR_nINIT, and %DCR_SELECT.  The soft copy is used.
+ */
+static inline unsigned char parport_ip32_read_control(struct parport *p)
+{
+	const unsigned int rm =
+		DCR_STROBE | DCR_AUTOFD | DCR_nINIT | DCR_SELECT;
+	return __parport_ip32_read_control(p) & rm;
+}
+
+/**
+ * parport_ip32_write_control - set new contents for the DCR register
+ * @p:		pointer to &struct parport
+ * @c:		new value to write
+ *
+ * The value is masked so as to only change the value of %DCR_STROBE,
+ * %DCR_AUTOFD, %DCR_nINIT, and %DCR_SELECT; other cached bits are kept.
+ */
+static inline void parport_ip32_write_control(struct parport *p,
+					      unsigned char c)
+{
+	const unsigned int wm =
+		DCR_STROBE | DCR_AUTOFD | DCR_nINIT | DCR_SELECT;
+	CHECK_EXTRA_BITS(p, c, wm);
+	__parport_ip32_frob_control(p, wm, c & wm);
+}
+
+/**
+ * parport_ip32_frob_control - change bits from the DCR register
+ * @p:		pointer to &struct parport
+ * @mask:	bit mask of bits to change
+ * @val:	new value for changed bits
+ *
+ * Like __parport_ip32_frob_control(), but limited to %DCR_STROBE,
+ * %DCR_AUTOFD, %DCR_nINIT and %DCR_SELECT.  Returns the new masked DCR copy.
+ */
+static inline unsigned char parport_ip32_frob_control(struct parport *p,
+						      unsigned char mask,
+						      unsigned char val)
+{
+	const unsigned int wm =
+		DCR_STROBE | DCR_AUTOFD | DCR_nINIT | DCR_SELECT;
+	CHECK_EXTRA_BITS(p, mask, wm);
+	CHECK_EXTRA_BITS(p, val, wm);
+	__parport_ip32_frob_control(p, mask & wm, val & wm);
+	return parport_ip32_read_control(p);
+}
+
+/**
+ * parport_ip32_disable_irq - disable interrupts on the rising edge of nACK
+ * @p:		pointer to &struct parport
+ */
+static inline void parport_ip32_disable_irq(struct parport *p)
+{
+	__parport_ip32_frob_control(p, DCR_IRQ, 0);	/* clear DCR_IRQ */
+}
+
+/**
+ * parport_ip32_enable_irq - enable interrupts on the rising edge of nACK
+ * @p:		pointer to &struct parport
+ */
+static inline void parport_ip32_enable_irq(struct parport *p)
+{
+	__parport_ip32_frob_control(p, DCR_IRQ, DCR_IRQ);	/* set DCR_IRQ */
+}
+
+/**
+ * parport_ip32_data_forward - enable host-to-peripheral communications
+ * @p:		pointer to &struct parport
+ *
+ * Enable the data line drivers, for 8-bit host-to-peripheral communications.
+ */
+static inline void parport_ip32_data_forward(struct parport *p)
+{
+	__parport_ip32_frob_control(p, DCR_DIR, 0);	/* clear DCR_DIR */
+}
+
+/**
+ * parport_ip32_data_reverse - enable peripheral-to-host communications
+ * @p:		pointer to &struct parport
+ *
+ * Place the data bus in a high impedance state, if @p->modes has the
+ * PARPORT_MODE_TRISTATE bit set.  @p->modes itself is not tested here.
+ */
+static inline void parport_ip32_data_reverse(struct parport *p)
+{
+	__parport_ip32_frob_control(p, DCR_DIR, DCR_DIR);	/* set DCR_DIR */
+}
+
+/**
+ * parport_ip32_init_state - for core parport code
+ * @dev:	pointer to &struct pardevice (unused)
+ * @s:		pointer to &struct parport_state to initialize
+ */
+static void parport_ip32_init_state(struct pardevice *dev,
+				    struct parport_state *s)
+{
+	s->u.ip32.dcr = DCR_SELECT | DCR_nINIT;
+	s->u.ip32.ecr = ECR_MODE_PS2 | ECR_nERRINTR | ECR_SERVINTR;
+}
+
+/**
+ * parport_ip32_save_state - for core parport code
+ * @p:		pointer to &struct parport
+ * @s:		pointer to &struct parport_state to save state to
+ */
+static void parport_ip32_save_state(struct parport *p,
+				    struct parport_state *s)
+{
+	s->u.ip32.dcr = __parport_ip32_read_control(p);	/* soft copy */
+	s->u.ip32.ecr = parport_ip32_read_econtrol(p);	/* hardware */
+}
+
+/**
+ * parport_ip32_restore_state - for core parport code
+ * @p:		pointer to &struct parport
+ * @s:		pointer to &struct parport_state to restore state from
+ */
+static void parport_ip32_restore_state(struct parport *p,
+				       struct parport_state *s)
+{
+	parport_ip32_set_mode(p, s->u.ip32.ecr & ECR_MODE_MASK);
+	parport_ip32_write_econtrol(p, s->u.ip32.ecr);	/* then all ECR bits */
+	__parport_ip32_write_control(p, s->u.ip32.dcr);
+}
+
+/*--- EPP mode functions -----------------------------------------------*/
+
+/**
+ * parport_ip32_clear_epp_timeout - clear Timeout bit in EPP mode
+ * @p:		pointer to &struct parport
+ *
+ * Returns 1 if the Timeout bit is clear in the end, and 0 otherwise.
+ */
+static unsigned int parport_ip32_clear_epp_timeout(struct parport *p)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	unsigned int cleared;
+
+	cleared = !(parport_ip32_read_status(p) & DSR_TIMEOUT);
+	if (!cleared) {
+		unsigned int dsr;
+
+		/* To clear timeout some chips require double read */
+		parport_ip32_read_status(p);
+		dsr = parport_ip32_read_status(p);
+		/* Some reset by writing 1 */
+		writeb(dsr | DSR_TIMEOUT, priv->regs.dsr);
+		/* Others by writing 0 */
+		writeb(dsr & ~DSR_TIMEOUT, priv->regs.dsr);
+
+		dsr = parport_ip32_read_status(p);
+		cleared = !(dsr & DSR_TIMEOUT);
+	}
+
+	pr_trace(p, "(): %s", cleared ? "cleared" : "failed");
+	return cleared;
+}
+
+/**
+ * parport_ip32_epp_read - generic EPP read function
+ * @eppreg:	I/O register to read from
+ * @p:		pointer to &struct parport
+ * @buf:	buffer to store read data
+ * @len:	length of buffer @buf
+ * @flags:	may be PARPORT_EPP_FAST; a timeout then yields (size_t)-EIO
+ *		instead of the number of bytes read
+ */
+static size_t parport_ip32_epp_read(void __iomem *eppreg, struct parport *p,
+				    void *buf, size_t len, int flags)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	size_t got;
+	parport_ip32_set_mode(p, ECR_MODE_EPP);
+	parport_ip32_data_reverse(p);
+	parport_ip32_write_control(p, DCR_nINIT);
+	if ((flags & PARPORT_EPP_FAST) && (len > 1)) {
+		readsb(eppreg, buf, len);
+		got = len;
+		if (readb(priv->regs.dsr) & DSR_TIMEOUT) {
+			parport_ip32_clear_epp_timeout(p);
+			got = -EIO;	/* no early return: restore port */
+		}
+	} else {
+		u8 *bufp = buf;
+		for (got = 0; got < len; got++) {
+			*bufp++ = readb(eppreg);
+			if (readb(priv->regs.dsr) & DSR_TIMEOUT) {
+				parport_ip32_clear_epp_timeout(p);
+				break;
+			}
+		}
+	}
+	parport_ip32_data_forward(p);
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+	return got;
+}
+
+/**
+ * parport_ip32_epp_write - generic EPP write function
+ * @eppreg:	I/O register to write to
+ * @p:		pointer to &struct parport
+ * @buf:	buffer of data to write
+ * @len:	length of buffer @buf
+ * @flags:	may be PARPORT_EPP_FAST; a timeout then yields (size_t)-EIO
+ *		instead of the number of bytes written
+ */
+static size_t parport_ip32_epp_write(void __iomem *eppreg, struct parport *p,
+				     const void *buf, size_t len, int flags)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	size_t written;
+	parport_ip32_set_mode(p, ECR_MODE_EPP);
+	parport_ip32_data_forward(p);
+	parport_ip32_write_control(p, DCR_nINIT);
+	if ((flags & PARPORT_EPP_FAST) && (len > 1)) {
+		writesb(eppreg, buf, len);
+		written = len;
+		if (readb(priv->regs.dsr) & DSR_TIMEOUT) {
+			parport_ip32_clear_epp_timeout(p);
+			written = -EIO;	/* no early return: restore port */
+		}
+	} else {
+		const u8 *bufp = buf;
+		for (written = 0; written < len; written++) {
+			writeb(*bufp++, eppreg);
+			if (readb(priv->regs.dsr) & DSR_TIMEOUT) {
+				parport_ip32_clear_epp_timeout(p);
+				break;
+			}
+		}
+	}
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+	return written;
+}
+
+/**
+ * parport_ip32_epp_read_data - read a block of data in EPP mode
+ * @p:		pointer to &struct parport
+ * @buf:	buffer to store read data
+ * @len:	length of buffer @buf
+ * @flags:	may be PARPORT_EPP_FAST (handed to parport_ip32_epp_read())
+ */
+static size_t parport_ip32_epp_read_data(struct parport *p, void *buf,
+					 size_t len, int flags)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	return parport_ip32_epp_read(priv->regs.eppData0, p, buf, len, flags);
+}
+
+/**
+ * parport_ip32_epp_write_data - write a block of data in EPP mode
+ * @p:		pointer to &struct parport
+ * @buf:	buffer of data to write
+ * @len:	length of buffer @buf
+ * @flags:	may be PARPORT_EPP_FAST (handed to parport_ip32_epp_write())
+ */
+static size_t parport_ip32_epp_write_data(struct parport *p, const void *buf,
+					  size_t len, int flags)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	return parport_ip32_epp_write(priv->regs.eppData0, p, buf, len, flags);
+}
+
+/**
+ * parport_ip32_epp_read_addr - read a block of addresses in EPP mode
+ * @p:		pointer to &struct parport
+ * @buf:	buffer to store read data
+ * @len:	length of buffer @buf
+ * @flags:	may be PARPORT_EPP_FAST (handed to parport_ip32_epp_read())
+ */
+static size_t parport_ip32_epp_read_addr(struct parport *p, void *buf,
+					 size_t len, int flags)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	return parport_ip32_epp_read(priv->regs.eppAddr, p, buf, len, flags);
+}
+
+/**
+ * parport_ip32_epp_write_addr - write a block of addresses in EPP mode
+ * @p:		pointer to &struct parport
+ * @buf:	buffer of data to write
+ * @len:	length of buffer @buf
+ * @flags:	may be PARPORT_EPP_FAST (handed to parport_ip32_epp_write())
+ */
+static size_t parport_ip32_epp_write_addr(struct parport *p, const void *buf,
+					  size_t len, int flags)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	return parport_ip32_epp_write(priv->regs.eppAddr, p, buf, len, flags);
+}
+
+/*--- ECP mode functions (FIFO) ----------------------------------------*/
+
+/**
+ * parport_ip32_fifo_wait_break - check if the waiting function should return
+ * @p:		pointer to &struct parport
+ * @expire:	timeout expiring date, in jiffies
+ *
+ * parport_ip32_fifo_wait_break() checks if the waiting function should return
+ * immediately or not, and returns 1 if so, 0 otherwise.  Break conditions:
+ *	- expired timeout;
+ *	- a pending signal;
+ *	- nFault asserted low.
+ * This function also calls cond_resched().
+ */
+static unsigned int parport_ip32_fifo_wait_break(struct parport *p,
+						 unsigned long expire)
+{
+	cond_resched();
+	if (time_after(jiffies, expire)) {
+		pr_debug1(PPIP32 "%s: FIFO write timed out\n", p->name);
+		return 1;
+	}
+	if (signal_pending(current)) {
+		pr_debug1(PPIP32 "%s: Signal pending\n", p->name);
+		return 1;
+	}
+	if (!(parport_ip32_read_status(p) & DSR_nFAULT)) {
+		pr_debug1(PPIP32 "%s: nFault asserted low\n", p->name);
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * parport_ip32_fwp_wait_polling - wait for FIFO to empty (polling)
+ * @p:		pointer to &struct parport
+ *
+ * Returns the number of bytes that can safely be written in the FIFO.  A
+ * return value of zero means that the calling function should terminate as
+ * fast as possible.
+ */
+static unsigned int parport_ip32_fwp_wait_polling(struct parport *p)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	struct parport * const physport = p->physport;
+	unsigned long expire;
+	unsigned int count;
+	unsigned int ecr;
+
+	expire = jiffies + physport->cad->timeout;
+	count = 0;
+	while (1) {
+		if (parport_ip32_fifo_wait_break(p, expire))
+			break;
+
+		/* Check FIFO state.  We do nothing when the FIFO is neither
+		 * full nor empty.  It appears that the FIFO full bit is not
+		 * always reliable, the FIFO state is sometimes wrongly
+		 * reported, and the chip gets confused by another byte. */
+		ecr = parport_ip32_read_econtrol(p);
+		if (ecr & ECR_F_EMPTY) {
+			/* FIFO is empty, fill it up */
+			count = priv->fifo_depth;
+			break;
+		}
+
+		/* Wait a moment... */
+		udelay(FIFO_POLLING_INTERVAL);
+	} /* while (1) */
+
+	return count;
+}
+
+/**
+ * parport_ip32_fwp_wait_interrupt - wait for FIFO to empty (interrupt-driven)
+ * @p:		pointer to &struct parport
+ *
+ * Returns the number of bytes that can safely be written in the FIFO.  A
+ * return value of zero means that the calling function should terminate as
+ * fast as possible.
+ */
+static unsigned int parport_ip32_fwp_wait_interrupt(struct parport *p)
+{
+	static unsigned int lost_interrupt = 0;	/* warn only once */
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	struct parport * const physport = p->physport;
+	unsigned long nfault_timeout;
+	unsigned long expire;
+	unsigned int count;
+	unsigned int ecr;
+
+	nfault_timeout = min((unsigned long)physport->cad->timeout,
+			     msecs_to_jiffies(FIFO_NFAULT_TIMEOUT));
+	expire = jiffies + physport->cad->timeout;
+	count = 0;
+	while (1) {
+		if (parport_ip32_fifo_wait_break(p, expire))
+			break;
+
+		/* Reset the completion used to take interrupts into account */
+		INIT_COMPLETION(priv->irq_complete);
+
+		/* Enable serviceIntr */
+		parport_ip32_frob_econtrol(p, ECR_SERVINTR, 0);
+
+		/* Enabling serviceIntr while the FIFO is empty does not
+		 * always generate an interrupt, so check for emptiness
+		 * now. */
+		ecr = parport_ip32_read_econtrol(p);
+		if (!(ecr & ECR_F_EMPTY)) {
+			/* FIFO is not empty: wait for an interrupt or a
+			 * timeout to occur */
+			wait_for_completion_interruptible_timeout(
+				&priv->irq_complete, nfault_timeout);
+			ecr = parport_ip32_read_econtrol(p);
+			if ((ecr & ECR_F_EMPTY) && !(ecr & ECR_SERVINTR)
+			    && !lost_interrupt) {
+				printk(KERN_WARNING PPIP32
+				       "%s: lost interrupt in %s\n",
+				       p->name, __func__);
+				lost_interrupt = 1;
+			}
+		}
+
+		/* Disable serviceIntr */
+		parport_ip32_frob_econtrol(p, ECR_SERVINTR, ECR_SERVINTR);
+
+		/* Check FIFO state */
+		if (ecr & ECR_F_EMPTY) {
+			/* FIFO is empty, fill it up */
+			count = priv->fifo_depth;
+			break;
+		} else if (ecr & ECR_SERVINTR) {
+			/* FIFO is not empty, but we know that we can safely
+			 * push writeIntrThreshold bytes into it */
+			count = priv->writeIntrThreshold;
+			break;
+		}
+		/* FIFO is not empty, and we did not get any interrupt.
+		 * Either it's time to check for nFault, or a signal is
+		 * pending.  This is verified in
+		 * parport_ip32_fifo_wait_break(), so we continue the loop. */
+	} /* while (1) */
+
+	return count;
+}
+
+/**
+ * parport_ip32_fifo_write_block_pio - write a block of data (PIO mode)
+ * @p:		pointer to &struct parport
+ * @buf:	buffer of data to write
+ * @len:	length of buffer @buf
+ *
+ * Uses PIO to write the contents of the buffer @buf into the parallel port
+ * FIFO.  Returns the number of bytes that were actually written.  It polls
+ * if @p->irq is PARPORT_IRQ_NONE and waits for interrupts otherwise.  The
+ * port must be correctly initialized before calling this function.
+ */
+static size_t parport_ip32_fifo_write_block_pio(struct parport *p,
+						const void *buf, size_t len)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	const u8 *bufp = buf;
+	size_t left = len;
+
+	priv->irq_mode = PARPORT_IP32_IRQ_HERE;	/* IRQs wake us up */
+
+	while (left > 0) {
+		unsigned int count;
+
+		count = (p->irq == PARPORT_IRQ_NONE) ?
+			parport_ip32_fwp_wait_polling(p) :
+			parport_ip32_fwp_wait_interrupt(p);
+		if (count == 0)
+			break;	/* Transmission should be stopped */
+		if (count > left)
+			count = left;
+		if (count == 1) {
+			writeb(*bufp, priv->regs.fifo);
+			bufp++, left--;
+		} else {
+			writesb(priv->regs.fifo, bufp, count);
+			bufp += count, left -= count;
+		}
+	}
+
+	priv->irq_mode = PARPORT_IP32_IRQ_FWD;	/* back to forwarding */
+
+	return len - left;
+}
+
+/**
+ * parport_ip32_fifo_write_block_dma - write a block of data (DMA mode)
+ * @p:		pointer to &struct parport
+ * @buf:	buffer of data to write
+ * @len:	length of buffer @buf
+ *
+ * Uses DMA to write the contents of the buffer @buf into the parallel port
+ * FIFO.  Returns @len minus the residue left by parport_ip32_dma_stop().
+ * The parallel port must be correctly initialized before calling
+ * parport_ip32_fifo_write_block_dma().
+ */
+static size_t parport_ip32_fifo_write_block_dma(struct parport *p,
+						const void *buf, size_t len)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	struct parport * const physport = p->physport;
+	unsigned long nfault_timeout;
+	unsigned long expire;
+	size_t written;
+	unsigned int ecr;
+
+	priv->irq_mode = PARPORT_IP32_IRQ_HERE;	/* IRQs wake us up */
+
+	parport_ip32_dma_start(DMA_TO_DEVICE, (void *)buf, len);
+	INIT_COMPLETION(priv->irq_complete);
+	parport_ip32_frob_econtrol(p, ECR_DMAEN | ECR_SERVINTR, ECR_DMAEN);
+
+	nfault_timeout = min((unsigned long)physport->cad->timeout,
+			     msecs_to_jiffies(FIFO_NFAULT_TIMEOUT));
+	expire = jiffies + physport->cad->timeout;
+	while (1) {
+		if (parport_ip32_fifo_wait_break(p, expire))
+			break;
+		wait_for_completion_interruptible_timeout(&priv->irq_complete,
+							  nfault_timeout);
+		ecr = parport_ip32_read_econtrol(p);
+		if (ecr & ECR_SERVINTR)
+			break;	/* DMA transfer just finished */
+	}
+	parport_ip32_dma_stop();
+	written = len - parport_ip32_dma_get_residue();
+
+	priv->irq_mode = PARPORT_IP32_IRQ_FWD;	/* back to forwarding */
+
+	return written;
+}
+
+/**
+ * parport_ip32_fifo_write_block - write a block of data
+ * @p:		pointer to &struct parport
+ * @buf:	buffer of data to write
+ * @len:	length of buffer @buf
+ *
+ * Uses DMA (if @p->modes has PARPORT_MODE_DMA) or PIO to write @buf into the
+ * parallel port FIFO.  Returns the number of bytes actually written.
+ */
+static size_t parport_ip32_fifo_write_block(struct parport *p,
+					    const void *buf, size_t len)
+{
+	if (len == 0)
+		return 0;
+	/* FIXME - Maybe some threshold value should be set for @len
+	 * under which we revert to PIO mode? */
+	if (p->modes & PARPORT_MODE_DMA)
+		return parport_ip32_fifo_write_block_dma(p, buf, len);
+	else
+		return parport_ip32_fifo_write_block_pio(p, buf, len);
+}
+
+/**
+ * parport_ip32_drain_fifo - wait for FIFO to empty
+ * @p:		pointer to &struct parport
+ * @timeout:	timeout, in jiffies
+ *
+ * This function waits for FIFO to empty.  It returns 1 when FIFO is empty, or
+ * 0 if the timeout @timeout is reached first, or if a signal is pending.
+ */
+static unsigned int parport_ip32_drain_fifo(struct parport *p,
+					    unsigned long timeout)
+{
+	unsigned long expire = jiffies + timeout;
+	unsigned int polling_interval;
+	unsigned int counter;
+
+	/* Busy wait for approx. 200us (up to 40 polls, 5us apart) */
+	for (counter = 0; counter < 40; counter++) {
+		if (parport_ip32_read_econtrol(p) & ECR_F_EMPTY)
+			break;
+		if (time_after(jiffies, expire))
+			break;
+		if (signal_pending(current))
+			break;
+		udelay(5);
+	}
+	/* Poll slowly.  Polling interval starts with 1 millisecond, and is
+	 * doubled after each sleep until it reaches 128 milliseconds.  */
+	polling_interval = 1; /* msecs */
+	while (!(parport_ip32_read_econtrol(p) & ECR_F_EMPTY)) {
+		if (time_after_eq(jiffies, expire))
+			break;
+		msleep_interruptible(polling_interval);
+		if (signal_pending(current))
+			break;
+		if (polling_interval < 128)
+			polling_interval *= 2;
+	}
+
+	return !!(parport_ip32_read_econtrol(p) & ECR_F_EMPTY);
+}
+
+/**
+ * parport_ip32_get_fifo_residue - reset FIFO and return its residue
+ * @p:		pointer to &struct parport
+ * @mode:	current operation mode (ECR_MODE_PPF or ECR_MODE_ECP)
+ *
+ * This function resets FIFO, and returns the number of bytes remaining in it.
+ */
+static unsigned int parport_ip32_get_fifo_residue(struct parport *p,
+						  unsigned int mode)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	unsigned int residue;
+	unsigned int cnfga;
+
+	/* FIXME - We are missing one byte if the printer is off-line.  I
+	 * don't know how to detect this.  It looks like the full bit is not
+	 * always reliable.  For the moment, the problem is avoided in most
+	 * cases by testing for BUSY in parport_ip32_compat_write_data().
+	 */
+	if (parport_ip32_read_econtrol(p) & ECR_F_EMPTY)
+		residue = 0;
+	else {
+		pr_debug1(PPIP32 "%s: FIFO is stuck\n", p->name);
+
+		/* Stop all transfers.
+		 *
+		 * Microsoft's document instructs to drive DCR_STROBE to 0,
+		 * but it doesn't work (at least in Compatibility mode, not
+		 * tested in ECP mode).  Switching directly to Test mode (as
+		 * in parport_pc) is not an option: it does confuse the port,
+		 * ECP service interrupts no longer work after that.  A
+		 * hard reset is then needed to revert to a sane state.
+		 *
+		 * Let's hope that the FIFO is really stuck and that the
+		 * peripheral doesn't wake up now.
+		 */
+		parport_ip32_frob_control(p, DCR_STROBE, 0);
+
+		/* Fill up FIFO */
+		for (residue = priv->fifo_depth; residue > 0; residue--) {
+			if (parport_ip32_read_econtrol(p) & ECR_F_FULL)
+				break;
+			writeb(0x00, priv->regs.fifo);
+		}
+	}
+	if (residue)
+		pr_debug1(PPIP32 "%s: %d PWord%s left in FIFO\n",
+			  p->name, residue,
+			  (residue == 1) ? " was" : "s were");
+
+	/* Now reset the FIFO */
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+
+	/* Host recovery for ECP mode */
+	if (mode == ECR_MODE_ECP) {
+		parport_ip32_data_reverse(p);
+		parport_ip32_frob_control(p, DCR_nINIT, 0);
+		if (parport_wait_peripheral(p, DSR_PERROR, 0))
+			pr_debug1(PPIP32 "%s: PEerror timeout 1 in %s\n",
+				  p->name, __func__);
+		parport_ip32_frob_control(p, DCR_STROBE, DCR_STROBE);
+		parport_ip32_frob_control(p, DCR_nINIT, DCR_nINIT);
+		if (parport_wait_peripheral(p, DSR_PERROR, DSR_PERROR))
+			pr_debug1(PPIP32 "%s: PEerror timeout 2 in %s\n",
+				  p->name, __func__);
+	}
+
+	/* Adjust residue if needed */
+	parport_ip32_set_mode(p, ECR_MODE_CFG);
+	cnfga = readb(priv->regs.cnfgA);
+	if (!(cnfga & CNFGA_nBYTEINTRANS)) {
+		pr_debug1(PPIP32 "%s: cnfgA contains 0x%02x\n",
+			  p->name, cnfga);
+		pr_debug1(PPIP32 "%s: Accounting for extra byte\n",
+			  p->name);
+		residue++;
+	}
+
+	/* Don't care about partial PWords since we do not support
+	 * PWord != 1 byte. */
+
+	/* Back to forward PS2 mode. */
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+	parport_ip32_data_forward(p);
+
+	return residue;
+}
+
+/**
+ * parport_ip32_compat_write_data - write a block of data in SPP mode
+ * @p:		pointer to &struct parport
+ * @buf:	buffer of data to write
+ * @len:	length of buffer @buf
+ * @flags:	passed on to the software fallback, otherwise ignored
+ */
+static size_t parport_ip32_compat_write_data(struct parport *p,
+					     const void *buf, size_t len,
+					     int flags)
+{
+	static unsigned int ready_before = 1;
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	struct parport * const physport = p->physport;
+	size_t written = 0;
+
+	/* Special case: a timeout of zero means we cannot call schedule().
+	 * Also if O_NONBLOCK is set then use the default implementation. */
+	if (physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
+		return parport_ieee1284_write_compat(p, buf, len, flags);
+
+	/* Reset FIFO, go in forward mode, and disable ackIntEn */
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+	parport_ip32_write_control(p, DCR_SELECT | DCR_nINIT);
+	parport_ip32_data_forward(p);
+	parport_ip32_disable_irq(p);
+	parport_ip32_set_mode(p, ECR_MODE_PPF);
+	physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
+
+	/* Wait for peripheral to become ready */
+	if (parport_wait_peripheral(p, DSR_nBUSY | DSR_nFAULT,
+				       DSR_nBUSY | DSR_nFAULT)) {
+		/* Avoid flooding the logs */
+		if (ready_before)
+			printk(KERN_INFO PPIP32 "%s: not ready in %s\n",
+			       p->name, __func__);
+		ready_before = 0;
+		goto stop;
+	}
+	ready_before = 1;
+
+	written = parport_ip32_fifo_write_block(p, buf, len);
+
+	/* Wait for FIFO to empty.  Timeout is proportional to FIFO_depth.  */
+	parport_ip32_drain_fifo(p, physport->cad->timeout * priv->fifo_depth);
+
+	/* Check for a potential residue */
+	written -= parport_ip32_get_fifo_residue(p, ECR_MODE_PPF);
+
+	/* Then, wait for BUSY to get low. */
+	if (parport_wait_peripheral(p, DSR_nBUSY, DSR_nBUSY))
+		printk(KERN_DEBUG PPIP32 "%s: BUSY timeout in %s\n",
+		       p->name, __func__);
+
+stop:
+	/* Reset FIFO */
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+	physport->ieee1284.phase = IEEE1284_PH_FWD_IDLE;
+
+	return written;
+}
+
+/*
+ * FIXME - Insert here parport_ip32_ecp_read_data().
+ */
+
+/**
+ * parport_ip32_ecp_write_data - write a block of data in ECP mode
+ * @p:		pointer to &struct parport
+ * @buf:	buffer of data to write
+ * @len:	length of buffer @buf
+ * @flags:	passed on to the software fallback, otherwise ignored
+ */
+static size_t parport_ip32_ecp_write_data(struct parport *p,
+					  const void *buf, size_t len,
+					  int flags)
+{
+	static unsigned int ready_before = 1;
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	struct parport * const physport = p->physport;
+	size_t written = 0;
+
+	/* Special case: a timeout of zero means we cannot call schedule().
+	 * Also if O_NONBLOCK is set then use the default implementation. */
+	if (physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
+		return parport_ieee1284_ecp_write_data(p, buf, len, flags);
+
+	/* Negotiate to forward mode if necessary. */
+	if (physport->ieee1284.phase != IEEE1284_PH_FWD_IDLE) {
+		/* Event 47: Set nInit high. */
+		parport_ip32_frob_control(p, DCR_nINIT | DCR_AUTOFD,
+					     DCR_nINIT | DCR_AUTOFD);
+
+		/* Event 49: PError goes high. */
+		if (parport_wait_peripheral(p, DSR_PERROR, DSR_PERROR)) {
+			printk(KERN_DEBUG PPIP32 "%s: PError timeout in %s\n",
+			       p->name, __func__);
+			physport->ieee1284.phase = IEEE1284_PH_ECP_DIR_UNKNOWN;
+			return 0;
+		}
+	}
+
+	/* Reset FIFO, go in forward mode, and disable ackIntEn */
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+	parport_ip32_write_control(p, DCR_SELECT | DCR_nINIT);
+	parport_ip32_data_forward(p);
+	parport_ip32_disable_irq(p);
+	parport_ip32_set_mode(p, ECR_MODE_ECP);
+	physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
+
+	/* Wait for peripheral to become ready */
+	if (parport_wait_peripheral(p, DSR_nBUSY | DSR_nFAULT,
+				       DSR_nBUSY | DSR_nFAULT)) {
+		/* Avoid flooding the logs */
+		if (ready_before)
+			printk(KERN_INFO PPIP32 "%s: not ready in %s\n",
+			       p->name, __func__);
+		ready_before = 0;
+		goto stop;
+	}
+	ready_before = 1;
+
+	written = parport_ip32_fifo_write_block(p, buf, len);
+
+	/* Wait for FIFO to empty.  Timeout is proportional to FIFO_depth.  */
+	parport_ip32_drain_fifo(p, physport->cad->timeout * priv->fifo_depth);
+
+	/* Check for a potential residue */
+	written -= parport_ip32_get_fifo_residue(p, ECR_MODE_ECP);
+
+	/* Then, wait for BUSY to get low. */
+	if (parport_wait_peripheral(p, DSR_nBUSY, DSR_nBUSY))
+		printk(KERN_DEBUG PPIP32 "%s: BUSY timeout in %s\n",
+		       p->name, __func__);
+
+stop:
+	/* Reset FIFO */
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+	physport->ieee1284.phase = IEEE1284_PH_FWD_IDLE;
+
+	return written;
+}
+
+/*
+ * FIXME - Insert here parport_ip32_ecp_write_addr().
+ */
+
+/*--- Default parport operations ---------------------------------------*/
+
+static __initdata struct parport_operations parport_ip32_ops = {
+	.write_data		= parport_ip32_write_data,
+	.read_data		= parport_ip32_read_data,
+
+	.write_control		= parport_ip32_write_control,
+	.read_control		= parport_ip32_read_control,
+	.frob_control		= parport_ip32_frob_control,
+
+	.read_status		= parport_ip32_read_status,
+
+	.enable_irq		= parport_ip32_enable_irq,
+	.disable_irq		= parport_ip32_disable_irq,
+
+	.data_forward		= parport_ip32_data_forward,
+	.data_reverse		= parport_ip32_data_reverse,
+
+	.init_state		= parport_ip32_init_state,
+	.save_state		= parport_ip32_save_state,
+	.restore_state		= parport_ip32_restore_state,
+
+	.epp_write_data		= parport_ieee1284_epp_write_data,
+	.epp_read_data		= parport_ieee1284_epp_read_data,
+	.epp_write_addr		= parport_ieee1284_epp_write_addr,
+	.epp_read_addr		= parport_ieee1284_epp_read_addr,
+
+	.ecp_write_data		= parport_ieee1284_ecp_write_data,
+	.ecp_read_data		= parport_ieee1284_ecp_read_data,
+	.ecp_write_addr		= parport_ieee1284_ecp_write_addr,
+
+	.compat_write_data	= parport_ieee1284_write_compat,
+	.nibble_read_data	= parport_ieee1284_read_nibble,
+	.byte_read_data		= parport_ieee1284_read_byte,
+
+	.owner			= THIS_MODULE,
+};
+
+/*--- Device detection -------------------------------------------------*/
+
+/**
+ * parport_ip32_ecp_supported - check for an ECP port
+ * @p:		pointer to the &parport structure
+ *
+ * Returns 1 if an ECP port is found, and 0 otherwise.  This function actually
+ * checks if an Extended Control Register seems to be present.  On successful
+ * return, the port is placed in SPP mode.
+ */
+static __init unsigned int parport_ip32_ecp_supported(struct parport *p)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	unsigned int ecr;
+
+	ecr = ECR_MODE_PS2 | ECR_nERRINTR | ECR_SERVINTR;
+	writeb(ecr, priv->regs.ecr);
+	if (readb(priv->regs.ecr) != (ecr | ECR_F_EMPTY))
+		goto fail;
+
+	pr_probe(p, "Found working ECR register\n");
+	parport_ip32_set_mode(p, ECR_MODE_SPP);
+	parport_ip32_write_control(p, DCR_SELECT | DCR_nINIT);
+	return 1;
+
+fail:
+	pr_probe(p, "ECR register not found\n");
+	return 0;
+}
+
+/**
+ * parport_ip32_fifo_supported - check for FIFO parameters
+ * @p:		pointer to the &parport structure
+ *
+ * Check for FIFO parameters of an Extended Capabilities Port.  Returns 1 on
+ * success, and 0 otherwise.  Adjust FIFO parameters in the parport structure.
+ * On return, the port is placed in SPP mode.
+ */
+static __init unsigned int parport_ip32_fifo_supported(struct parport *p)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	unsigned int configa, configb;
+	unsigned int pword;
+	unsigned int i;
+
+	/* Configuration mode */
+	parport_ip32_set_mode(p, ECR_MODE_CFG);
+	configa = readb(priv->regs.cnfgA);
+	configb = readb(priv->regs.cnfgB);
+
+	/* Find out PWord size */
+	switch (configa & CNFGA_ID_MASK) {
+	case CNFGA_ID_8:
+		pword = 1;
+		break;
+	case CNFGA_ID_16:
+		pword = 2;
+		break;
+	case CNFGA_ID_32:
+		pword = 4;
+		break;
+	default:
+		pr_probe(p, "Unknown implementation ID: 0x%0x\n",
+			 (configa & CNFGA_ID_MASK) >> CNFGA_ID_SHIFT);
+		goto fail;
+		break;
+	}
+	if (pword != 1) {
+		pr_probe(p, "Unsupported PWord size: %u\n", pword);
+		goto fail;
+	}
+	priv->pword = pword;
+	pr_probe(p, "PWord is %u bits\n", 8 * priv->pword);
+
+	/* Check for compression support */
+	writeb(configb | CNFGB_COMPRESS, priv->regs.cnfgB);
+	if (readb(priv->regs.cnfgB) & CNFGB_COMPRESS)
+		pr_probe(p, "Hardware compression detected (unsupported)\n");
+	writeb(configb & ~CNFGB_COMPRESS, priv->regs.cnfgB);
+
+	/* Reset FIFO and go in test mode (no interrupt, no DMA) */
+	parport_ip32_set_mode(p, ECR_MODE_TST);
+
+	/* FIFO must be empty now */
+	if (!(readb(priv->regs.ecr) & ECR_F_EMPTY)) {
+		pr_probe(p, "FIFO not reset\n");
+		goto fail;
+	}
+
+	/* Find out FIFO depth. */
+	priv->fifo_depth = 0;
+	for (i = 0; i < 1024; i++) {
+		if (readb(priv->regs.ecr) & ECR_F_FULL) {
+			/* FIFO full */
+			priv->fifo_depth = i;
+			break;
+		}
+		writeb((u8)i, priv->regs.fifo);
+	}
+	if (i >= 1024) {
+		pr_probe(p, "Can't fill FIFO\n");
+		goto fail;
+	}
+	if (!priv->fifo_depth) {
+		pr_probe(p, "Can't get FIFO depth\n");
+		goto fail;
+	}
+	pr_probe(p, "FIFO is %u PWords deep\n", priv->fifo_depth);
+
+	/* Enable interrupts */
+	parport_ip32_frob_econtrol(p, ECR_SERVINTR, 0);
+
+	/* Find out writeIntrThreshold: number of PWords we know we can write
+	 * if we get an interrupt. */
+	priv->writeIntrThreshold = 0;
+	for (i = 0; i < priv->fifo_depth; i++) {
+		if (readb(priv->regs.fifo) != (u8)i) {
+			pr_probe(p, "Invalid data in FIFO\n");
+			goto fail;
+		}
+		if (!priv->writeIntrThreshold
+		    && readb(priv->regs.ecr) & ECR_SERVINTR)
+			/* writeIntrThreshold reached */
+			priv->writeIntrThreshold = i + 1;
+		if (i + 1 < priv->fifo_depth
+		    && readb(priv->regs.ecr) & ECR_F_EMPTY) {
+			/* FIFO empty before the last byte? */
+			pr_probe(p, "Data lost in FIFO\n");
+			goto fail;
+		}
+	}
+	if (!priv->writeIntrThreshold) {
+		pr_probe(p, "Can't get writeIntrThreshold\n");
+		goto fail;
+	}
+	pr_probe(p, "writeIntrThreshold is %u\n", priv->writeIntrThreshold);
+
+	/* FIFO must be empty now */
+	if (!(readb(priv->regs.ecr) & ECR_F_EMPTY)) {
+		pr_probe(p, "Can't empty FIFO\n");
+		goto fail;
+	}
+
+	/* Reset FIFO */
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+	/* Set reverse direction (must be in PS2 mode) */
+	parport_ip32_data_reverse(p);
+	/* Test FIFO, no interrupt, no DMA */
+	parport_ip32_set_mode(p, ECR_MODE_TST);
+	/* Enable interrupts */
+	parport_ip32_frob_econtrol(p, ECR_SERVINTR, 0);
+
+	/* Find out readIntrThreshold: number of PWords we can read if we get
+	 * an interrupt. */
+	priv->readIntrThreshold = 0;
+	for (i = 0; i < priv->fifo_depth; i++) {
+		writeb(0xaa, priv->regs.fifo);
+		if (readb(priv->regs.ecr) & ECR_SERVINTR) {
+			/* readIntrThreshold reached */
+			priv->readIntrThreshold = i + 1;
+			break;
+		}
+	}
+	if (!priv->readIntrThreshold) {
+		pr_probe(p, "Can't get readIntrThreshold\n");
+		goto fail;
+	}
+	pr_probe(p, "readIntrThreshold is %u\n", priv->readIntrThreshold);
+
+	/* Reset ECR */
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+	parport_ip32_data_forward(p);
+	parport_ip32_set_mode(p, ECR_MODE_SPP);
+	return 1;
+
+fail:
+	priv->fifo_depth = 0;
+	parport_ip32_set_mode(p, ECR_MODE_SPP);
+	return 0;
+}
+
+/*--- Initialization code ----------------------------------------------*/
+
+/**
+ * parport_ip32_make_isa_registers - compute (ISA) register addresses
+ * @regs:	pointer to &struct parport_ip32_regs to fill
+ * @base:	base address of standard and EPP registers
+ * @base_hi:	base address of ECP registers
+ * @regshift:	how much to shift register offset by
+ *
+ * Compute register addresses, according to the ISA standard.  The addresses
+ * of the standard and EPP registers are computed from address @base.  The
+ * addresses of the ECP registers are computed from address @base_hi.
+ */
+static void __init
+parport_ip32_make_isa_registers(struct parport_ip32_regs *regs,
+				void __iomem *base, void __iomem *base_hi,
+				unsigned int regshift)
+{
+#define r_base(offset)    ((u8 __iomem *)base    + ((offset) << regshift))
+#define r_base_hi(offset) ((u8 __iomem *)base_hi + ((offset) << regshift))
+	*regs = (struct parport_ip32_regs){
+		.data		= r_base(0),
+		.dsr		= r_base(1),
+		.dcr		= r_base(2),
+		.eppAddr	= r_base(3),
+		.eppData0	= r_base(4),
+		.eppData1	= r_base(5),
+		.eppData2	= r_base(6),
+		.eppData3	= r_base(7),
+		.ecpAFifo	= r_base(0),
+		.fifo		= r_base_hi(0),
+		.cnfgA		= r_base_hi(0),
+		.cnfgB		= r_base_hi(1),
+		.ecr		= r_base_hi(2)
+	};
+#undef r_base_hi
+#undef r_base
+}
+
+/**
+ * parport_ip32_probe_port - probe and register IP32 built-in parallel port
+ *
+ * Returns the newly allocated &parport structure.  On error, an error code
+ * is encoded in the return value with the ERR_PTR function.
+ */
+static __init struct parport *parport_ip32_probe_port(void)
+{
+	struct parport_ip32_regs regs;
+	struct parport_ip32_private *priv = NULL;
+	struct parport_operations *ops = NULL;
+	struct parport *p = NULL;
+	int err;
+
+	parport_ip32_make_isa_registers(&regs, &mace->isa.parallel,
+					&mace->isa.ecp1284, 8 /* regshift */);
+
+	ops = kmalloc(sizeof(struct parport_operations), GFP_KERNEL);
+	priv = kmalloc(sizeof(struct parport_ip32_private), GFP_KERNEL);
+	p = parport_register_port(0, PARPORT_IRQ_NONE, PARPORT_DMA_NONE, ops);
+	if (ops == NULL || priv == NULL || p == NULL) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	p->base = MACE_BASE + offsetof(struct sgi_mace, isa.parallel);
+	p->base_hi = MACE_BASE + offsetof(struct sgi_mace, isa.ecp1284);
+	p->private_data = priv;
+
+	*ops = parport_ip32_ops;
+	*priv = (struct parport_ip32_private){
+		.regs			= regs,
+		.dcr_writable		= DCR_DIR | DCR_SELECT | DCR_nINIT |
+					  DCR_AUTOFD | DCR_STROBE,
+		.irq_mode		= PARPORT_IP32_IRQ_FWD,
+	};
+	init_completion(&priv->irq_complete);
+
+	/* Probe port. */
+	if (!parport_ip32_ecp_supported(p)) {
+		err = -ENODEV;
+		goto fail;
+	}
+	parport_ip32_dump_state(p, "begin init", 0);
+
+	/* We found what looks like a working ECR register.  Simply assume
+	 * that all modes are correctly supported.  Enable basic modes. */
+	p->modes = PARPORT_MODE_PCSPP | PARPORT_MODE_SAFEININT;
+	p->modes |= PARPORT_MODE_TRISTATE;
+
+	if (!parport_ip32_fifo_supported(p)) {
+		printk(KERN_WARNING PPIP32
+		       "%s: error: FIFO disabled\n", p->name);
+		/* Disable hardware modes depending on a working FIFO. */
+		features &= ~PARPORT_IP32_ENABLE_SPP;
+		features &= ~PARPORT_IP32_ENABLE_ECP;
+		/* DMA is not needed if FIFO is not supported.  */
+		features &= ~PARPORT_IP32_ENABLE_DMA;
+	}
+
+	/* Request IRQ */
+	if (features & PARPORT_IP32_ENABLE_IRQ) {
+		int irq = MACEISA_PARALLEL_IRQ;
+		if (request_irq(irq, parport_ip32_interrupt, 0, p->name, p)) {
+			printk(KERN_WARNING PPIP32
+			       "%s: error: IRQ disabled\n", p->name);
+			/* DMA cannot work without interrupts. */
+			features &= ~PARPORT_IP32_ENABLE_DMA;
+		} else {
+			pr_probe(p, "Interrupt support enabled\n");
+			p->irq = irq;
+			priv->dcr_writable |= DCR_IRQ;
+		}
+	}
+
+	/* Allocate DMA resources */
+	if (features & PARPORT_IP32_ENABLE_DMA) {
+		if (parport_ip32_dma_register())
+			printk(KERN_WARNING PPIP32
+			       "%s: error: DMA disabled\n", p->name);
+		else {
+			pr_probe(p, "DMA support enabled\n");
+			p->dma = 0; /* arbitrary value != PARPORT_DMA_NONE */
+			p->modes |= PARPORT_MODE_DMA;
+		}
+	}
+
+	if (features & PARPORT_IP32_ENABLE_SPP) {
+		/* Enable compatibility FIFO mode */
+		p->ops->compat_write_data = parport_ip32_compat_write_data;
+		p->modes |= PARPORT_MODE_COMPAT;
+		pr_probe(p, "Hardware support for SPP mode enabled\n");
+	}
+	if (features & PARPORT_IP32_ENABLE_EPP) {
+		/* Set up access functions to use EPP hardware. */
+		p->ops->epp_read_data = parport_ip32_epp_read_data;
+		p->ops->epp_write_data = parport_ip32_epp_write_data;
+		p->ops->epp_read_addr = parport_ip32_epp_read_addr;
+		p->ops->epp_write_addr = parport_ip32_epp_write_addr;
+		p->modes |= PARPORT_MODE_EPP;
+		pr_probe(p, "Hardware support for EPP mode enabled\n");
+	}
+	if (features & PARPORT_IP32_ENABLE_ECP) {
+		/* Enable ECP FIFO mode */
+		p->ops->ecp_write_data = parport_ip32_ecp_write_data;
+		/* FIXME - not implemented */
+/*		p->ops->ecp_read_data  = parport_ip32_ecp_read_data; */
+/*		p->ops->ecp_write_addr = parport_ip32_ecp_write_addr; */
+		p->modes |= PARPORT_MODE_ECP;
+		pr_probe(p, "Hardware support for ECP mode enabled\n");
+	}
+
+	/* Initialize the port with sensible values */
+	parport_ip32_set_mode(p, ECR_MODE_PS2);
+	parport_ip32_write_control(p, DCR_SELECT | DCR_nINIT);
+	parport_ip32_data_forward(p);
+	parport_ip32_disable_irq(p);
+	parport_ip32_write_data(p, 0x00);
+	parport_ip32_dump_state(p, "end init", 0);
+
+	/* Print out what we found */
+	printk(KERN_INFO "%s: SGI IP32 at 0x%lx (0x%lx)",
+	       p->name, p->base, p->base_hi);
+	if (p->irq != PARPORT_IRQ_NONE)
+		printk(", irq %d", p->irq);
+	printk(" [");
+#define printmode(x)	if (p->modes & PARPORT_MODE_##x)		\
+				printk("%s%s", f++ ? "," : "", #x)
+	{
+		unsigned int f = 0;
+		printmode(PCSPP);
+		printmode(TRISTATE);
+		printmode(COMPAT);
+		printmode(EPP);
+		printmode(ECP);
+		printmode(DMA);
+	}
+#undef printmode
+	printk("]\n");
+
+	parport_announce_port(p);
+	return p;
+
+fail:
+	if (p)
+		parport_put_port(p);
+	kfree(priv);
+	kfree(ops);
+	return ERR_PTR(err);
+}
+
+/**
+ * parport_ip32_unregister_port - unregister a parallel port
+ * @p:		pointer to the &struct parport
+ *
+ * Unregisters a parallel port and frees previously allocated resources
+ * (memory, IRQ, ...).
+ */
+static __exit void parport_ip32_unregister_port(struct parport *p)
+{
+	struct parport_ip32_private * const priv = p->physport->private_data;
+	struct parport_operations *ops = p->ops;
+
+	parport_remove_port(p);
+	if (p->modes & PARPORT_MODE_DMA)
+		parport_ip32_dma_unregister();
+	if (p->irq != PARPORT_IRQ_NONE)
+		free_irq(p->irq, p);
+	parport_put_port(p);
+	kfree(priv);
+	kfree(ops);
+}
+
+/**
+ * parport_ip32_init - module initialization function
+ */
+static int __init parport_ip32_init(void)
+{
+	pr_info(PPIP32 "SGI IP32 built-in parallel port driver v0.6\n");
+	pr_debug1(PPIP32 "Compiled on %s, %s\n", __DATE__, __TIME__);
+	this_port = parport_ip32_probe_port();
+	return IS_ERR(this_port) ? PTR_ERR(this_port) : 0;
+}
+
+/**
+ * parport_ip32_exit - module termination function
+ */
+static void __exit parport_ip32_exit(void)
+{
+	parport_ip32_unregister_port(this_port);
+}
+
+/*--- Module stuff -----------------------------------------------------*/
+
+MODULE_AUTHOR("Arnaud Giersch <arnaud.giersch@free.fr>");
+MODULE_DESCRIPTION("SGI IP32 built-in parallel port driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("0.6");		/* update in parport_ip32_init() too */
+
+module_init(parport_ip32_init);
+module_exit(parport_ip32_exit);
+
+module_param(verbose_probing, bool, S_IRUGO);
+MODULE_PARM_DESC(verbose_probing, "Log chit-chat during initialization");
+
+module_param(features, uint, S_IRUGO);
+MODULE_PARM_DESC(features,
+		 "Bit mask of features to enable"
+		 ", bit 0: IRQ support"
+		 ", bit 1: DMA support"
+		 ", bit 2: hardware SPP mode"
+		 ", bit 3: hardware EPP mode"
+		 ", bit 4: hardware ECP mode");
+
+/*--- Inform (X)Emacs about preferred coding style ---------------------*/
+/*
+ * Local Variables:
+ * mode: c
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * tab-width: 8
+ * fill-column: 78
+ * ispell-local-dictionary: "american"
+ * End:
+ */
diff --git a/include/linux/parport.h b/include/linux/parport.h
index f67f838a3a1f..008d736a6c9a 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -128,6 +128,11 @@ struct amiga_parport_state {
        unsigned char statusdir;/* ciab.ddrb & 7 */
 };
 
+struct ip32_parport_state {
+	unsigned int dcr;
+	unsigned int ecr;
+};
+
 struct parport_state {
 	union {
 		struct pc_parport_state pc;
@@ -135,6 +140,7 @@ struct parport_state {
 		struct ax_parport_state ax;
 		struct amiga_parport_state amiga;
 		/* Atari has not state. */
+		struct ip32_parport_state ip32;
 		void *misc; 
 	} u;
 };
-- 
cgit v1.2.3-71-gd317


From 8b3e09e19932835fb77c63aaf3b1af6117e78871 Mon Sep 17 00:00:00 2001
From: Markus Lidel <Markus.Lidel@shadowconnect.com>
Date: Fri, 3 Feb 2006 03:04:29 -0800
Subject: [PATCH] I2O: fix and workaround for Motorola/Freescale controller

- This controller violates the I2O spec for the I/O registers.  The patch
  contains a workaround which moves the registers to the proper location.
  (originally author: Matthew Starzewski)

- If a message frame is beyond the mapped address range an error is
  returned.

Signed-off-by: Markus Lidel <Markus.Lidel@shadowconnect.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/message/i2o/core.h |  3 +++
 drivers/message/i2o/pci.c  | 18 ++++++++++++++++++
 include/linux/i2o.h        |  6 ++++--
 3 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/message/i2o/core.h b/drivers/message/i2o/core.h
index 90628562851e..184974cc734d 100644
--- a/drivers/message/i2o/core.h
+++ b/drivers/message/i2o/core.h
@@ -60,4 +60,7 @@ extern void i2o_iop_remove(struct i2o_controller *);
 #define I2O_IN_PORT	0x40
 #define I2O_OUT_PORT	0x44
 
+/* Motorola/Freescale specific register offset */
+#define I2O_MOTOROLA_PORT_OFFSET	0x10400
+
 #define I2O_IRQ_OUTBOUND_POST	0x00000008
diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c
index e2e3fc79c78a..4f1515cae5dc 100644
--- a/drivers/message/i2o/pci.c
+++ b/drivers/message/i2o/pci.c
@@ -168,6 +168,24 @@ static int __devinit i2o_pci_alloc(struct i2o_controller *c)
 	c->in_port = c->base.virt + I2O_IN_PORT;
 	c->out_port = c->base.virt + I2O_OUT_PORT;
 
+	/* Motorola/Freescale chip does not follow spec */
+	if (pdev->vendor == PCI_VENDOR_ID_MOTOROLA && pdev->device == 0x18c0) {
+		/* Check if CPU is enabled */
+		if (be32_to_cpu(readl(c->base.virt + 0x10000)) & 0x10000000) {
+			printk(KERN_INFO "%s: MPC82XX needs CPU running to "
+			       "service I2O.\n", c->name);
+			i2o_pci_free(c);
+			return -ENODEV;
+		} else {
+			c->irq_status += I2O_MOTOROLA_PORT_OFFSET;
+			c->irq_mask += I2O_MOTOROLA_PORT_OFFSET;
+			c->in_port += I2O_MOTOROLA_PORT_OFFSET;
+			c->out_port += I2O_MOTOROLA_PORT_OFFSET;
+			printk(KERN_INFO "%s: MPC82XX workarounds activated.\n",
+			       c->name);
+		}
+	}
+
 	if (i2o_dma_alloc(dev, &c->status, 8, GFP_KERNEL)) {
 		i2o_pci_free(c);
 		return -ENOMEM;
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 9ba806796667..5a9d8c599171 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -1115,9 +1115,11 @@ static inline struct i2o_message *i2o_msg_get(struct i2o_controller *c)
 		return ERR_PTR(-ENOMEM);
 
 	mmsg->mfa = readl(c->in_port);
-	if (mmsg->mfa == I2O_QUEUE_EMPTY) {
+	if (unlikely(mmsg->mfa >= c->in_queue.len)) {
 		mempool_free(mmsg, c->in_msg.mempool);
-		return ERR_PTR(-EBUSY);
+		if(mmsg->mfa == I2O_QUEUE_EMPTY)
+			return ERR_PTR(-EBUSY);
+		return ERR_PTR(-EFAULT);
 	}
 
 	return &mmsg->msg;
-- 
cgit v1.2.3-71-gd317


From 6bf8d889ed453f709dedacabdcf5db47470fdae9 Mon Sep 17 00:00:00 2001
From: Herbert Poetzl <herbert@13thfloor.at>
Date: Fri, 3 Feb 2006 03:04:32 -0800
Subject: [PATCH] quota: remove unused sync_dquots_dev()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The already removed sync_dquots_dev(dev,type) is still defined in the
no-quota case.

Signed-off-by: Herbert Pötzl <herbert@13thfloor.at>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/quotaops.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 4f34d3d60f2e..21e5a9124856 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -190,7 +190,6 @@ static __inline__ int DQUOT_OFF(struct super_block *sb)
  */
 #define sb_dquot_ops				(NULL)
 #define sb_quotactl_ops				(NULL)
-#define sync_dquots_dev(dev,type)		(NULL)
 #define DQUOT_INIT(inode)			do { } while(0)
 #define DQUOT_DROP(inode)			do { } while(0)
 #define DQUOT_ALLOC_INODE(inode)		(0)
-- 
cgit v1.2.3-71-gd317


From bb3b9cf122eb097ed9fe8ae50e1b0dbba9bbe447 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@us.ibm.com>
Date: Fri, 3 Feb 2006 03:04:38 -0800
Subject: [PATCH] Fix comment to synchronize_sched()

Fix to broken comment to synchronize_rcu() noted by Keith Owens.  Also add
sentence noting that synchronize_sched() and synchronize_rcu() are not
necessarily identical.

Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Cc: Keith Owens <kaos@sgi.com>
Cc: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/rcupdate.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 981f9aa43353..b87aefa082e2 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -240,11 +240,14 @@ extern int rcu_pending(int cpu);
  * This means that all preempt_disable code sequences, including NMI and
  * hardware-interrupt handlers, in progress on entry will have completed
  * before this primitive returns.  However, this does not guarantee that
- * softirq handlers will have completed, since in some kernels
+ * softirq handlers will have completed, since in some kernels, these
+ * handlers can run in process context, and can block.
  *
  * This primitive provides the guarantees made by the (deprecated)
  * synchronize_kernel() API.  In contrast, synchronize_rcu() only
  * guarantees that rcu_read_lock() sections will have completed.
+ * In "classic RCU", these two guarantees happen to be one and
+ * the same, but can differ in realtime RCU implementations.
  */
 #define synchronize_sched() synchronize_rcu()
 
-- 
cgit v1.2.3-71-gd317


From 2c5d81a58106fa333467beab5f11dafab07b3e66 Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando@intellilink.co.jp>
Date: Fri, 3 Feb 2006 03:04:39 -0800
Subject: [PATCH] Compilation of kexec/kdump broken

The compilation of kexec/kdump seems to be broken for x86_64.  Remove the
dependency of kexec on CONFIG_IA32_EMULATION.

Signed-off-by: Fernando Vazquez <fernando@intellilink.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/kexec.h | 3 ++-
 include/linux/elfcore.h    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/asm-x86_64/kexec.h b/include/asm-x86_64/kexec.h
index ae28cd44bcd3..c564bae03433 100644
--- a/include/asm-x86_64/kexec.h
+++ b/include/asm-x86_64/kexec.h
@@ -1,8 +1,9 @@
 #ifndef _X86_64_KEXEC_H
 #define _X86_64_KEXEC_H
 
+#include <linux/string.h>
+
 #include <asm/page.h>
-#include <asm/proto.h>
 #include <asm/ptrace.h>
 
 /*
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index dbd7bb4a33b7..0cf0bea010fe 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -5,6 +5,7 @@
 #include <linux/signal.h>
 #include <linux/time.h>
 #include <linux/user.h>
+#include <linux/ptrace.h>
 
 struct elf_siginfo
 {
-- 
cgit v1.2.3-71-gd317


From 808249ceba49cdb3054c0aa5b75a61862d6cab94 Mon Sep 17 00:00:00 2001
From: Paul Fulghum <paulkf@microgate.com>
Date: Fri, 3 Feb 2006 03:04:41 -0800
Subject: [PATCH] new tty buffering locking fix

Change locking in the new tty buffering facility from using tty->read_lock,
which is currently ignored by drivers and thus ineffective.  New locking
uses a new tty buffering specific lock enforced centrally in the tty
buffering code.

Two drivers (esp and cyclades) are updated to use the tty buffering
functions instead of accessing tty buffering internals directly.  This is
required for the new locking to work.

Minor checks for NULL buffers are added to tty_prepare_flip_string() and
tty_prepare_flip_string_flags().

Signed-off-by: Paul Fulghum <paulkf@microgate.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/cyclades.c  |  6 ++--
 drivers/char/esp.c       |  4 +--
 drivers/char/tty_io.c    | 77 +++++++++++++++++++++++++++++++-----------------
 include/linux/kbd_kern.h |  5 ++++
 include/linux/tty.h      |  2 ++
 include/linux/tty_flip.h |  7 ++++-
 6 files changed, 68 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 39c61a71176e..cc7acf877dc0 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -1233,7 +1233,7 @@ cyy_interrupt(int irq, void *dev_id, struct pt_regs *regs)
                             }
                              info->idle_stats.recv_idle = jiffies;
                         }
-                        schedule_delayed_work(&tty->buf.work, 1);
+			tty_schedule_flip(tty);
                     }
                     /* end of service */
                     cy_writeb(base_addr+(CyRIR<<index), (save_xir & 0x3f));
@@ -1606,7 +1606,7 @@ cyz_handle_rx(struct cyclades_port *info,
 	    }
 #endif
 	    info->idle_stats.recv_idle = jiffies;
-	    schedule_delayed_work(&tty->buf.work, 1);
+	    tty_schedule_flip(tty);
 	}
 	/* Update rx_get */
 	cy_writel(&buf_ctrl->rx_get, new_rx_get);
@@ -1809,7 +1809,7 @@ cyz_handle_cmd(struct cyclades_card *cinfo)
 	if(delta_count)
 	    cy_sched_event(info, Cy_EVENT_DELTA_WAKEUP);
 	if(special_count)
-	    schedule_delayed_work(&tty->buf.work, 1);
+	    tty_schedule_flip(tty);
     }
 }
 
diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index 3f3ac039f4d9..57539d8f9f7c 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -359,7 +359,7 @@ static inline void receive_chars_pio(struct esp_struct *info, int num_bytes)
 		}
 	}
 
-	schedule_delayed_work(&tty->buf.work, 1);
+	tty_schedule_flip(tty);
 
 	info->stat_flags &= ~ESP_STAT_RX_TIMEOUT;
 	release_pio_buffer(pio_buf);
@@ -426,7 +426,7 @@ static inline void receive_chars_dma_done(struct esp_struct *info,
 			}
 			tty_insert_flip_char(tty, dma_buffer[num_bytes - 1], statflag);
 		}
-		schedule_delayed_work(&tty->buf.work, 1);
+		tty_schedule_flip(tty);
 	}
 
 	if (dma_bytes != num_bytes) {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index eb8b5be4e249..076e07c1da38 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -253,6 +253,7 @@ static void tty_buffer_free_all(struct tty_struct *tty)
 
 static void tty_buffer_init(struct tty_struct *tty)
 {
+	spin_lock_init(&tty->buf.lock);
 	tty->buf.head = NULL;
 	tty->buf.tail = NULL;
 	tty->buf.free = NULL;
@@ -266,6 +267,7 @@ static struct tty_buffer *tty_buffer_alloc(size_t size)
 	p->used = 0;
 	p->size = size;
 	p->next = NULL;
+	p->active = 0;
 	p->char_buf_ptr = (char *)(p->data);
 	p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size;
 /* 	printk("Flip create %p\n", p); */
@@ -312,25 +314,36 @@ static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
 
 int tty_buffer_request_room(struct tty_struct *tty, size_t size)
 {
-	struct tty_buffer *b = tty->buf.tail, *n;
-	int left = 0;
+	struct tty_buffer *b, *n;
+	int left;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tty->buf.lock, flags);
 
 	/* OPTIMISATION: We could keep a per tty "zero" sized buffer to
 	   remove this conditional if its worth it. This would be invisible
 	   to the callers */
-	if(b != NULL)
+	if ((b = tty->buf.tail) != NULL) {
 		left = b->size - b->used;
-	if(left >= size)
-		return size;
-	/* This is the slow path - looking for new buffers to use */
-	n = tty_buffer_find(tty, size);
-	if(n == NULL)
-		return left;
-	if(b != NULL)
-		b->next = n;
-	else
-		tty->buf.head = n;
-	tty->buf.tail = n;
+		b->active = 1;
+	} else
+		left = 0;
+
+	if (left < size) {
+		/* This is the slow path - looking for new buffers to use */
+		if ((n = tty_buffer_find(tty, size)) != NULL) {
+			if (b != NULL) {
+				b->next = n;
+				b->active = 0;
+			} else
+				tty->buf.head = n;
+			tty->buf.tail = n;
+			n->active = 1;
+		} else
+			size = left;
+	}
+
+	spin_unlock_irqrestore(&tty->buf.lock, flags);
 	return size;
 }
 
@@ -396,10 +409,12 @@ EXPORT_SYMBOL_GPL(tty_insert_flip_string_flags);
 int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size)
 {
 	int space = tty_buffer_request_room(tty, size);
-	struct tty_buffer *tb = tty->buf.tail;
-	*chars = tb->char_buf_ptr + tb->used;
-	memset(tb->flag_buf_ptr + tb->used, TTY_NORMAL, space);
-	tb->used += space;
+	if (likely(space)) {
+		struct tty_buffer *tb = tty->buf.tail;
+		*chars = tb->char_buf_ptr + tb->used;
+		memset(tb->flag_buf_ptr + tb->used, TTY_NORMAL, space);
+		tb->used += space;
+	}
 	return space;
 }
 
@@ -416,10 +431,12 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string);
 int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size)
 {
 	int space = tty_buffer_request_room(tty, size);
-	struct tty_buffer *tb = tty->buf.tail;
-	*chars = tb->char_buf_ptr + tb->used;
-	*flags = tb->flag_buf_ptr + tb->used;
-	tb->used += space;
+	if (likely(space)) {
+		struct tty_buffer *tb = tty->buf.tail;
+		*chars = tb->char_buf_ptr + tb->used;
+		*flags = tb->flag_buf_ptr + tb->used;
+		tb->used += space;
+	}
 	return space;
 }
 
@@ -2747,20 +2764,20 @@ static void flush_to_ldisc(void *private_)
 		schedule_delayed_work(&tty->buf.work, 1);
 		goto out;
 	}
-	spin_lock_irqsave(&tty->read_lock, flags);
-	while((tbuf = tty->buf.head) != NULL) {
+	spin_lock_irqsave(&tty->buf.lock, flags);
+	while((tbuf = tty->buf.head) != NULL && !tbuf->active) {
 		tty->buf.head = tbuf->next;
 		if (tty->buf.head == NULL)
 			tty->buf.tail = NULL;
-		spin_unlock_irqrestore(&tty->read_lock, flags);
+		spin_unlock_irqrestore(&tty->buf.lock, flags);
 		/* printk("Process buffer %p for %d\n", tbuf, tbuf->used); */
 		disc->receive_buf(tty, tbuf->char_buf_ptr,
 				       tbuf->flag_buf_ptr,
 				       tbuf->used);
-		spin_lock_irqsave(&tty->read_lock, flags);
+		spin_lock_irqsave(&tty->buf.lock, flags);
 		tty_buffer_free(tty, tbuf);
 	}
-	spin_unlock_irqrestore(&tty->read_lock, flags);
+	spin_unlock_irqrestore(&tty->buf.lock, flags);
 out:
 	tty_ldisc_deref(disc);
 }
@@ -2852,6 +2869,12 @@ EXPORT_SYMBOL(tty_get_baud_rate);
 
 void tty_flip_buffer_push(struct tty_struct *tty)
 {
+	unsigned long flags;
+	spin_lock_irqsave(&tty->buf.lock, flags);
+	if (tty->buf.tail != NULL)
+		tty->buf.tail->active = 0;
+	spin_unlock_irqrestore(&tty->buf.lock, flags);
+
 	if (tty->low_latency)
 		flush_to_ldisc((void *) tty);
 	else
diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h
index 45f625d7d0b2..3aed37314ab8 100644
--- a/include/linux/kbd_kern.h
+++ b/include/linux/kbd_kern.h
@@ -151,6 +151,11 @@ extern unsigned int keymap_count;
 
 static inline void con_schedule_flip(struct tty_struct *t)
 {
+	unsigned long flags;
+	spin_lock_irqsave(&t->buf.lock, flags);
+	if (t->buf.tail != NULL)
+		t->buf.tail->active = 0;
+	spin_unlock_irqrestore(&t->buf.lock, flags);
 	schedule_work(&t->buf.work);
 }
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 3787102e4b12..a7bd3b4558d2 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -57,6 +57,7 @@ struct tty_buffer {
 	unsigned char *flag_buf_ptr;
 	int used;
 	int size;
+	int active;
 	/* Data points here */
 	unsigned long data[0];
 };
@@ -64,6 +65,7 @@ struct tty_buffer {
 struct tty_bufhead {
 	struct work_struct		work;
 	struct semaphore pty_sem;
+	spinlock_t lock;
 	struct tty_buffer *head;	/* Queue head */
 	struct tty_buffer *tail;	/* Active buffer */
 	struct tty_buffer *free;	/* Free queue head */
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index be1400e82482..82961eb19888 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -17,7 +17,7 @@ _INLINE_ int tty_insert_flip_char(struct tty_struct *tty,
 				   unsigned char ch, char flag)
 {
 	struct tty_buffer *tb = tty->buf.tail;
-	if (tb && tb->used < tb->size) {
+	if (tb && tb->active && tb->used < tb->size) {
 		tb->flag_buf_ptr[tb->used] = flag;
 		tb->char_buf_ptr[tb->used++] = ch;
 		return 1;
@@ -27,6 +27,11 @@ _INLINE_ int tty_insert_flip_char(struct tty_struct *tty,
 
 _INLINE_ void tty_schedule_flip(struct tty_struct *tty)
 {
+	unsigned long flags;
+	spin_lock_irqsave(&tty->buf.lock, flags);
+	if (tty->buf.tail != NULL)
+		tty->buf.tail->active = 0;
+	spin_unlock_irqrestore(&tty->buf.lock, flags);
 	schedule_delayed_work(&tty->buf.work, 1);
 }
 
-- 
cgit v1.2.3-71-gd317


From 9810933701a09f9c4dd0ad963d5ec2efb7df07b7 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@rpsys.net>
Date: Fri, 3 Feb 2006 03:04:55 -0800
Subject: [PATCH] stop CompactFlash devices being marked as removable

This patch stops CompactFlash devices being marked as removable.  They are
not removable (as defined by Linux) as the media and device are
inseparable.  When a card is removed, the whole device is removed from the
system and never sits in a media-less state.

This stops some nasty udev device creation/destruction loops.

Further, once this change is made, there is no need for ide to treat flash
devices specially, so the is_flash flag can be removed from ide_drive_t.

Signed-off-by: Richard Purdie <rpurdie@rpsys.net>
Acked-by: Bartlomiej Zolnierkiewicz <B.Zolnierkiewicz@elka.pw.edu.pl>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/ide/ide-disk.c  |  6 +-----
 drivers/ide/ide-probe.c | 51 ++++++++-----------------------------------------
 drivers/ide/ide.c       |  1 -
 include/linux/ide.h     |  1 -
 4 files changed, 9 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index ca25f9e3d0f4..f46bb0404ae4 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -889,11 +889,7 @@ static void idedisk_setup (ide_drive_t *drive)
 	if (drive->id_read == 0)
 		return;
 
-	/*
-	 * CompactFlash cards and their brethern look just like hard drives
-	 * to us, but they are removable and don't have a doorlock mechanism.
-	 */
-	if (drive->removable && !(drive->is_flash)) {
+	if (drive->removable) {
 		/*
 		 * Removable disks (eg. SYQUEST); ignore 'WD' drives 
 		 */
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index e7425546b4b1..427d1c204174 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -124,45 +124,6 @@ static void ide_disk_init_mult_count(ide_drive_t *drive)
 	}
 }
 
-/**
- *	drive_is_flashcard	-	check for compact flash
- *	@drive: drive to check
- *
- *	CompactFlash cards and their brethern pretend to be removable
- *	hard disks, except:
- * 		(1) they never have a slave unit, and
- *		(2) they don't have doorlock mechanisms.
- *	This test catches them, and is invoked elsewhere when setting
- *	appropriate config bits.
- *
- *	FIXME: This treatment is probably applicable for *all* PCMCIA (PC CARD)
- *	devices, so in linux 2.3.x we should change this to just treat all
- *	PCMCIA  drives this way, and get rid of the model-name tests below
- *	(too big of an interface change for 2.4.x).
- *	At that time, we might also consider parameterizing the timeouts and
- *	retries, since these are MUCH faster than mechanical drives. -M.Lord
- */
- 
-static inline int drive_is_flashcard (ide_drive_t *drive)
-{
-	struct hd_driveid *id = drive->id;
-
-	if (drive->removable) {
-		if (id->config == 0x848a) return 1;	/* CompactFlash */
-		if (!strncmp(id->model, "KODAK ATA_FLASH", 15)	/* Kodak */
-		 || !strncmp(id->model, "Hitachi CV", 10)	/* Hitachi */
-		 || !strncmp(id->model, "SunDisk SDCFB", 13)	/* old SanDisk */
-		 || !strncmp(id->model, "SanDisk SDCFB", 13)	/* SanDisk */
-		 || !strncmp(id->model, "HAGIWARA HPC", 12)	/* Hagiwara */
-		 || !strncmp(id->model, "LEXAR ATA_FLASH", 15)	/* Lexar */
-		 || !strncmp(id->model, "ATA_FLASH", 9))	/* Simple Tech */
-		{
-			return 1;	/* yes, it is a flash memory card */
-		}
-	}
-	return 0;	/* no, it is not a flash memory card */
-}
-
 /**
  *	do_identify	-	identify a drive
  *	@drive: drive to identify 
@@ -278,13 +239,17 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
 	/*
 	 * Not an ATAPI device: looks like a "regular" hard disk
 	 */
-	if (id->config & (1<<7))
+
+	/*
+	 * 0x848a = CompactFlash device
+	 * These are *not* removable in Linux definition of the term
+	 */
+
+	if ((id->config != 0x848a) && (id->config & (1<<7)))
 		drive->removable = 1;
 
-	if (drive_is_flashcard(drive))
-		drive->is_flash = 1;
 	drive->media = ide_disk;
-	printk("%s DISK drive\n", (drive->is_flash) ? "CFA" : "ATA" );
+	printk("%s DISK drive\n", (id->config == 0x848a) ? "CFA" : "ATA" );
 	QUIRK_LIST(drive);
 	return;
 
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index afeb02bbb722..b2cc43702f65 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -242,7 +242,6 @@ static void init_hwif_data(ide_hwif_t *hwif, unsigned int index)
 		drive->name[2]			= 'a' + (index * MAX_DRIVES) + unit;
 		drive->max_failures		= IDE_DEFAULT_MAX_FAILURES;
 		drive->using_dma		= 0;
-		drive->is_flash			= 0;
 		drive->vdma			= 0;
 		INIT_LIST_HEAD(&drive->list);
 		init_completion(&drive->gendev_rel_comp);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 110b3cfac021..274d15287181 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -582,7 +582,6 @@ typedef struct ide_drive_s {
 	unsigned noprobe 	: 1;	/* from:  hdx=noprobe */
 	unsigned removable	: 1;	/* 1 if need to do check_media_change */
 	unsigned attach		: 1;	/* needed for removable devices */
-	unsigned is_flash	: 1;	/* 1 if probed as flash */
 	unsigned forced_geom	: 1;	/* 1 if hdx=c,h,s was given at boot */
 	unsigned no_unmask	: 1;	/* disallow setting unmask bit */
 	unsigned no_io_32bit	: 1;	/* disallow enabling 32bit I/O */
-- 
cgit v1.2.3-71-gd317


From a7ff7d41fec06c518caa82a818a70610a29d0e75 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 3 Feb 2006 03:04:56 -0800
Subject: [PATCH] drivers/ide/ide-io.c: make __ide_end_request() static

Since there's no longer any external user, we can make __ide_end_request()
static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Bartlomiej Zolnierkiewicz <B.Zolnierkiewicz@elka.pw.edu.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/ide/ide-io.c | 5 ++---
 include/linux/ide.h  | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 8d50df4526a4..c01615dec202 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -55,8 +55,8 @@
 #include <asm/io.h>
 #include <asm/bitops.h>
 
-int __ide_end_request(ide_drive_t *drive, struct request *rq, int uptodate,
-		      int nr_sectors)
+static int __ide_end_request(ide_drive_t *drive, struct request *rq,
+			     int uptodate, int nr_sectors)
 {
 	int ret = 1;
 
@@ -91,7 +91,6 @@ int __ide_end_request(ide_drive_t *drive, struct request *rq, int uptodate,
 
 	return ret;
 }
-EXPORT_SYMBOL(__ide_end_request);
 
 /**
  *	ide_end_request		-	complete an IDE I/O
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 274d15287181..a7fc4cc79b23 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1005,7 +1005,6 @@ extern	ide_hwif_t	ide_hwifs[];		/* master data repository */
 extern int noautodma;
 
 extern int ide_end_request (ide_drive_t *drive, int uptodate, int nrsecs);
-extern int __ide_end_request (ide_drive_t *drive, struct request *rq, int uptodate, int nrsecs);
 
 /*
  * This is used on exit from the driver to designate the next irq handler
-- 
cgit v1.2.3-71-gd317


From 3d0f89bb169482d26d5aa4e82e763077e7e9bc4d Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 25 Jan 2006 13:31:07 -0800
Subject: configfs: Add permission and ownership to configfs objects.

configfs always made item and attribute ownership root.root and
permissions based on a umask of 022.  Add ->setattr() to allow
chown(2)/chmod(2), and persist the changes for the lifetime of the
items and attributes.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 .../filesystems/configfs/configfs_example.c        |   2 +
 fs/configfs/configfs_internal.h                    |  11 +-
 fs/configfs/dir.c                                  |  36 +++++--
 fs/configfs/file.c                                 |  19 ++--
 fs/configfs/inode.c                                | 117 +++++++++++++++++++--
 fs/configfs/mount.c                                |  28 ++++-
 fs/configfs/symlink.c                              |   1 +
 include/linux/configfs.h                           |   2 +-
 8 files changed, 179 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c
index f3c6e4946f98..3d4713a6c207 100644
--- a/Documentation/filesystems/configfs/configfs_example.c
+++ b/Documentation/filesystems/configfs/configfs_example.c
@@ -320,6 +320,7 @@ static struct config_item_type simple_children_type = {
 	.ct_item_ops	= &simple_children_item_ops,
 	.ct_group_ops	= &simple_children_group_ops,
 	.ct_attrs	= simple_children_attrs,
+	.ct_owner	= THIS_MODULE,
 };
 
 static struct configfs_subsystem simple_children_subsys = {
@@ -403,6 +404,7 @@ static struct config_item_type group_children_type = {
 	.ct_item_ops	= &group_children_item_ops,
 	.ct_group_ops	= &group_children_group_ops,
 	.ct_attrs	= group_children_attrs,
+	.ct_owner	= THIS_MODULE,
 };
 
 static struct configfs_subsystem group_children_subsys = {
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 8899d9c5f6bf..f70e46951b37 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -36,6 +36,7 @@ struct configfs_dirent {
 	int			s_type;
 	umode_t			s_mode;
 	struct dentry		* s_dentry;
+	struct iattr		* s_iattr;
 };
 
 #define CONFIGFS_ROOT		0x0001
@@ -48,10 +49,11 @@ struct configfs_dirent {
 #define CONFIGFS_NOT_PINNED	(CONFIGFS_ITEM_ATTR)
 
 extern struct vfsmount * configfs_mount;
+extern kmem_cache_t *configfs_dir_cachep;
 
 extern int configfs_is_root(struct config_item *item);
 
-extern struct inode * configfs_new_inode(mode_t mode);
+extern struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent *);
 extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *));
 
 extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
@@ -63,6 +65,7 @@ extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
 
 extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
 extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
+extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr);
 
 extern int configfs_pin_fs(void);
 extern void configfs_release_fs(void);
@@ -120,8 +123,10 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
 
 static inline void release_configfs_dirent(struct configfs_dirent * sd)
 {
-	if (!(sd->s_type & CONFIGFS_ROOT))
-		kfree(sd);
+	if (!(sd->s_type & CONFIGFS_ROOT)) {
+		kfree(sd->s_iattr);
+		kmem_cache_free(configfs_dir_cachep, sd);
+	}
 }
 
 static inline struct configfs_dirent * configfs_get(struct configfs_dirent * sd)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index b668ec61527e..ca60e3abef45 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -72,7 +72,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
 {
 	struct configfs_dirent * sd;
 
-	sd = kmalloc(sizeof(*sd), GFP_KERNEL);
+	sd = kmem_cache_alloc(configfs_dir_cachep, GFP_KERNEL);
 	if (!sd)
 		return NULL;
 
@@ -136,13 +136,19 @@ static int create_dir(struct config_item * k, struct dentry * p,
 	int error;
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
 
-	error = configfs_create(d, mode, init_dir);
+	error = configfs_make_dirent(p->d_fsdata, d, k, mode,
+				     CONFIGFS_DIR);
 	if (!error) {
-		error = configfs_make_dirent(p->d_fsdata, d, k, mode,
-					   CONFIGFS_DIR);
+		error = configfs_create(d, mode, init_dir);
 		if (!error) {
 			p->d_inode->i_nlink++;
 			(d)->d_op = &configfs_dentry_ops;
+		} else {
+			struct configfs_dirent *sd = d->d_fsdata;
+			if (sd) {
+				list_del_init(&sd->s_sibling);
+				configfs_put(sd);
+			}
 		}
 	}
 	return error;
@@ -182,12 +188,19 @@ int configfs_create_link(struct configfs_symlink *sl,
 	int err = 0;
 	umode_t mode = S_IFLNK | S_IRWXUGO;
 
-	err = configfs_create(dentry, mode, init_symlink);
+	err = configfs_make_dirent(parent->d_fsdata, dentry, sl, mode,
+				   CONFIGFS_ITEM_LINK);
 	if (!err) {
-		err = configfs_make_dirent(parent->d_fsdata, dentry, sl,
-					 mode, CONFIGFS_ITEM_LINK);
+		err = configfs_create(dentry, mode, init_symlink);
 		if (!err)
 			dentry->d_op = &configfs_dentry_ops;
+		else {
+			struct configfs_dirent *sd = dentry->d_fsdata;
+			if (sd) {
+				list_del_init(&sd->s_sibling);
+				configfs_put(sd);
+			}
+		}
 	}
 	return err;
 }
@@ -241,13 +254,15 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
 	struct configfs_attribute * attr = sd->s_element;
 	int error;
 
+	dentry->d_fsdata = configfs_get(sd);
+	sd->s_dentry = dentry;
 	error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, init_file);
-	if (error)
+	if (error) {
+		configfs_put(sd);
 		return error;
+	}
 
 	dentry->d_op = &configfs_dentry_ops;
-	dentry->d_fsdata = configfs_get(sd);
-	sd->s_dentry = dentry;
 	d_rehash(dentry);
 
 	return 0;
@@ -839,6 +854,7 @@ struct inode_operations configfs_dir_inode_operations = {
 	.symlink	= configfs_symlink,
 	.unlink		= configfs_unlink,
 	.lookup		= configfs_lookup,
+	.setattr	= configfs_setattr,
 };
 
 #if 0
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index c26cd61f13af..3921920d8716 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -26,7 +26,6 @@
 
 #include <linux/fs.h>
 #include <linux/module.h>
-#include <linux/dnotify.h>
 #include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
@@ -150,7 +149,7 @@ out:
 /**
  *	fill_write_buffer - copy buffer from userspace.
  *	@buffer:	data buffer for file.
- *	@userbuf:	data from user.
+ *	@buf:		data from user.
  *	@count:		number of bytes in @userbuf.
  *
  *	Allocate @buffer->page if it hasn't been already, then
@@ -177,8 +176,9 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size
 
 /**
  *	flush_write_buffer - push buffer to config_item.
- *	@file:		file pointer.
+ *	@dentry:	dentry to the attribute
  *	@buffer:	data buffer for file.
+ *	@count:		number of bytes
  *
  *	Get the correct pointers for the config_item and the attribute we're
  *	dealing with, then call the store() method for the attribute,
@@ -217,15 +217,16 @@ static ssize_t
 configfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
 	struct configfs_buffer * buffer = file->private_data;
+	ssize_t len;
 
 	down(&buffer->sem);
-	count = fill_write_buffer(buffer,buf,count);
-	if (count > 0)
-		count = flush_write_buffer(file->f_dentry,buffer,count);
-	if (count > 0)
-		*ppos += count;
+	len = fill_write_buffer(buffer, buf, count);
+	if (len > 0)
+		len = flush_write_buffer(file->f_dentry, buffer, count);
+	if (len > 0)
+		*ppos += len;
 	up(&buffer->sem);
-	return count;
+	return len;
 }
 
 static int check_perm(struct inode * inode, struct file * file)
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 6577c588de9d..737842f2764b 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -31,6 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/namei.h>
 #include <linux/backing-dev.h>
+#include <linux/capability.h>
 
 #include <linux/configfs.h>
 #include "configfs_internal.h"
@@ -48,18 +49,107 @@ static struct backing_dev_info configfs_backing_dev_info = {
 	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
 };
 
-struct inode * configfs_new_inode(mode_t mode)
+static struct inode_operations configfs_inode_operations ={
+	.setattr	= configfs_setattr,
+};
+
+int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
+{
+	struct inode * inode = dentry->d_inode;
+	struct configfs_dirent * sd = dentry->d_fsdata;
+	struct iattr * sd_iattr;
+	unsigned int ia_valid = iattr->ia_valid;
+	int error;
+
+	if (!sd)
+		return -EINVAL;
+
+	sd_iattr = sd->s_iattr;
+
+	error = inode_change_ok(inode, iattr);
+	if (error)
+		return error;
+
+	error = inode_setattr(inode, iattr);
+	if (error)
+		return error;
+
+	if (!sd_iattr) {
+		/* setting attributes for the first time, allocate now */
+		sd_iattr = kmalloc(sizeof(struct iattr), GFP_KERNEL);
+		if (!sd_iattr)
+			return -ENOMEM;
+		/* assign default attributes */
+		memset(sd_iattr, 0, sizeof(struct iattr));
+		sd_iattr->ia_mode = sd->s_mode;
+		sd_iattr->ia_uid = 0;
+		sd_iattr->ia_gid = 0;
+		sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
+		sd->s_iattr = sd_iattr;
+	}
+
+	/* attributes were changed atleast once in past */
+
+	if (ia_valid & ATTR_UID)
+		sd_iattr->ia_uid = iattr->ia_uid;
+	if (ia_valid & ATTR_GID)
+		sd_iattr->ia_gid = iattr->ia_gid;
+	if (ia_valid & ATTR_ATIME)
+		sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_MTIME)
+		sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_CTIME)
+		sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime,
+						inode->i_sb->s_time_gran);
+	if (ia_valid & ATTR_MODE) {
+		umode_t mode = iattr->ia_mode;
+
+		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+			mode &= ~S_ISGID;
+		sd_iattr->ia_mode = sd->s_mode = mode;
+	}
+
+	return error;
+}
+
+static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
+{
+	inode->i_mode = mode;
+	inode->i_uid = 0;
+	inode->i_gid = 0;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+}
+
+static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
+{
+	inode->i_mode = iattr->ia_mode;
+	inode->i_uid = iattr->ia_uid;
+	inode->i_gid = iattr->ia_gid;
+	inode->i_atime = iattr->ia_atime;
+	inode->i_mtime = iattr->ia_mtime;
+	inode->i_ctime = iattr->ia_ctime;
+}
+
+struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
 {
 	struct inode * inode = new_inode(configfs_sb);
 	if (inode) {
-		inode->i_mode = mode;
-		inode->i_uid = 0;
-		inode->i_gid = 0;
 		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		inode->i_mapping->a_ops = &configfs_aops;
 		inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
+		inode->i_op = &configfs_inode_operations;
+
+		if (sd->s_iattr) {
+			/* sysfs_dirent has non-default attributes
+			 * get them for the new inode from persistent copy
+			 * in sysfs_dirent
+			 */
+			set_inode_attr(inode, sd->s_iattr);
+		} else
+			set_default_inode_attr(inode, mode);
 	}
 	return inode;
 }
@@ -70,7 +160,8 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *
 	struct inode * inode = NULL;
 	if (dentry) {
 		if (!dentry->d_inode) {
-			if ((inode = configfs_new_inode(mode))) {
+			struct configfs_dirent *sd = dentry->d_fsdata;
+			if ((inode = configfs_new_inode(mode, sd))) {
 				if (dentry->d_parent && dentry->d_parent->d_inode) {
 					struct inode *p_inode = dentry->d_parent->d_inode;
 					p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
@@ -103,7 +194,7 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *
  */
 const unsigned char * configfs_get_name(struct configfs_dirent *sd)
 {
-	struct attribute * attr;
+	struct configfs_attribute *attr;
 
 	if (!sd || !sd->s_element)
 		BUG();
@@ -114,7 +205,7 @@ const unsigned char * configfs_get_name(struct configfs_dirent *sd)
 
 	if (sd->s_type & CONFIGFS_ITEM_ATTR) {
 		attr = sd->s_element;
-		return attr->name;
+		return attr->ca_name;
 	}
 	return NULL;
 }
@@ -130,13 +221,17 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
 
 	if (dentry) {
 		spin_lock(&dcache_lock);
+		spin_lock(&dentry->d_lock);
 		if (!(d_unhashed(dentry) && dentry->d_inode)) {
 			dget_locked(dentry);
 			__d_drop(dentry);
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
 			simple_unlink(parent->d_inode, dentry);
-		} else
+		} else {
+			spin_unlock(&dentry->d_lock);
 			spin_unlock(&dcache_lock);
+		}
 	}
 }
 
@@ -145,6 +240,10 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
 	struct configfs_dirent * sd;
 	struct configfs_dirent * parent_sd = dir->d_fsdata;
 
+	if (dir->d_inode == NULL)
+		/* no inode means this hasn't been made visible yet */
+		return;
+
 	mutex_lock(&dir->d_inode->i_mutex);
 	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
 		if (!sd->s_element)
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 1a2f6f6a4d91..f920d30478e5 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -38,6 +38,7 @@
 
 struct vfsmount * configfs_mount = NULL;
 struct super_block * configfs_sb = NULL;
+kmem_cache_t *configfs_dir_cachep;
 static int configfs_mnt_count = 0;
 
 static struct super_operations configfs_ops = {
@@ -62,6 +63,7 @@ static struct configfs_dirent configfs_root = {
 	.s_children	= LIST_HEAD_INIT(configfs_root.s_children),
 	.s_element	= &configfs_root_group.cg_item,
 	.s_type		= CONFIGFS_ROOT,
+	.s_iattr	= NULL,
 };
 
 static int configfs_fill_super(struct super_block *sb, void *data, int silent)
@@ -73,9 +75,11 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = CONFIGFS_MAGIC;
 	sb->s_op = &configfs_ops;
+	sb->s_time_gran = 1;
 	configfs_sb = sb;
 
-	inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
+	inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+				   &configfs_root);
 	if (inode) {
 		inode->i_op = &configfs_dir_inode_operations;
 		inode->i_fop = &configfs_dir_operations;
@@ -128,19 +132,31 @@ static decl_subsys(config, NULL, NULL);
 
 static int __init configfs_init(void)
 {
-	int err;
+	int err = -ENOMEM;
+
+	configfs_dir_cachep = kmem_cache_create("configfs_dir_cache",
+						sizeof(struct configfs_dirent),
+						0, 0, NULL, NULL);
+	if (!configfs_dir_cachep)
+		goto out;
 
 	kset_set_kset_s(&config_subsys, kernel_subsys);
 	err = subsystem_register(&config_subsys);
-	if (err)
-		return err;
+	if (err) {
+		kmem_cache_destroy(configfs_dir_cachep);
+		configfs_dir_cachep = NULL;
+		goto out;
+	}
 
 	err = register_filesystem(&configfs_fs_type);
 	if (err) {
 		printk(KERN_ERR "configfs: Unable to register filesystem!\n");
 		subsystem_unregister(&config_subsys);
+		kmem_cache_destroy(configfs_dir_cachep);
+		configfs_dir_cachep = NULL;
 	}
 
+out:
 	return err;
 }
 
@@ -148,11 +164,13 @@ static void __exit configfs_exit(void)
 {
 	unregister_filesystem(&configfs_fs_type);
 	subsystem_unregister(&config_subsys);
+	kmem_cache_destroy(configfs_dir_cachep);
+	configfs_dir_cachep = NULL;
 }
 
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
-MODULE_VERSION("0.0.1");
+MODULE_VERSION("0.0.2");
 MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration.");
 
 module_init(configfs_init);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 50f5840521a9..99137026b409 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -277,5 +277,6 @@ struct inode_operations configfs_symlink_inode_operations = {
 	.follow_link = configfs_follow_link,
 	.readlink = generic_readlink,
 	.put_link = configfs_put_link,
+	.setattr = configfs_setattr,
 };
 
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index acffb8c9073a..a7f015027535 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -126,7 +126,7 @@ extern struct config_item *config_group_find_obj(struct config_group *, const ch
 
 
 struct configfs_attribute {
-	char			*ca_name;
+	const char		*ca_name;
 	struct module 		*ca_owner;
 	mode_t			ca_mode;
 };
-- 
cgit v1.2.3-71-gd317


From 53ea68ecea11bcbb3451c2758ce181bd97b569a9 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@epoch.ncsc.mil>
Date: Fri, 3 Feb 2006 08:21:12 -0500
Subject: [PATCH] SELinux: fix size-128 slab leak

Remove private inode tests from security_inode_alloc and security_inode_free,
as we otherwise end up leaking inode security structures for private inodes.

Signed-off-by:  Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by:  Linus Torvalds <torvalds@osdl.org>
---
 include/linux/security.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index bb1da86747c7..7cbef482e13a 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1499,15 +1499,11 @@ static inline void security_sb_post_pivotroot (struct nameidata *old_nd,
 
 static inline int security_inode_alloc (struct inode *inode)
 {
-	if (unlikely (IS_PRIVATE (inode)))
-		return 0;
 	return security_ops->inode_alloc_security (inode);
 }
 
 static inline void security_inode_free (struct inode *inode)
 {
-	if (unlikely (IS_PRIVATE (inode)))
-		return;
 	security_ops->inode_free_security (inode);
 }
 
-- 
cgit v1.2.3-71-gd317


From 19ea7302df2eb4f2ad7f29af814d8cf55fc8b9c9 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 4 Feb 2006 02:15:36 -0800
Subject: [NETFILTER]: iptables: fix typos in ipt_connbytes.h

Fix some typos that make iptables userspace compilation fail.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ipt_connbytes.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h
index b04dfa3083c9..f63e6ee91113 100644
--- a/include/linux/netfilter_ipv4/ipt_connbytes.h
+++ b/include/linux/netfilter_ipv4/ipt_connbytes.h
@@ -1,10 +1,10 @@
 #ifndef _IPT_CONNBYTES_H
 #define _IPT_CONNBYTES_H
 
-#include <net/netfilter/xt_connbytes.h>
+#include <linux/netfilter/xt_connbytes.h>
 #define ipt_connbytes_what xt_connbytes_what
 
-#define IPT_CONNBYTES_PKTS	XT_CONNBYTES_PACKETS
+#define IPT_CONNBYTES_PKTS	XT_CONNBYTES_PKTS
 #define IPT_CONNBYTES_BYTES	XT_CONNBYTES_BYTES
 #define IPT_CONNBYTES_AVGPKT	XT_CONNBYTES_AVGPKT
 
-- 
cgit v1.2.3-71-gd317


From 0047c65a60fa3b6607b55e058ea6a89f39cb3f28 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 4 Feb 2006 02:19:09 -0800
Subject: [NETFILTER]: Prepare {ipt,ip6t}_policy match for x_tables unification

The IPv4 and IPv6 versions of the policy match are identical besides address
comparison and the data structure used for userspace communication. Unify
the data structures to break compatibility now (before it is released), so
we can port it to x_tables in 2.6.17.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ipt_policy.h  | 22 ++++++++++++++--------
 include/linux/netfilter_ipv6/ip6t_policy.h | 22 ++++++++++++++--------
 net/ipv4/netfilter/ipt_policy.c            |  9 ++++++---
 net/ipv6/netfilter/ip6t_policy.c           |  4 ++--
 4 files changed, 36 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h
index 7fd1bec453f1..a3f6eff39d33 100644
--- a/include/linux/netfilter_ipv4/ipt_policy.h
+++ b/include/linux/netfilter_ipv4/ipt_policy.h
@@ -27,16 +27,22 @@ struct ipt_policy_spec
 			reqid:1;
 };
 
+union ipt_policy_addr
+{
+	struct in_addr	a4;
+	struct in6_addr	a6;
+};
+
 struct ipt_policy_elem
 {
-	u_int32_t	saddr;
-	u_int32_t	smask;
-	u_int32_t	daddr;
-	u_int32_t	dmask;
-	u_int32_t	spi;
-	u_int32_t	reqid;
-	u_int8_t	proto;
-	u_int8_t	mode;
+	union ipt_policy_addr	saddr;
+	union ipt_policy_addr	smask;
+	union ipt_policy_addr	daddr;
+	union ipt_policy_addr	dmask;
+	u_int32_t		spi;
+	u_int32_t		reqid;
+	u_int8_t		proto;
+	u_int8_t		mode;
 
 	struct ipt_policy_spec	match;
 	struct ipt_policy_spec	invert;
diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h
index 5a93afcd2ff1..671bd818300f 100644
--- a/include/linux/netfilter_ipv6/ip6t_policy.h
+++ b/include/linux/netfilter_ipv6/ip6t_policy.h
@@ -27,16 +27,22 @@ struct ip6t_policy_spec
 			reqid:1;
 };
 
+union ip6t_policy_addr
+{
+	struct in_addr	a4;
+	struct in6_addr	a6;
+};
+
 struct ip6t_policy_elem
 {
-	struct in6_addr	saddr;
-	struct in6_addr	smask;
-	struct in6_addr	daddr;
-	struct in6_addr	dmask;
-	u_int32_t	spi;
-	u_int32_t	reqid;
-	u_int8_t	proto;
-	u_int8_t	mode;
+	union ip6t_policy_addr	saddr;
+	union ip6t_policy_addr	smask;
+	union ip6t_policy_addr	daddr;
+	union ip6t_policy_addr	dmask;
+	u_int32_t		spi;
+	u_int32_t		reqid;
+	u_int8_t		proto;
+	u_int8_t		mode;
 
 	struct ip6t_policy_spec	match;
 	struct ip6t_policy_spec	invert;
diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c
index a48949a3a750..5a7a265280f9 100644
--- a/net/ipv4/netfilter/ipt_policy.c
+++ b/net/ipv4/netfilter/ipt_policy.c
@@ -26,10 +26,13 @@ MODULE_LICENSE("GPL");
 static inline int
 match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e)
 {
-#define MATCH(x,y)	(!e->match.x || ((e->x == (y)) ^ e->invert.x))
+#define MATCH_ADDR(x,y,z)	(!e->match.x ||				     \
+		                 ((e->x.a4.s_addr == (e->y.a4.s_addr & (z))) \
+				  ^ e->invert.x))
+#define MATCH(x,y)		(!e->match.x || ((e->x == (y)) ^ e->invert.x))
 
-	return MATCH(saddr, x->props.saddr.a4 & e->smask) &&
-	       MATCH(daddr, x->id.daddr.a4 & e->dmask) &&
+	return MATCH_ADDR(saddr, smask, x->props.saddr.a4) &&
+	       MATCH_ADDR(daddr, dmask, x->id.daddr.a4) &&
 	       MATCH(proto, x->id.proto) &&
 	       MATCH(mode, x->props.mode) &&
 	       MATCH(spi, x->id.spi) &&
diff --git a/net/ipv6/netfilter/ip6t_policy.c b/net/ipv6/netfilter/ip6t_policy.c
index 1d0f48276123..3d39ec924041 100644
--- a/net/ipv6/netfilter/ip6t_policy.c
+++ b/net/ipv6/netfilter/ip6t_policy.c
@@ -26,8 +26,8 @@ MODULE_LICENSE("GPL");
 static inline int
 match_xfrm_state(struct xfrm_state *x, const struct ip6t_policy_elem *e)
 {
-#define MATCH_ADDR(x,y,z)	(!e->match.x ||				 \
-				 ((!ip6_masked_addrcmp(&e->x, &e->y, z)) \
+#define MATCH_ADDR(x,y,z)	(!e->match.x ||				       \
+				 ((!ip6_masked_addrcmp(&e->x.a6, &e->y.a6, z)) \
 				  ^ e->invert.x))
 #define MATCH(x,y)		(!e->match.x || ((e->x == (y)) ^ e->invert.x))
 	
-- 
cgit v1.2.3-71-gd317


From a460ad62260def15c42130de253d6cfc32528a2f Mon Sep 17 00:00:00 2001
From: Phillip Susi <psusi@cfl.rr.com>
Date: Sat, 4 Feb 2006 23:27:44 -0800
Subject: [PATCH] pktcdvd: Fix overflow for discs with large packets

The pktcdvd driver was using an 8 bit field to store the packet length
obtained from the disc track info.  This causes it to overflow packet length
values of 128KB or more.  I changed the field to 32 bits to fix this.

The pktcdvd driver defaulted to its maximum allowed packet length when it
detected a 0 in the track info field.  I changed this to fail the operation
and refuse to access the media.  This seems more sane than attempting to
access it with a value that almost certainly will not work.

Signed-off-by: Peter Osterlund <petero2@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/block/pktcdvd.c | 2 +-
 include/linux/pktcdvd.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 93affeeef7bd..d95e7e1ac355 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1639,7 +1639,7 @@ static int pkt_probe_settings(struct pktcdvd_device *pd)
 	pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2;
 	if (pd->settings.size == 0) {
 		printk("pktcdvd: detected zero packet size!\n");
-		pd->settings.size = 128;
+		return -ENXIO;
 	}
 	if (pd->settings.size > PACKET_MAX_SECTORS) {
 		printk("pktcdvd: packet size is too big\n");
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index 2c177e4c8f22..d1c9c4a86e52 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -114,7 +114,7 @@ struct pkt_ctrl_command {
 
 struct packet_settings
 {
-	__u8			size;		/* packet size in (512 byte) sectors */
+	__u32			size;		/* packet size in (512 byte) sectors */
 	__u8			fp;		/* fixed packets */
 	__u8			link_loss;	/* the rest is specified
 						 * as per Mt Fuji */
-- 
cgit v1.2.3-71-gd317


From e1bc89bc9991e994f2b3c60d9ad2fdb5ad9b10fc Mon Sep 17 00:00:00 2001
From: Peter Osterlund <petero2@telia.com>
Date: Sat, 4 Feb 2006 23:27:47 -0800
Subject: [PATCH] pktcdvd: Don't waste kernel memory

Allocate memory for read-gathering at open time, when it is known just how
much memory is needed.  This avoids wasting kernel memory when the real packet
size is smaller than the maximum packet size supported by the driver.  This is
always the case when using DVD discs.

Signed-off-by: Peter Osterlund <petero2@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/block/Kconfig   |  4 ++--
 drivers/block/pktcdvd.c | 53 ++++++++++++++++++++++++++-----------------------
 include/linux/pktcdvd.h |  4 ++--
 3 files changed, 32 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index db6818fdf15d..8b1331677407 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -433,8 +433,8 @@ config CDROM_PKTCDVD_BUFFERS
 	  This controls the maximum number of active concurrent packets. More
 	  concurrent packets can increase write performance, but also require
 	  more memory. Each concurrent packet will require approximately 64Kb
-	  of non-swappable kernel memory, memory which will be allocated at
-	  pktsetup time.
+	  of non-swappable kernel memory, memory which will be allocated when
+	  a disc is opened for writing.
 
 config CDROM_PKTCDVD_WCACHE
 	bool "Enable write caching (EXPERIMENTAL)"
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index cd16813effc5..4e7dbcc425ff 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -129,7 +129,7 @@ static struct bio *pkt_bio_alloc(int nr_iovecs)
 /*
  * Allocate a packet_data struct
  */
-static struct packet_data *pkt_alloc_packet_data(void)
+static struct packet_data *pkt_alloc_packet_data(int frames)
 {
 	int i;
 	struct packet_data *pkt;
@@ -138,11 +138,12 @@ static struct packet_data *pkt_alloc_packet_data(void)
 	if (!pkt)
 		goto no_pkt;
 
-	pkt->w_bio = pkt_bio_alloc(PACKET_MAX_SIZE);
+	pkt->frames = frames;
+	pkt->w_bio = pkt_bio_alloc(frames);
 	if (!pkt->w_bio)
 		goto no_bio;
 
-	for (i = 0; i < PAGES_PER_PACKET; i++) {
+	for (i = 0; i < frames / FRAMES_PER_PAGE; i++) {
 		pkt->pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
 		if (!pkt->pages[i])
 			goto no_page;
@@ -150,7 +151,7 @@ static struct packet_data *pkt_alloc_packet_data(void)
 
 	spin_lock_init(&pkt->lock);
 
-	for (i = 0; i < PACKET_MAX_SIZE; i++) {
+	for (i = 0; i < frames; i++) {
 		struct bio *bio = pkt_bio_alloc(1);
 		if (!bio)
 			goto no_rd_bio;
@@ -160,14 +161,14 @@ static struct packet_data *pkt_alloc_packet_data(void)
 	return pkt;
 
 no_rd_bio:
-	for (i = 0; i < PACKET_MAX_SIZE; i++) {
+	for (i = 0; i < frames; i++) {
 		struct bio *bio = pkt->r_bios[i];
 		if (bio)
 			bio_put(bio);
 	}
 
 no_page:
-	for (i = 0; i < PAGES_PER_PACKET; i++)
+	for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
 		if (pkt->pages[i])
 			__free_page(pkt->pages[i]);
 	bio_put(pkt->w_bio);
@@ -184,12 +185,12 @@ static void pkt_free_packet_data(struct packet_data *pkt)
 {
 	int i;
 
-	for (i = 0; i < PACKET_MAX_SIZE; i++) {
+	for (i = 0; i < pkt->frames; i++) {
 		struct bio *bio = pkt->r_bios[i];
 		if (bio)
 			bio_put(bio);
 	}
-	for (i = 0; i < PAGES_PER_PACKET; i++)
+	for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
 		__free_page(pkt->pages[i]);
 	bio_put(pkt->w_bio);
 	kfree(pkt);
@@ -204,17 +205,17 @@ static void pkt_shrink_pktlist(struct pktcdvd_device *pd)
 	list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) {
 		pkt_free_packet_data(pkt);
 	}
+	INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
 }
 
 static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets)
 {
 	struct packet_data *pkt;
 
-	INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
-	INIT_LIST_HEAD(&pd->cdrw.pkt_active_list);
-	spin_lock_init(&pd->cdrw.active_list_lock);
+	BUG_ON(!list_empty(&pd->cdrw.pkt_free_list));
+
 	while (nr_packets > 0) {
-		pkt = pkt_alloc_packet_data();
+		pkt = pkt_alloc_packet_data(pd->settings.size >> 2);
 		if (!pkt) {
 			pkt_shrink_pktlist(pd);
 			return 0;
@@ -949,7 +950,7 @@ try_next_bio:
 
 	pd->current_sector = zone + pd->settings.size;
 	pkt->sector = zone;
-	pkt->frames = pd->settings.size >> 2;
+	BUG_ON(pkt->frames != pd->settings.size >> 2);
 	pkt->write_size = 0;
 
 	/*
@@ -1985,8 +1986,14 @@ static int pkt_open_dev(struct pktcdvd_device *pd, int write)
 	if ((ret = pkt_set_segment_merging(pd, q)))
 		goto out_unclaim;
 
-	if (write)
+	if (write) {
+		if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
+			printk("pktcdvd: not enough memory for buffers\n");
+			ret = -ENOMEM;
+			goto out_unclaim;
+		}
 		printk("pktcdvd: %lukB available on disc\n", lba << 1);
+	}
 
 	return 0;
 
@@ -2012,6 +2019,8 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
 	pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
 	bd_release(pd->bdev);
 	blkdev_put(pd->bdev);
+
+	pkt_shrink_pktlist(pd);
 }
 
 static struct pktcdvd_device *pkt_find_dev_from_minor(int dev_minor)
@@ -2377,12 +2386,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);
 
-	if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
-		printk("pktcdvd: not enough memory for buffers\n");
-		ret = -ENOMEM;
-		goto out_mem;
-	}
-
 	pd->bdev = bdev;
 	set_blocksize(bdev, CD_FRAMESIZE);
 
@@ -2393,7 +2396,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 	if (IS_ERR(pd->cdrw.thread)) {
 		printk("pktcdvd: can't start kernel thread\n");
 		ret = -ENOMEM;
-		goto out_thread;
+		goto out_mem;
 	}
 
 	proc = create_proc_entry(pd->name, 0, pkt_proc);
@@ -2404,8 +2407,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 	DPRINTK("pktcdvd: writer %s mapped to %s\n", pd->name, bdevname(bdev, b));
 	return 0;
 
-out_thread:
-	pkt_shrink_pktlist(pd);
 out_mem:
 	blkdev_put(bdev);
 	/* This is safe: open() is still holding a reference. */
@@ -2501,6 +2502,10 @@ static int pkt_setup_dev(struct pkt_ctrl_command *ctrl_cmd)
 		goto out_mem;
 	pd->disk = disk;
 
+	INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
+	INIT_LIST_HEAD(&pd->cdrw.pkt_active_list);
+	spin_lock_init(&pd->cdrw.active_list_lock);
+
 	spin_lock_init(&pd->lock);
 	spin_lock_init(&pd->iosched.lock);
 	sprintf(pd->name, "pktcdvd%d", idx);
@@ -2565,8 +2570,6 @@ static int pkt_remove_dev(struct pkt_ctrl_command *ctrl_cmd)
 
 	blkdev_put(pd->bdev);
 
-	pkt_shrink_pktlist(pd);
-
 	remove_proc_entry(pd->name, pkt_proc);
 	DPRINTK("pktcdvd: writer %s unmapped\n", pd->name);
 
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index d1c9c4a86e52..1623da88d6fe 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -170,7 +170,7 @@ struct packet_iosched
 #error "PAGE_SIZE must be a multiple of CD_FRAMESIZE"
 #endif
 #define PACKET_MAX_SIZE		32
-#define PAGES_PER_PACKET	(PACKET_MAX_SIZE * CD_FRAMESIZE / PAGE_SIZE)
+#define FRAMES_PER_PAGE		(PAGE_SIZE / CD_FRAMESIZE)
 #define PACKET_MAX_SECTORS	(PACKET_MAX_SIZE * CD_FRAMESIZE >> 9)
 
 enum packet_data_state {
@@ -219,7 +219,7 @@ struct packet_data
 	atomic_t		io_errors;	/* Number of read/write errors during IO */
 
 	struct bio		*r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */
-	struct page		*pages[PAGES_PER_PACKET];
+	struct page		*pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE];
 
 	int			cache_valid;	/* If non-zero, the data for the zone defined */
 						/* by the sector variable is completely cached */
-- 
cgit v1.2.3-71-gd317


From 5c55ac9bbca22ee134408f83de5f2bda3b1b2a53 Mon Sep 17 00:00:00 2001
From: Phillip Susi <psusi@cfl.rr.com>
Date: Sat, 4 Feb 2006 23:27:48 -0800
Subject: [PATCH] pktcdvd: Allow larger packets

The pktcdvd driver uses a compile time macro constant to define the maximum
supported packet length.  I changed this from 32 sectors to 128 sectors
because that allows over 100 MB of additional usable space on a 700 MB cdrw,
and increases throughput.

Note that you need a modified cdrwtool program that can format a CDRW disc
with larger packets to benefit from this change.

Signed-off-by: Peter Osterlund <petero2@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pktcdvd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index 1623da88d6fe..8a94c717c266 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -169,7 +169,7 @@ struct packet_iosched
 #if (PAGE_SIZE % CD_FRAMESIZE) != 0
 #error "PAGE_SIZE must be a multiple of CD_FRAMESIZE"
 #endif
-#define PACKET_MAX_SIZE		32
+#define PACKET_MAX_SIZE		128
 #define FRAMES_PER_PAGE		(PAGE_SIZE / CD_FRAMESIZE)
 #define PACKET_MAX_SECTORS	(PACKET_MAX_SIZE * CD_FRAMESIZE >> 9)
 
-- 
cgit v1.2.3-71-gd317


From bc5e483da61eb5ab8d24b4a919fb512e5886d02c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 4 Feb 2006 23:27:51 -0800
Subject: [PATCH] reiserfs_get_acl() build fix

With CONFIG_REISERFS_FS_XATTR=y, CONFIG_REISERFS_FS_POSIX_ACL=n:

fs/reiserfs/xattr.c: In function `reiserfs_check_acl':
fs/reiserfs/xattr.c:1330: called object is not a function

Cc: Chris Mason <mason@suse.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/reiserfs_acl.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/reiserfs_acl.h b/include/linux/reiserfs_acl.h
index 0a3605099c44..806ec5b06707 100644
--- a/include/linux/reiserfs_acl.h
+++ b/include/linux/reiserfs_acl.h
@@ -58,9 +58,13 @@ extern struct reiserfs_xattr_handler posix_acl_default_handler;
 extern struct reiserfs_xattr_handler posix_acl_access_handler;
 #else
 
-#define reiserfs_get_acl NULL
 #define reiserfs_cache_default_acl(inode) 0
 
+static inline struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
+{
+	return NULL;
+}
+
 static inline int reiserfs_xattr_posix_acl_init(void)
 {
 	return 0;
-- 
cgit v1.2.3-71-gd317


From fe1dcbc4f311c2e6c23b33c0fa8572461618ab3e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 4 Feb 2006 23:27:54 -0800
Subject: [PATCH] jbd: fix transaction batching

Ben points out that:

  When writing files out using O_SYNC, jbd's 1 jiffy delay results in a
  significant drop in throughput as the disk sits idle.  The patch below
  results in a 4-5x performance improvement (from 6.5MB/s to ~24-30MB/s on my
  IDE test box) when writing out files using O_SYNC.

So optimise the batching code by omitting it entirely if the process which is
doing a sync write is the same as the one which did the most recent sync
write.  If that's true, we're unlikely to get any other processes joining the
transaction.

(Has been in -mm for ages - it took me a long time to get on to performance
testing it)

Numbers, on write-cache-disabled IDE:

/usr/bin/time -p synctest -n 10 -uf -t 1 -p 1 dir-name

Unpatched:
	40 seconds
Patched:
	35 seconds
Batching disabled:
	35 seconds

This is the problematic single-process-doing-fsync case.  With multiple
fsyncing processes the numbers are AFAICT unaltered by the patch.

Aside: performance testing and instrumentation shows that the transaction
batching almost doesn't help (testing with synctest -n 1 -uf -t 100 -p 10
dir-name on non-writeback-caching IDE).  This is because by the time one
process is running a synchronous commit, a bunch of other processes already
have a transaction handle open, so they're all going to batch into the same
transaction anyway.

The batching seems to offer maybe 5-10% speedup with this workload, but I'm
pretty sure it was more important than that when it was first developed 4-odd
years ago...

Cc: "Stephen C. Tweedie" <sct@redhat.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/transaction.c | 10 +++++++++-
 include/linux/jbd.h  |  4 ++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 429f4b263cf1..ca917973c2c0 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1308,6 +1308,7 @@ int journal_stop(handle_t *handle)
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
 	int old_handle_count, err;
+	pid_t pid;
 
 	J_ASSERT(transaction->t_updates > 0);
 	J_ASSERT(journal_current_handle() == handle);
@@ -1333,8 +1334,15 @@ int journal_stop(handle_t *handle)
 	 * It doesn't cost much - we're about to run a commit and sleep
 	 * on IO anyway.  Speeds up many-threaded, many-dir operations
 	 * by 30x or more...
+	 *
+	 * But don't do this if this process was the most recent one to
+	 * perform a synchronous write.  We do this to detect the case where a
+	 * single process is doing a stream of sync writes.  No point in waiting
+	 * for joiners in that case.
 	 */
-	if (handle->h_sync) {
+	pid = current->pid;
+	if (handle->h_sync && journal->j_last_sync_writer != pid) {
+		journal->j_last_sync_writer = pid;
 		do {
 			old_handle_count = transaction->t_handle_count;
 			schedule_timeout_uninterruptible(1);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 558cb4c26ec9..751bb3849467 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -23,6 +23,7 @@
 #define jfs_debug jbd_debug
 #else
 
+#include <linux/types.h>
 #include <linux/buffer_head.h>
 #include <linux/journal-head.h>
 #include <linux/stddef.h>
@@ -618,6 +619,7 @@ struct transaction_s
  * @j_wbuf: array of buffer_heads for journal_commit_transaction
  * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the
  *	number that will fit in j_blocksize
+ * @j_last_sync_writer: most recent pid which did a synchronous write
  * @j_private: An opaque pointer to fs-private information.
  */
 
@@ -807,6 +809,8 @@ struct journal_s
 	struct buffer_head	**j_wbuf;
 	int			j_wbufsize;
 
+	pid_t			j_last_sync_writer;
+
 	/*
 	 * An opaque pointer to fs-private information.  ext3 puts its
 	 * superblock pointer here
-- 
cgit v1.2.3-71-gd317


From 21bbd691827e3610ef975a88863859381ac8d8e0 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 9 Jan 2006 15:19:18 +1100
Subject: [PATCH] I2C: Resurrect i2c_smbus_write_i2c_block_data.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 drivers/i2c/i2c-core.c | 15 +++++++++++++++
 include/linux/i2c.h    |  3 +++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 0ce58b506046..1a2c9ab5d9e3 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -946,6 +946,20 @@ s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client, u8 command, u8 *val
 	}
 }
 
+s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client, u8 command,
+				   u8 length, u8 *values)
+{
+	union i2c_smbus_data data;
+
+	if (length > I2C_SMBUS_BLOCK_MAX)
+		length = I2C_SMBUS_BLOCK_MAX;
+	data.block[0] = length;
+	memcpy(data.block + 1, values, length);
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+			      I2C_SMBUS_WRITE, command,
+			      I2C_SMBUS_I2C_BLOCK_DATA, &data);
+}
+
 /* Simulate a SMBus command using the i2c protocol 
    No checking of parameters is done!  */
 static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr, 
@@ -1150,6 +1164,7 @@ EXPORT_SYMBOL(i2c_smbus_read_word_data);
 EXPORT_SYMBOL(i2c_smbus_write_word_data);
 EXPORT_SYMBOL(i2c_smbus_write_block_data);
 EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data);
+EXPORT_SYMBOL(i2c_smbus_write_i2c_block_data);
 
 MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>");
 MODULE_DESCRIPTION("I2C-Bus main module");
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 7863a59bd598..63f1d63cc1d8 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -100,6 +100,9 @@ extern s32 i2c_smbus_write_block_data(struct i2c_client * client,
 /* Returns the number of read bytes */
 extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client,
 					 u8 command, u8 *values);
+extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client,
+					  u8 command, u8 length,
+					  u8 *values);
 
 /*
  * A driver is capable of handling one or more physical devices present on
-- 
cgit v1.2.3-71-gd317


From 0dfd812d4b2afc797310943b451608d347854e76 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 7 Feb 2006 06:45:34 -0200
Subject: V4L/DVB (3300): Add standard for South Korean NTSC-M using A2 audio.

South Korea uses NTSC-M but with A2 audio instead of BTSC. Several audio
chips need this information in order to set the correct audio processing
registers.

Acked-by: Mauro Carvalho Chehab <mauro_chehab@yahoo.com.br>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>
---
 drivers/media/video/bttv-driver.c          |  2 +-
 drivers/media/video/cx25840/cx25840-core.c | 50 ++++++++++++------------------
 drivers/media/video/tda9887.c              |  7 ++++-
 drivers/media/video/tuner-core.c           |  5 +++
 include/linux/videodev2.h                  |  4 ++-
 5 files changed, 35 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/bttv-driver.c b/drivers/media/video/bttv-driver.c
index aa4c4c521880..578b20085082 100644
--- a/drivers/media/video/bttv-driver.c
+++ b/drivers/media/video/bttv-driver.c
@@ -214,7 +214,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = {
 		   we can capture, of the first and second field. */
 		.vbistart	= { 7,320 },
 	},{
-		.v4l2_id        = V4L2_STD_NTSC_M,
+		.v4l2_id        = V4L2_STD_NTSC_M | V4L2_STD_NTSC_M_KR,
 		.name           = "NTSC",
 		.Fsc            = 28636363,
 		.swidth         = 768,
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index c66c2c1f4809..08ffd1f325fc 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -220,33 +220,23 @@ static void input_change(struct i2c_client *client)
 		cx25840_write(client, 0x808, 0xff);
 		cx25840_write(client, 0x80b, 0x10);
 	} else if (std & V4L2_STD_NTSC) {
-		/* NTSC */
-		if (state->pvr150_workaround) {
-			/* Certain Hauppauge PVR150 models have a hardware bug
-			   that causes audio to drop out. For these models the
-			   audio standard must be set explicitly.
-			   To be precise: it affects cards with tuner models
-			   85, 99 and 112 (model numbers from tveeprom). */
-			if (std == V4L2_STD_NTSC_M_JP) {
-				/* Japan uses EIAJ audio standard */
-				cx25840_write(client, 0x808, 0x2f);
-			} else {
-				/* Others use the BTSC audio standard */
-				cx25840_write(client, 0x808, 0x1f);
-			}
-			/* South Korea uses the A2-M (aka Zweiton M) audio
-			   standard, and should set 0x808 to 0x3f, but I don't
-			   know how to detect this. */
-		} else if (std == V4L2_STD_NTSC_M_JP) {
+		/* Certain Hauppauge PVR150 models have a hardware bug
+		   that causes audio to drop out. For these models the
+		   audio standard must be set explicitly.
+		   To be precise: it affects cards with tuner models
+		   85, 99 and 112 (model numbers from tveeprom). */
+		int hw_fix = state->pvr150_workaround;
+
+		if (std == V4L2_STD_NTSC_M_JP) {
 			/* Japan uses EIAJ audio standard */
-			cx25840_write(client, 0x808, 0xf7);
+			cx25840_write(client, 0x808, hw_fix ? 0x2f : 0xf7);
+		} else if (std == V4L2_STD_NTSC_M_KR) {
+			/* South Korea uses A2 audio standard */
+			cx25840_write(client, 0x808, hw_fix ? 0x3f : 0xf8);
 		} else {
 			/* Others use the BTSC audio standard */
-			cx25840_write(client, 0x808, 0xf6);
+			cx25840_write(client, 0x808, hw_fix ? 0x1f : 0xf6);
 		}
-		/* South Korea uses the A2-M (aka Zweiton M) audio standard,
-		   and should set 0x808 to 0xf8, but I don't know how to
-		   detect this. */
 		cx25840_write(client, 0x80b, 0x00);
 	}
 
@@ -330,17 +320,17 @@ static int set_v4lstd(struct i2c_client *client, v4l2_std_id std)
 	u8 fmt=0; 	/* zero is autodetect */
 
 	/* First tests should be against specific std */
-	if (std & V4L2_STD_NTSC_M_JP) {
+	if (std == V4L2_STD_NTSC_M_JP) {
 		fmt=0x2;
-	} else if (std & V4L2_STD_NTSC_443) {
+	} else if (std == V4L2_STD_NTSC_443) {
 		fmt=0x3;
-	} else if (std & V4L2_STD_PAL_M) {
+	} else if (std == V4L2_STD_PAL_M) {
 		fmt=0x5;
-	} else if (std & V4L2_STD_PAL_N) {
+	} else if (std == V4L2_STD_PAL_N) {
 		fmt=0x6;
-	} else if (std & V4L2_STD_PAL_Nc) {
+	} else if (std == V4L2_STD_PAL_Nc) {
 		fmt=0x7;
-	} else if (std & V4L2_STD_PAL_60) {
+	} else if (std == V4L2_STD_PAL_60) {
 		fmt=0x8;
 	} else {
 		/* Then, test against generic ones */
@@ -369,7 +359,7 @@ v4l2_std_id cx25840_get_v4lstd(struct i2c_client * client)
 	}
 
 	switch (fmt) {
-	case 0x1: return V4L2_STD_NTSC_M;
+	case 0x1: return V4L2_STD_NTSC_M | V4L2_STD_NTSC_M_KR;
 	case 0x2: return V4L2_STD_NTSC_M_JP;
 	case 0x3: return V4L2_STD_NTSC_443;
 	case 0x4: return V4L2_STD_PAL;
diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c
index 7c71422f5d3f..0d54f6c1982b 100644
--- a/drivers/media/video/tda9887.c
+++ b/drivers/media/video/tda9887.c
@@ -231,7 +231,7 @@ static struct tvnorm tvnorms[] = {
 			   cAudioIF_6_5   |
 			   cVideoIF_38_90 ),
 	},{
-		.std   = V4L2_STD_NTSC_M,
+		.std   = V4L2_STD_NTSC_M | V4L2_STD_NTSC_M_KR,
 		.name  = "NTSC-M",
 		.b     = ( cNegativeFmTV  |
 			   cQSS           ),
@@ -619,6 +619,11 @@ static int tda9887_fixup_std(struct tda9887 *t)
 			tda9887_dbg("insmod fixup: NTSC => NTSC_M_JP\n");
 			t->std = V4L2_STD_NTSC_M_JP;
 			break;
+		case 'k':
+		case 'K':
+			tda9887_dbg("insmod fixup: NTSC => NTSC_M_KR\n");
+			t->std = V4L2_STD_NTSC_M_KR;
+			break;
 		case '-':
 			/* default parameter, do nothing */
 			break;
diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index 873bf3d9679c..e7ee619d62c5 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -366,6 +366,11 @@ static int tuner_fixup_std(struct tuner *t)
 			tuner_dbg("insmod fixup: NTSC => NTSC_M_JP\n");
 			t->std = V4L2_STD_NTSC_M_JP;
 			break;
+		case 'k':
+		case 'K':
+			tuner_dbg("insmod fixup: NTSC => NTSC_M_KR\n");
+			t->std = V4L2_STD_NTSC_M_KR;
+			break;
 		case '-':
 			/* default parameter, do nothing */
 			break;
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index ce40675324bd..839ccc70698e 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -628,6 +628,7 @@ typedef __u64 v4l2_std_id;
 #define V4L2_STD_NTSC_M         ((v4l2_std_id)0x00001000)
 #define V4L2_STD_NTSC_M_JP      ((v4l2_std_id)0x00002000)
 #define V4L2_STD_NTSC_443       ((v4l2_std_id)0x00004000)
+#define V4L2_STD_NTSC_M_KR      ((v4l2_std_id)0x00008000)
 
 #define V4L2_STD_SECAM_B        ((v4l2_std_id)0x00010000)
 #define V4L2_STD_SECAM_D        ((v4l2_std_id)0x00020000)
@@ -660,7 +661,8 @@ typedef __u64 v4l2_std_id;
 				 V4L2_STD_PAL_H		|\
 				 V4L2_STD_PAL_I)
 #define V4L2_STD_NTSC           (V4L2_STD_NTSC_M	|\
-				 V4L2_STD_NTSC_M_JP)
+				 V4L2_STD_NTSC_M_JP     |\
+				 V4L2_STD_NTSC_M_KR)
 #define V4L2_STD_SECAM_DK      	(V4L2_STD_SECAM_D	|\
 				 V4L2_STD_SECAM_K	|\
 				 V4L2_STD_SECAM_K1)
-- 
cgit v1.2.3-71-gd317


From 46cd2f32baf181b74b16cceb123bab6fe1f61f85 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 7 Feb 2006 12:58:50 -0800
Subject: [PATCH] Fix build failure in recent pm_prepare_* changes.

Fix compilation problem in PM headers.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h | 10 +++++++++-
 kernel/power/console.c  |  4 +++-
 kernel/power/power.h    | 16 ----------------
 3 files changed, 12 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 43bcd13eb1ec..37c1c76fd547 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -42,13 +42,21 @@ extern void mark_free_pages(struct zone *zone);
 #ifdef CONFIG_PM
 /* kernel/power/swsusp.c */
 extern int software_suspend(void);
+
+#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
+extern int pm_prepare_console(void);
+extern void pm_restore_console(void);
+#else
+static inline int pm_prepare_console(void) { return 0; }
+static inline void pm_restore_console(void) {}
+#endif /* defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) */
 #else
 static inline int software_suspend(void)
 {
 	printk("Warning: fake suspend called\n");
 	return -EPERM;
 }
-#endif
+#endif /* CONFIG_PM */
 
 #ifdef CONFIG_SUSPEND_SMP
 extern void disable_nonboot_cpus(void);
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 579d239d129f..623786d44159 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -9,7 +9,9 @@
 #include <linux/console.h>
 #include "power.h"
 
-#ifdef SUSPEND_CONSOLE
+#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
+#define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)
+
 static int orig_fgconsole, orig_kmsg;
 
 int pm_prepare_console(void)
diff --git a/kernel/power/power.h b/kernel/power/power.h
index d8f0d1a76bae..388dba680841 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -1,14 +1,6 @@
 #include <linux/suspend.h>
 #include <linux/utsname.h>
 
-/* With SUSPEND_CONSOLE defined suspend looks *really* cool, but
-   we probably do not take enough locks for switching consoles, etc,
-   so bad things might happen.
-*/
-#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
-#define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)
-#endif
-
 struct swsusp_info {
 	struct new_utsname	uts;
 	u32			version_code;
@@ -42,14 +34,6 @@ static struct subsys_attribute _name##_attr = {	\
 
 extern struct subsystem power_subsys;
 
-#ifdef SUSPEND_CONSOLE
-extern int pm_prepare_console(void);
-extern void pm_restore_console(void);
-#else
-static int pm_prepare_console(void) { return 0; }
-static void pm_restore_console(void) {}
-#endif
-
 /* References to section boundaries */
 extern const void __nosave_begin, __nosave_end;
 
-- 
cgit v1.2.3-71-gd317


From 8519fb30e438f8088b71a94a7d5a660a814d3872 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 7 Feb 2006 12:58:52 -0800
Subject: [PATCH] mm: compound release fix

Compound pages on SMP systems can now often be freed from pagetables via
the release_pages path.  This uses put_page_testzero which does not handle
compound pages at all.  Releasing constituent pages from process mappings
decrements their count to a large negative number and leaks the reference
at the head page - net result is a memory leak.

The problem was hidden because the debug check in put_page_testzero itself
actually did take compound pages into consideration.

Fix the bug and the debug check.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h |  2 +-
 mm/swap.c          | 32 ++++++++++++++++++++++----------
 2 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 85854b867463..75e9f0724997 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -303,7 +303,7 @@ struct page {
  */
 #define put_page_testzero(p)				\
 	({						\
-		BUG_ON(page_count(p) == 0);		\
+		BUG_ON(atomic_read(&(p)->_count) == -1);\
 		atomic_add_negative(-1, &(p)->_count);	\
 	})
 
diff --git a/mm/swap.c b/mm/swap.c
index bc2442a7b0ee..76247424dea1 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -34,19 +34,22 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-void put_page(struct page *page)
+static void put_compound_page(struct page *page)
 {
-	if (unlikely(PageCompound(page))) {
-		page = (struct page *)page_private(page);
-		if (put_page_testzero(page)) {
-			void (*dtor)(struct page *page);
+	page = (struct page *)page_private(page);
+	if (put_page_testzero(page)) {
+		void (*dtor)(struct page *page);
 
-			dtor = (void (*)(struct page *))page[1].mapping;
-			(*dtor)(page);
-		}
-		return;
+		dtor = (void (*)(struct page *))page[1].mapping;
+		(*dtor)(page);
 	}
-	if (put_page_testzero(page))
+}
+
+void put_page(struct page *page)
+{
+	if (unlikely(PageCompound(page)))
+		put_compound_page(page);
+	else if (put_page_testzero(page))
 		__page_cache_release(page);
 }
 EXPORT_SYMBOL(put_page);
@@ -244,6 +247,15 @@ void release_pages(struct page **pages, int nr, int cold)
 		struct page *page = pages[i];
 		struct zone *pagezone;
 
+		if (unlikely(PageCompound(page))) {
+			if (zone) {
+				spin_unlock_irq(&zone->lru_lock);
+				zone = NULL;
+			}
+			put_compound_page(page);
+			continue;
+		}
+
 		if (!put_page_testzero(page))
 			continue;
 
-- 
cgit v1.2.3-71-gd317


From 741a295130606143edbf9fc740f633dbc1e6225f Mon Sep 17 00:00:00 2001
From: JANAK DESAI <janak@us.ibm.com>
Date: Tue, 7 Feb 2006 12:59:00 -0800
Subject: [PATCH] unshare system call -v5: unshare namespace

If the namespace structure is being shared, allocate a new one and copy
information from the current, shared, structure.

Signed-off-by: Janak Desai <janak@us.ibm.com>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Cc: Andi Kleen <ak@muc.de>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c            | 56 ++++++++++++++++++++++++++++++-----------------
 include/linux/namespace.h |  1 +
 kernel/fork.c             | 17 +++++++++-----
 3 files changed, 48 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index ce97becff461..a2bef5c81033 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1325,27 +1325,17 @@ dput_out:
 	return retval;
 }
 
-int copy_namespace(int flags, struct task_struct *tsk)
+/*
+ * Allocate a new namespace structure and populate it with contents
+ * copied from the namespace of the passed in task structure.
+ */
+struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 {
 	struct namespace *namespace = tsk->namespace;
 	struct namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
-	struct fs_struct *fs = tsk->fs;
 	struct vfsmount *p, *q;
 
-	if (!namespace)
-		return 0;
-
-	get_namespace(namespace);
-
-	if (!(flags & CLONE_NEWNS))
-		return 0;
-
-	if (!capable(CAP_SYS_ADMIN)) {
-		put_namespace(namespace);
-		return -EPERM;
-	}
-
 	new_ns = kmalloc(sizeof(struct namespace), GFP_KERNEL);
 	if (!new_ns)
 		goto out;
@@ -1396,8 +1386,6 @@ int copy_namespace(int flags, struct task_struct *tsk)
 	}
 	up_write(&namespace_sem);
 
-	tsk->namespace = new_ns;
-
 	if (rootmnt)
 		mntput(rootmnt);
 	if (pwdmnt)
@@ -1405,12 +1393,40 @@ int copy_namespace(int flags, struct task_struct *tsk)
 	if (altrootmnt)
 		mntput(altrootmnt);
 
-	put_namespace(namespace);
-	return 0;
+out:
+	return new_ns;
+}
+
+int copy_namespace(int flags, struct task_struct *tsk)
+{
+	struct namespace *namespace = tsk->namespace;
+	struct namespace *new_ns;
+	int err = 0;
+
+	if (!namespace)
+		return 0;
+
+	get_namespace(namespace);
+
+	if (!(flags & CLONE_NEWNS))
+		return 0;
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		err = -EPERM;
+		goto out;
+	}
+
+	new_ns = dup_namespace(tsk, tsk->fs);
+	if (!new_ns) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	tsk->namespace = new_ns;
 
 out:
 	put_namespace(namespace);
-	return -ENOMEM;
+	return err;
 }
 
 asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
diff --git a/include/linux/namespace.h b/include/linux/namespace.h
index 6731977c4c13..3abc8e3b4879 100644
--- a/include/linux/namespace.h
+++ b/include/linux/namespace.h
@@ -15,6 +15,7 @@ struct namespace {
 
 extern int copy_namespace(int, struct task_struct *);
 extern void __put_namespace(struct namespace *namespace);
+extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *);
 
 static inline void put_namespace(struct namespace *namespace)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 598e5c27242c..07dd241aa1e0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1388,16 +1388,21 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 }
 
 /*
- * Unsharing of namespace for tasks created without CLONE_NEWNS is not
- * supported yet
+ * Unshare the namespace structure if it is being shared
  */
-static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp)
+static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
 {
 	struct namespace *ns = current->namespace;
 
 	if ((unshare_flags & CLONE_NEWNS) &&
-	    (ns && atomic_read(&ns->count) > 1))
-		return -EINVAL;
+	    (ns && atomic_read(&ns->count) > 1)) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		*new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
+		if (!*new_nsp)
+			return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -1482,7 +1487,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 		goto bad_unshare_out;
 	if ((err = unshare_fs(unshare_flags, &new_fs)))
 		goto bad_unshare_cleanup_thread;
-	if ((err = unshare_namespace(unshare_flags, &new_ns)))
+	if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
 		goto bad_unshare_cleanup_fs;
 	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
 		goto bad_unshare_cleanup_ns;
-- 
cgit v1.2.3-71-gd317


From 1b8623545b42c03eb92e51b28c84acf4b8ba00a3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 15 Dec 2005 01:07:03 -0500
Subject: [PATCH] remove bogus asm/bug.h includes.

A bunch of asm/bug.h includes are both unneeded (since the header gets
pulled in anyway) and bogus (since they are done too early).  Removed.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 crypto/scatterwalk.c                | 1 -
 drivers/cdrom/viocd.c               | 2 --
 drivers/net/hamradio/baycom_par.c   | 1 -
 drivers/tc/tc.c                     | 1 -
 drivers/video/backlight/backlight.c | 1 -
 drivers/video/backlight/lcd.c       | 1 -
 drivers/video/pmag-ba-fb.c          | 1 -
 drivers/video/pmagb-b-fb.c          | 1 -
 fs/reiserfs/hashes.c                | 1 -
 include/asm-mips/io.h               | 1 -
 include/asm-powerpc/dma-mapping.h   | 1 -
 include/linux/cpumask.h             | 1 -
 include/linux/dcache.h              | 1 -
 include/linux/jbd.h                 | 1 -
 include/linux/mtd/map.h             | 1 -
 include/linux/nodemask.h            | 1 -
 include/linux/smp.h                 | 1 -
 kernel/compat.c                     | 1 -
 net/dccp/ccids/lib/tfrc_equation.c  | 1 -
 net/ipv4/xfrm4_policy.c             | 1 -
 net/ipv6/raw.c                      | 1 -
 net/ipv6/xfrm6_policy.c             | 1 -
 net/xfrm/xfrm_policy.c              | 1 -
 23 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 47ac90e615f4..2953e2cc56f0 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -17,7 +17,6 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
-#include <asm/bug.h>
 #include <asm/scatterlist.h>
 #include "internal.h"
 #include "scatterwalk.h"
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 193446e6a08a..e27617259552 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -42,8 +42,6 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-#include <asm/bug.h>
-
 #include <asm/vio.h>
 #include <asm/scatterlist.h>
 #include <asm/iseries/hv_types.h>
diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c
index 3b1bef1ee215..77411a00d1ee 100644
--- a/drivers/net/hamradio/baycom_par.c
+++ b/drivers/net/hamradio/baycom_par.c
@@ -86,7 +86,6 @@
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
 
-#include <asm/bug.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
diff --git a/drivers/tc/tc.c b/drivers/tc/tc.c
index a0e5af638e0e..4a51e56f85b6 100644
--- a/drivers/tc/tc.c
+++ b/drivers/tc/tc.c
@@ -17,7 +17,6 @@
 #include <linux/types.h>
 
 #include <asm/addrspace.h>
-#include <asm/bug.h>
 #include <asm/errno.h>
 #include <asm/io.h>
 #include <asm/paccess.h>
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 9d5015e99372..bd39bbd88d41 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -13,7 +13,6 @@
 #include <linux/ctype.h>
 #include <linux/err.h>
 #include <linux/fb.h>
-#include <asm/bug.h>
 
 static ssize_t backlight_show_power(struct class_device *cdev, char *buf)
 {
diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c
index 68c690605aa7..9e32485ee7bb 100644
--- a/drivers/video/backlight/lcd.c
+++ b/drivers/video/backlight/lcd.c
@@ -13,7 +13,6 @@
 #include <linux/ctype.h>
 #include <linux/err.h>
 #include <linux/fb.h>
-#include <asm/bug.h>
 
 static ssize_t lcd_show_power(struct class_device *cdev, char *buf)
 {
diff --git a/drivers/video/pmag-ba-fb.c b/drivers/video/pmag-ba-fb.c
index f3927b6cda9d..f5361cd8ccce 100644
--- a/drivers/video/pmag-ba-fb.c
+++ b/drivers/video/pmag-ba-fb.c
@@ -30,7 +30,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 
-#include <asm/bug.h>
 #include <asm/io.h>
 #include <asm/system.h>
 
diff --git a/drivers/video/pmagb-b-fb.c b/drivers/video/pmagb-b-fb.c
index 25148de5fe67..eeeac924b500 100644
--- a/drivers/video/pmagb-b-fb.c
+++ b/drivers/video/pmagb-b-fb.c
@@ -27,7 +27,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 
-#include <asm/bug.h>
 #include <asm/io.h>
 #include <asm/system.h>
 
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index a3ec238fd9e0..e664ac16fad9 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -21,7 +21,6 @@
 #include <linux/kernel.h>
 #include <linux/reiserfs_fs.h>
 #include <asm/types.h>
-#include <asm/bug.h>
 
 #define DELTA 0x9E3779B9
 #define FULLROUNDS 10		/* 32 is overkill, 16 is strong crypto */
diff --git a/include/asm-mips/io.h b/include/asm-mips/io.h
index d42685747e7d..a9fa1254894a 100644
--- a/include/asm-mips/io.h
+++ b/include/asm-mips/io.h
@@ -18,7 +18,6 @@
 #include <linux/types.h>
 
 #include <asm/addrspace.h>
-#include <asm/bug.h>
 #include <asm/byteorder.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h
index 837756ab7dc7..2ac63f569592 100644
--- a/include/asm-powerpc/dma-mapping.h
+++ b/include/asm-powerpc/dma-mapping.h
@@ -15,7 +15,6 @@
 #include <linux/mm.h>
 #include <asm/scatterlist.h>
 #include <asm/io.h>
-#include <asm/bug.h>
 
 #define DMA_ERROR_CODE		(~(dma_addr_t)0x0)
 
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 13e9f4a3ab26..20b446f26ecd 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -84,7 +84,6 @@
 #include <linux/kernel.h>
 #include <linux/threads.h>
 #include <linux/bitmap.h>
-#include <asm/bug.h>
 
 typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
 extern cpumask_t _unused_cpumask_arg_;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a3f09947940e..4361f3789975 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -8,7 +8,6 @@
 #include <linux/spinlock.h>
 #include <linux/cache.h>
 #include <linux/rcupdate.h>
-#include <asm/bug.h>
 
 struct nameidata;
 struct vfsmount;
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 751bb3849467..0fe4aa891ddc 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -239,7 +239,6 @@ typedef struct journal_superblock_s
 
 #include <linux/fs.h>
 #include <linux/sched.h>
-#include <asm/bug.h>
 
 #define JBD_ASSERTIONS
 #ifdef JBD_ASSERTIONS
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index fedfbc8a287f..7dfd6e1fcde7 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -15,7 +15,6 @@
 #include <asm/unaligned.h>
 #include <asm/system.h>
 #include <asm/io.h>
-#include <asm/bug.h>
 
 #ifdef CONFIG_MTD_MAP_BANK_WIDTH_1
 #define map_bankwidth(map) 1
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 4726ef7ba8e8..b959a4525cbd 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -84,7 +84,6 @@
 #include <linux/threads.h>
 #include <linux/bitmap.h>
 #include <linux/numa.h>
-#include <asm/bug.h>
 
 typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
 extern nodemask_t _unused_nodemask_arg_;
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 9dfa3ee769ae..44153fdf73fc 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -17,7 +17,6 @@ extern void cpu_idle(void);
 #include <linux/compiler.h>
 #include <linux/thread_info.h>
 #include <asm/smp.h>
-#include <asm/bug.h>
 
 /*
  * main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc.
diff --git a/kernel/compat.c b/kernel/compat.c
index 1867290c37e3..8c9cd88b6785 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,7 +23,6 @@
 #include <linux/security.h>
 
 #include <asm/uaccess.h>
-#include <asm/bug.h>
 
 int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
 {
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index d2b5933b4510..add3cae65e2d 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -15,7 +15,6 @@
 #include <linux/config.h>
 #include <linux/module.h>
 
-#include <asm/bug.h>
 #include <asm/div64.h>
 
 #include "tfrc.h"
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 42196ba3b0b9..45f7ae58f2c0 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -8,7 +8,6 @@
  * 	
  */
 
-#include <asm/bug.h>
 #include <linux/compiler.h>
 #include <linux/config.h>
 #include <linux/inetdevice.h>
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 66f1d12ea578..738376cf0c51 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -35,7 +35,6 @@
 #include <linux/skbuff.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
-#include <asm/bug.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 69bd957380e7..91cce8b2d7a5 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -11,7 +11,6 @@
  * 
  */
 
-#include <asm/bug.h>
 #include <linux/compiler.h>
 #include <linux/config.h>
 #include <linux/netdevice.h>
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 077bbf9fb9b7..dbf4620768d6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -13,7 +13,6 @@
  *
  */
 
-#include <asm/bug.h>
 #include <linux/config.h>
 #include <linux/slab.h>
 #include <linux/kmod.h>
-- 
cgit v1.2.3-71-gd317


From bee14e1f8ae2d5fd3f324e0c8562f791537160b2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 1 Feb 2006 07:33:44 -0500
Subject: [PATCH] __user annotations of video_spu_palette

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/dvb/video.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dvb/video.h b/include/linux/dvb/video.h
index b1999bfeaa56..b81e58b2ebf8 100644
--- a/include/linux/dvb/video.h
+++ b/include/linux/dvb/video.h
@@ -135,7 +135,7 @@ typedef struct video_spu {
 
 typedef struct video_spu_palette {      /* SPU Palette information */
 	int length;
-	uint8_t *palette;
+	uint8_t __user *palette;
 } video_spu_palette_t;
 
 
-- 
cgit v1.2.3-71-gd317


From 5b1a43d7df65689b4c3b5a1c5c8158f1d4f74fbd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 1 Feb 2006 05:24:20 -0500
Subject: [PATCH] drivers/media/video __user annotations and fixes

* compat_alloc_user_space() returns a __user pointer
* copying between two userland areas is copy_in_user(), not copy_from_user()
* dereferencing userland pointers directly is bad
* so is calling get_user() on kernel-local variables

... plus usual __user annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/media/video/compat_ioctl32.c | 89 +++++++++++++++++-------------------
 include/linux/videodev2.h            |  2 +-
 2 files changed, 42 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/compat_ioctl32.c b/drivers/media/video/compat_ioctl32.c
index 297c32ab51e3..840fe0177121 100644
--- a/drivers/media/video/compat_ioctl32.c
+++ b/drivers/media/video/compat_ioctl32.c
@@ -167,29 +167,32 @@ static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user
 	if (kp->clipcount > 2048)
 		return -EINVAL;
 	if (kp->clipcount) {
-		struct v4l2_clip32 *uclips = compat_ptr(up->clips);
-		struct v4l2_clip *kclips;
+		struct v4l2_clip32 __user *uclips;
+		struct v4l2_clip __user *kclips;
 		int n = kp->clipcount;
+		compat_caddr_t p;
 
+		if (get_user(p, &up->clips))
+			return -EFAULT;
+		uclips = compat_ptr(p);
 		kclips = compat_alloc_user_space(n * sizeof(struct v4l2_clip));
 		kp->clips = kclips;
 		while (--n >= 0) {
-			if (!access_ok(VERIFY_READ, &uclips->c, sizeof(uclips->c)) ||
-				copy_from_user(&kclips->c, &uclips->c, sizeof(uclips->c)))
+			if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c)))
+				return -EFAULT;
+			if (put_user(n ? kclips + 1 : NULL, &kclips->next))
 				return -EFAULT;
-			kclips->next = n ? kclips + 1 : 0;
 			uclips += 1;
 			kclips += 1;
 		}
 	} else
-		kp->clips = 0;
+		kp->clips = NULL;
 	return 0;
 }
 
 static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up)
 {
-	if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_window32)) ||
-		copy_to_user(&up->w, &kp->w, sizeof(up->w)) ||
+	if (copy_to_user(&up->w, &kp->w, sizeof(up->w)) ||
 		put_user(kp->field, &up->field) ||
 		put_user(kp->chromakey, &up->chromakey) ||
 		put_user(kp->clipcount, &up->clipcount))
@@ -199,33 +202,29 @@ static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user
 
 static inline int get_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up)
 {
-	if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_pix_format)) ||
-		copy_from_user(kp, up, sizeof(struct v4l2_pix_format)))
-			return -EFAULT;
+	if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format)))
+		return -EFAULT;
 	return 0;
 }
 
 static inline int put_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up)
 {
-	if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_pix_format)) ||
-		copy_to_user(up, kp, sizeof(struct v4l2_pix_format)))
-			return -EFAULT;
+	if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format)))
+		return -EFAULT;
 	return 0;
 }
 
 static inline int get_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up)
 {
-	if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_vbi_format)) ||
-		copy_from_user(kp, up, sizeof(struct v4l2_vbi_format)))
-			return -EFAULT;
+	if (copy_from_user(kp, up, sizeof(struct v4l2_vbi_format)))
+		return -EFAULT;
 	return 0;
 }
 
 static inline int put_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up)
 {
-	if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_vbi_format)) ||
-		copy_to_user(up, kp, sizeof(struct v4l2_vbi_format)))
-			return -EFAULT;
+	if (copy_to_user(up, kp, sizeof(struct v4l2_vbi_format)))
+		return -EFAULT;
 	return 0;
 }
 
@@ -279,18 +278,16 @@ static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user
 
 static inline int get_v4l2_standard(struct v4l2_standard *kp, struct v4l2_standard __user *up)
 {
-	if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_standard)) ||
-		copy_from_user(kp, up, sizeof(struct v4l2_standard)))
-			return -EFAULT;
+	if (copy_from_user(kp, up, sizeof(struct v4l2_standard)))
+		return -EFAULT;
 	return 0;
 
 }
 
 static inline int put_v4l2_standard(struct v4l2_standard *kp, struct v4l2_standard __user *up)
 {
-	if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_standard)) ||
-		copy_to_user(up, kp, sizeof(struct v4l2_standard)))
-			return -EFAULT;
+	if (copy_to_user(up, kp, sizeof(struct v4l2_standard)))
+		return -EFAULT;
 	return 0;
 }
 
@@ -328,18 +325,16 @@ static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32
 
 static inline int get_v4l2_tuner(struct v4l2_tuner *kp, struct v4l2_tuner __user *up)
 {
-	if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_tuner)) ||
-		copy_from_user(kp, up, sizeof(struct v4l2_tuner)))
-			return -EFAULT;
+	if (copy_from_user(kp, up, sizeof(struct v4l2_tuner)))
+		return -EFAULT;
 	return 0;
 
 }
 
 static inline int put_v4l2_tuner(struct v4l2_tuner *kp, struct v4l2_tuner __user *up)
 {
-	if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_tuner)) ||
-		copy_to_user(up, kp, sizeof(struct v4l2_tuner)))
-			return -EFAULT;
+	if (copy_to_user(up, kp, sizeof(struct v4l2_tuner)))
+		return -EFAULT;
 	return 0;
 }
 
@@ -380,11 +375,13 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user
 		break;
 	case V4L2_MEMORY_USERPTR:
 		{
-		unsigned long tmp = (unsigned long)compat_ptr(up->m.userptr);
+		compat_long_t tmp;
 
-		if(get_user(kp->length, &up->length) ||
-			get_user(kp->m.userptr, &tmp))
-				return -EFAULT;
+		if (get_user(kp->length, &up->length) ||
+		    get_user(tmp, &up->m.userptr))
+			return -EFAULT;
+
+		kp->m.userptr = (unsigned long)compat_ptr(tmp);
 		}
 		break;
 	case V4L2_MEMORY_OVERLAY:
@@ -468,33 +465,29 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame
 
 static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input __user *up)
 {
-	if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_input) - 4) ||
-		copy_from_user(kp, up, sizeof(struct v4l2_input) - 4))
-			return -EFAULT;
+	if (copy_from_user(kp, up, sizeof(struct v4l2_input) - 4))
+		return -EFAULT;
 	return 0;
 }
 
 static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input __user *up)
 {
-	if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_input) - 4) ||
-		copy_to_user(up, kp, sizeof(struct v4l2_input) - 4))
-			return -EFAULT;
+	if (copy_to_user(up, kp, sizeof(struct v4l2_input) - 4))
+		return -EFAULT;
 	return 0;
 }
 
 static inline int get_v4l2_input(struct v4l2_input *kp, struct v4l2_input __user *up)
 {
-	if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_input)) ||
-		copy_from_user(kp, up, sizeof(struct v4l2_input)))
-			return -EFAULT;
+	if (copy_from_user(kp, up, sizeof(struct v4l2_input)))
+		return -EFAULT;
 	return 0;
 }
 
 static inline int put_v4l2_input(struct v4l2_input *kp, struct v4l2_input __user *up)
 {
-	if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_input)) ||
-		copy_to_user(up, kp, sizeof(struct v4l2_input)))
-			return -EFAULT;
+	if (copy_to_user(up, kp, sizeof(struct v4l2_input)))
+		return -EFAULT;
 	return 0;
 }
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index b23be44cbea8..5208b12d5550 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -549,7 +549,7 @@ struct v4l2_framebuffer
 struct v4l2_clip
 {
 	struct v4l2_rect        c;
-	struct v4l2_clip	*next;
+	struct v4l2_clip	__user *next;
 };
 
 struct v4l2_window
-- 
cgit v1.2.3-71-gd317


From d656101009d76000b8fc0998a33d592100334d52 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 1 Feb 2006 05:59:06 -0500
Subject: [PATCH] sn3 iomem annotations and fixes

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/sn/ioc3.c    | 18 +++++++++---------
 include/linux/ioc3.h |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sn/ioc3.c b/drivers/sn/ioc3.c
index c70ae81b5d98..12357e1fa558 100644
--- a/drivers/sn/ioc3.c
+++ b/drivers/sn/ioc3.c
@@ -38,10 +38,10 @@ static inline unsigned mcr_pack(unsigned pulse, unsigned sample)
 
 static int nic_wait(struct ioc3_driver_data *idd)
 {
-	volatile unsigned mcr;
+	unsigned mcr;
 
         do {
-                mcr = (volatile unsigned)idd->vma->mcr;
+                mcr = readl(&idd->vma->mcr);
         } while (!(mcr & 2));
 
         return mcr & 1;
@@ -53,7 +53,7 @@ static int nic_reset(struct ioc3_driver_data *idd)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	idd->vma->mcr = mcr_pack(500, 65);
+	writel(mcr_pack(500, 65), &idd->vma->mcr);
 	presence = nic_wait(idd);
 	local_irq_restore(flags);
 
@@ -68,7 +68,7 @@ static inline int nic_read_bit(struct ioc3_driver_data *idd)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	idd->vma->mcr = mcr_pack(6, 13);
+	writel(mcr_pack(6, 13), &idd->vma->mcr);
 	result = nic_wait(idd);
 	local_irq_restore(flags);
 
@@ -80,9 +80,9 @@ static inline int nic_read_bit(struct ioc3_driver_data *idd)
 static inline void nic_write_bit(struct ioc3_driver_data *idd, int bit)
 {
 	if (bit)
-		idd->vma->mcr = mcr_pack(6, 110);
+		writel(mcr_pack(6, 110), &idd->vma->mcr);
 	else
-		idd->vma->mcr = mcr_pack(80, 30);
+		writel(mcr_pack(80, 30), &idd->vma->mcr);
 
 	nic_wait(idd);
 }
@@ -337,7 +337,7 @@ static void probe_nic(struct ioc3_driver_data *idd)
         int save = 0, loops = 3;
         unsigned long first, addr;
 
-        idd->vma->gpcr_s = GPCR_MLAN_EN;
+        writel(GPCR_MLAN_EN, &idd->vma->gpcr_s);
 
         while(loops>0) {
                 idd->nic_part[0] = 0;
@@ -408,7 +408,7 @@ static irqreturn_t ioc3_intr_io(int irq, void *arg, struct pt_regs *regs)
 
 	read_lock_irqsave(&ioc3_submodules_lock, flags);
 
-	if(idd->dual_irq && idd->vma->eisr) {
+	if(idd->dual_irq && readb(&idd->vma->eisr)) {
 		/* send Ethernet IRQ to the driver */
 		if(ioc3_ethernet && idd->active[ioc3_ethernet->id] &&
 						ioc3_ethernet->intr) {
@@ -682,7 +682,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
 	idd->id = ioc3_counter++;
 	up_write(&ioc3_devices_rwsem);
 
-	idd->gpdr_shadow = idd->vma->gpdr;
+	idd->gpdr_shadow = readl(&idd->vma->gpdr);
 
 	/* Read IOC3 NIC contents */
 	probe_nic(idd);
diff --git a/include/linux/ioc3.h b/include/linux/ioc3.h
index e7906a72a4f1..da7c09e4ede6 100644
--- a/include/linux/ioc3.h
+++ b/include/linux/ioc3.h
@@ -27,7 +27,7 @@ struct ioc3_driver_data {
 	int id;				/* IOC3 sequence number */
 	/* PCI mapping */
 	unsigned long pma;		/* physical address */
-	struct __iomem ioc3 *vma;	/* pointer to registers */
+	struct ioc3 __iomem *vma;	/* pointer to registers */
 	struct pci_dev *pdev;		/* PCI device */
 	/* IRQ stuff */
 	int dual_irq;			/* set if separate IRQs are used */
-- 
cgit v1.2.3-71-gd317


From 30e9656cc340035e102fea46e1908689494b042d Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Wed, 8 Feb 2006 01:01:31 -0800
Subject: [PATCH] block: implement elv_insert and use it (fix ordcolor flipping
 bug)

q->ordcolor must only be flipped on initial queueing of a hardbarrier
request.

Constructing ordered sequence and requeueing used to pass through
__elv_add_request() which flips q->ordcolor when it sees a barrier
request.

This patch separates out elv_insert() from __elv_add_request() and uses
elv_insert() when constructing ordered sequence and requeueing.
elv_insert() inserts the given request at the specified position and
does nothing else.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 block/elevator.c         | 70 ++++++++++++++++++++++++++----------------------
 block/ll_rw_blk.c        |  4 +--
 include/linux/elevator.h |  1 +
 3 files changed, 41 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/block/elevator.c b/block/elevator.c
index 2fc269f69726..24b702d649a9 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -293,7 +293,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
 
 	rq->flags &= ~REQ_STARTED;
 
-	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0);
+	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
 }
 
 static void elv_drain_elevator(request_queue_t *q)
@@ -310,41 +310,11 @@ static void elv_drain_elevator(request_queue_t *q)
 	}
 }
 
-void __elv_add_request(request_queue_t *q, struct request *rq, int where,
-		       int plug)
+void elv_insert(request_queue_t *q, struct request *rq, int where)
 {
 	struct list_head *pos;
 	unsigned ordseq;
 
-	if (q->ordcolor)
-		rq->flags |= REQ_ORDERED_COLOR;
-
-	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
-		/*
-		 * toggle ordered color
-		 */
-		if (blk_barrier_rq(rq))
-			q->ordcolor ^= 1;
-
-		/*
-		 * barriers implicitly indicate back insertion
-		 */
-		if (where == ELEVATOR_INSERT_SORT)
-			where = ELEVATOR_INSERT_BACK;
-
-		/*
-		 * this request is scheduling boundary, update end_sector
-		 */
-		if (blk_fs_request(rq)) {
-			q->end_sector = rq_end_sector(rq);
-			q->boundary_rq = rq;
-		}
-	} else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
-		where = ELEVATOR_INSERT_BACK;
-
-	if (plug)
-		blk_plug_device(q);
-
 	rq->q = q;
 
 	switch (where) {
@@ -425,6 +395,42 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 	}
 }
 
+void __elv_add_request(request_queue_t *q, struct request *rq, int where,
+		       int plug)
+{
+	if (q->ordcolor)
+		rq->flags |= REQ_ORDERED_COLOR;
+
+	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+		/*
+		 * toggle ordered color
+		 */
+		if (blk_barrier_rq(rq))
+			q->ordcolor ^= 1;
+
+		/*
+		 * barriers implicitly indicate back insertion
+		 */
+		if (where == ELEVATOR_INSERT_SORT)
+			where = ELEVATOR_INSERT_BACK;
+
+		/*
+		 * this request is scheduling boundary, update
+		 * end_sector
+		 */
+		if (blk_fs_request(rq)) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = rq;
+		}
+	} else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
+		where = ELEVATOR_INSERT_BACK;
+
+	if (plug)
+		blk_plug_device(q);
+
+	elv_insert(q, rq, where);
+}
+
 void elv_add_request(request_queue_t *q, struct request *rq, int where,
 		     int plug)
 {
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index ee5ed98db4cd..03d9c82b0fe7 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -454,7 +454,7 @@ static void queue_flush(request_queue_t *q, unsigned which)
 	rq->end_io = end_io;
 	q->prepare_flush_fn(q, rq);
 
-	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
 static inline struct request *start_ordered(request_queue_t *q,
@@ -490,7 +490,7 @@ static inline struct request *start_ordered(request_queue_t *q,
 	else
 		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
 
-	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 
 	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
 		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 23fe746a1d51..18cf1f3e1184 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -82,6 +82,7 @@ struct elevator_queue
 extern void elv_dispatch_sort(request_queue_t *, struct request *);
 extern void elv_add_request(request_queue_t *, struct request *, int, int);
 extern void __elv_add_request(request_queue_t *, struct request *, int, int);
+extern void elv_insert(request_queue_t *, struct request *, int);
 extern int elv_merge(request_queue_t *, struct request **, struct bio *);
 extern void elv_merge_requests(request_queue_t *, struct request *,
 			       struct request *);
-- 
cgit v1.2.3-71-gd317


From 85d1494e5ff8e20a52ce514584ffda4f0265025e Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Date: Wed, 8 Feb 2006 21:46:24 +0000
Subject: [SERIAL] 8250_pci: add new PCI serial card support

This patch adds new PCI serial card support.

Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/8250_pci.c | 4 ++++
 include/linux/pci_ids.h   | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index bb9ec28ccc2b..94886c000d2a 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -1882,6 +1882,10 @@ static struct pci_device_id serial_pci_tbl[] = {
 		PCI_SUBVENDOR_ID_CONNECT_TECH,
 		PCI_SUBDEVICE_ID_CONNECT_TECH_TITAN_4, 0, 0,
 		pbn_b0_4_1843200 },
+	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954,
+		PCI_VENDOR_ID_AFAVLAB,
+		PCI_SUBDEVICE_ID_AFAVLAB_P061, 0, 0,
+		pbn_b0_4_1152000 },
 	{	PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C152,
 		PCI_SUBVENDOR_ID_CONNECT_TECH,
 		PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_2_232, 0, 0,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 92a619ba163f..7a61ccdcbc4b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1832,6 +1832,7 @@
 #define PCI_VENDOR_ID_AFAVLAB		0x14db
 #define PCI_DEVICE_ID_AFAVLAB_P028	0x2180
 #define PCI_DEVICE_ID_AFAVLAB_P030	0x2182
+#define PCI_SUBDEVICE_ID_AFAVLAB_P061		0x2150
 
 #define PCI_VENDOR_ID_BROADCOM		0x14e4
 #define PCI_DEVICE_ID_TIGON3_5752	0x1600
-- 
cgit v1.2.3-71-gd317


From 9ac95f2f90e022c16d293d7978faddf7e779a1a9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 9 Feb 2006 22:41:50 +0300
Subject: [PATCH] do_sigaction: cleanup ->sa_mask manipulation

Clear unblockable signals beforehand.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 2 +-
 kernel/signal.c       | 8 +++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0cfcd1c7865e..9c1da0269a18 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1098,7 +1098,7 @@ extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
 extern int send_sigqueue(int, struct sigqueue *,  struct task_struct *);
 extern int send_group_sigqueue(int, struct sigqueue *,  struct task_struct *);
-extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *);
+extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
 
 /* These can be the second arg to send_sig_info/send_group_sig_info.  */
diff --git a/kernel/signal.c b/kernel/signal.c
index 01a1e7f7acf7..ea154104a00b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2430,7 +2430,7 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo)
 }
 
 int
-do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
+do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
 	struct k_sigaction *k;
 	sigset_t mask;
@@ -2454,6 +2454,8 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
 		*oact = *k;
 
 	if (act) {
+		sigdelsetmask(&act->sa.sa_mask,
+			      sigmask(SIGKILL) | sigmask(SIGSTOP));
 		/*
 		 * POSIX 3.3.1.3:
 		 *  "Setting a signal action to SIG_IGN for a signal that is
@@ -2479,8 +2481,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
 			read_lock(&tasklist_lock);
 			spin_lock_irq(&t->sighand->siglock);
 			*k = *act;
-			sigdelsetmask(&k->sa.sa_mask,
-				      sigmask(SIGKILL) | sigmask(SIGSTOP));
 			sigemptyset(&mask);
 			sigaddset(&mask, sig);
 			rm_from_queue_full(&mask, &t->signal->shared_pending);
@@ -2495,8 +2495,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
 		}
 
 		*k = *act;
-		sigdelsetmask(&k->sa.sa_mask,
-			      sigmask(SIGKILL) | sigmask(SIGSTOP));
 	}
 
 	spin_unlock_irq(&current->sighand->siglock);
-- 
cgit v1.2.3-71-gd317


From a70ea994a0d83fd0151a070be72b87d014ef0a7e Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Thu, 9 Feb 2006 16:40:11 -0800
Subject: [NETLINK]: Fix a severe bug

netlink overrun was broken during the improvement of netlink.
Destination socket is used in the place where it was meant to be source socket,
so that now overrun is never sent to user netlink sockets, when it should be,
and it even can be set on kernel socket, which results in complete deadlock
of rtnetlink.

Suggested fix is to restore status quo passing source socket as additional
argument to netlink_attachskb().

A little explanation: overrun is set on a socket when it failed
to receive some message and the sender of this message does not, or even
has no way to, handle this error. This happens in two cases:
1. when kernel sends something. Kernel never retransmits and cannot
   wait for buffer space.
2. when user sends a broadcast and the message was not delivered
   to some recipients.

Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  | 3 ++-
 ipc/mqueue.c             | 3 ++-
 net/netlink/af_netlink.c | 7 ++++---
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 6a2ccf78a356..c256ebe2a7b4 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -160,7 +160,8 @@ extern int netlink_unregister_notifier(struct notifier_block *nb);
 
 /* finegrained unicast helpers: */
 struct sock *netlink_getsockbyfilp(struct file *filp);
-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo);
+int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
+		long timeo, struct sock *ssk);
 void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
 int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol);
 
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 59302fc3643b..fd2e26b6f966 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1018,7 +1018,8 @@ retry:
 				goto out;
 			}
 
-			ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT);
+			ret = netlink_attachskb(sock, nc, 0,
+					MAX_SCHEDULE_TIMEOUT, NULL);
 			if (ret == 1)
 		       		goto retry;
 			if (ret) {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2101b45d2ec6..6b9772d95872 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -702,7 +702,8 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
  * 0: continue
  * 1: repeat lookup - reference dropped while waiting for socket memory.
  */
-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo)
+int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
+		long timeo, struct sock *ssk)
 {
 	struct netlink_sock *nlk;
 
@@ -712,7 +713,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long t
 	    test_bit(0, &nlk->state)) {
 		DECLARE_WAITQUEUE(wait, current);
 		if (!timeo) {
-			if (!nlk->pid)
+			if (!ssk || nlk_sk(ssk)->pid == 0)
 				netlink_overrun(sk);
 			sock_put(sk);
 			kfree_skb(skb);
@@ -797,7 +798,7 @@ retry:
 		kfree_skb(skb);
 		return PTR_ERR(sk);
 	}
-	err = netlink_attachskb(sk, skb, nonblock, timeo);
+	err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
 	if (err == 1)
 		goto retry;
 	if (err)
-- 
cgit v1.2.3-71-gd317


From 9c15e852a524d55ab768cf48c97f5c684f876af2 Mon Sep 17 00:00:00 2001
From: Haren Myneni <haren@us.ibm.com>
Date: Fri, 10 Feb 2006 01:51:05 -0800
Subject: [PATCH] kexec: fix in free initrd when overlapped with crashkernel
 region

It is possible that the reserved crashkernel region can be overlapped with
initrd since the bootloader sets the initrd location.  When the initrd
region is freed, the second kernel memory will not be contiguous.  The
Kexec_load can cause an oops since there is no contiguous memory to write
the second kernel or this memory could be used in the first kernel itself
and may not be part of the dump.  For example, on powerpc, the initrd is
located at 36MB and the crashkernel starts at 32MB.  The kexec_load caused
panic since writing into non-allocated memory (after 36MB).  We could see
the similar issue even on other archs.

One possibility is to move the initrd outside of crashkernel region.  But,
the initrd region will be freed anyway before the system is up.  This patch
fixes this issue and frees only regions that are not part of crashkernel
memory in case overlaps.

Signed-off-by: Haren Myneni <haren@us.ibm.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kexec.h |  1 +
 init/initramfs.c      | 24 +++++++++++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index a311f58c8a7c..cfb3410e32b1 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -6,6 +6,7 @@
 #include <linux/list.h>
 #include <linux/linkage.h>
 #include <linux/compat.h>
+#include <linux/ioport.h>
 #include <asm/kexec.h>
 
 /* Verify architecture specific macros are defined */
diff --git a/init/initramfs.c b/init/initramfs.c
index 0c5d9a3f951b..637344b05981 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -466,10 +466,32 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only)
 extern char __initramfs_start[], __initramfs_end[];
 #ifdef CONFIG_BLK_DEV_INITRD
 #include <linux/initrd.h>
+#include <linux/kexec.h>
 
 static void __init free_initrd(void)
 {
-	free_initrd_mem(initrd_start, initrd_end);
+#ifdef CONFIG_KEXEC
+	unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
+	unsigned long crashk_end   = (unsigned long)__va(crashk_res.end);
+
+	/*
+	 * If the initrd region is overlapped with crashkernel reserved region,
+	 * free only memory that is not part of crashkernel region.
+	 */
+	if (initrd_start < crashk_end && initrd_end > crashk_start) {
+		/*
+		 * Initialize initrd memory region since the kexec boot does
+		 * not do.
+		 */
+		memset((void *)initrd_start, 0, initrd_end - initrd_start);
+		if (initrd_start < crashk_start)
+			free_initrd_mem(initrd_start, crashk_start);
+		if (initrd_end > crashk_end)
+			free_initrd_mem(crashk_end, initrd_end);
+	} else
+#endif
+		free_initrd_mem(initrd_start, initrd_end);
+
 	initrd_start = 0;
 	initrd_end = 0;
 }
-- 
cgit v1.2.3-71-gd317


From 7a8ef1cb774e5438d292365626f9b96616283706 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 10 Feb 2006 01:51:08 -0800
Subject: [PATCH] x86: don't initialise cpu_possible_map to all ones

Initialising cpu_possible_map to all-ones with CONFIG_HOTPLUG_CPU means that

a) All for_each_cpu() loops will iterate across all NR_CPUS CPUs, rather
   than over possible ones.  That can be quite expensive.

b) Soon we'll be allocating per-cpu areas only for possible CPUs.  So with
   CPU_MASK_ALL, we'll be wasting memory.

I also switched voyager over to not use CPU_MASK_ALL in the non-CPU-hotplug
case.  Should be OK..

I note that parisc is also using CPU_MASK_ALL.  Suggest that it stop doing
that.

Cc: James Bottomley <James.Bottomley@steeleye.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Paul Jackson <pj@sgi.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Zwane Mwaikambo <zwane@linuxpower.ca>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/smpboot.c           | 4 ----
 arch/i386/mach-voyager/voyager_smp.c | 2 +-
 include/linux/cpumask.h              | 2 +-
 3 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 255adb498268..fb00ab7b7612 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -87,11 +87,7 @@ EXPORT_SYMBOL(cpu_online_map);
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
 EXPORT_SYMBOL(cpu_callout_map);
-#ifdef CONFIG_HOTPLUG_CPU
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
-#else
 cpumask_t cpu_possible_map;
-#endif
 EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
 
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index 72a1b9cae2e4..6e4c3baef6cc 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -240,7 +240,7 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 cpumask_t cpu_callin_map = CPU_MASK_NONE;
 cpumask_t cpu_callout_map = CPU_MASK_NONE;
 EXPORT_SYMBOL(cpu_callout_map);
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
+cpumask_t cpu_possible_map = CPU_MASK_NONE;
 EXPORT_SYMBOL(cpu_possible_map);
 
 /* The per processor IRQ masks (these are usually kept in sync) */
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 20b446f26ecd..60e56c6e03dd 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -328,7 +328,7 @@ static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp,
  * bitmap of size NR_CPUS.
  *
  *  #ifdef CONFIG_HOTPLUG_CPU
- *     cpu_possible_map - all NR_CPUS bits set
+ *     cpu_possible_map - has bit 'cpu' set iff cpu is populatable
  *     cpu_present_map  - has bit 'cpu' set iff cpu is populated
  *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
  *  #else
-- 
cgit v1.2.3-71-gd317


From 8977d929e49021d9a6e031310aab01fa72f849c2 Mon Sep 17 00:00:00 2001
From: Paul Fulghum <paulkf@microgate.com>
Date: Fri, 10 Feb 2006 01:51:14 -0800
Subject: [PATCH] tty buffering stall fix

Prevent stalled processing of received data when a driver allocates tty
buffer space but does not immediately follow the allocation with more data
and a call to schedule receive tty processing.  (example: hvc_console) This
bug was introduced by the first locking patch for the new tty buffering.

Signed-off-by: Paul Fulghum <paulkf@microgate.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/tty_io.c    | 30 ++++++++++++++++++++++--------
 include/linux/kbd_kern.h |  4 +++-
 include/linux/tty.h      |  2 ++
 include/linux/tty_flip.h |  4 +++-
 4 files changed, 30 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 076e07c1da38..a23816d3e9a1 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -268,6 +268,8 @@ static struct tty_buffer *tty_buffer_alloc(size_t size)
 	p->size = size;
 	p->next = NULL;
 	p->active = 0;
+	p->commit = 0;
+	p->read = 0;
 	p->char_buf_ptr = (char *)(p->data);
 	p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size;
 /* 	printk("Flip create %p\n", p); */
@@ -298,6 +300,8 @@ static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
 			*tbh = t->next;
 			t->next = NULL;
 			t->used = 0;
+			t->commit = 0;
+			t->read = 0;
 			/* DEBUG ONLY */
 			memset(t->data, '*', size);
 /* 			printk("Flip recycle %p\n", t); */
@@ -335,6 +339,7 @@ int tty_buffer_request_room(struct tty_struct *tty, size_t size)
 			if (b != NULL) {
 				b->next = n;
 				b->active = 0;
+				b->commit = b->used;
 			} else
 				tty->buf.head = n;
 			tty->buf.tail = n;
@@ -2752,6 +2757,9 @@ static void flush_to_ldisc(void *private_)
 	unsigned long 	flags;
 	struct tty_ldisc *disc;
 	struct tty_buffer *tbuf;
+	int count;
+	char *char_buf;
+	unsigned char *flag_buf;
 
 	disc = tty_ldisc_ref(tty);
 	if (disc == NULL)	/*  !TTY_LDISC */
@@ -2765,16 +2773,20 @@ static void flush_to_ldisc(void *private_)
 		goto out;
 	}
 	spin_lock_irqsave(&tty->buf.lock, flags);
-	while((tbuf = tty->buf.head) != NULL && !tbuf->active) {
+	while((tbuf = tty->buf.head) != NULL) {
+		while ((count = tbuf->commit - tbuf->read) != 0) {
+			char_buf = tbuf->char_buf_ptr + tbuf->read;
+			flag_buf = tbuf->flag_buf_ptr + tbuf->read;
+			tbuf->read += count;
+			spin_unlock_irqrestore(&tty->buf.lock, flags);
+			disc->receive_buf(tty, char_buf, flag_buf, count);
+			spin_lock_irqsave(&tty->buf.lock, flags);
+		}
+		if (tbuf->active)
+			break;
 		tty->buf.head = tbuf->next;
 		if (tty->buf.head == NULL)
 			tty->buf.tail = NULL;
-		spin_unlock_irqrestore(&tty->buf.lock, flags);
-		/* printk("Process buffer %p for %d\n", tbuf, tbuf->used); */
-		disc->receive_buf(tty, tbuf->char_buf_ptr,
-				       tbuf->flag_buf_ptr,
-				       tbuf->used);
-		spin_lock_irqsave(&tty->buf.lock, flags);
 		tty_buffer_free(tty, tbuf);
 	}
 	spin_unlock_irqrestore(&tty->buf.lock, flags);
@@ -2871,8 +2883,10 @@ void tty_flip_buffer_push(struct tty_struct *tty)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&tty->buf.lock, flags);
-	if (tty->buf.tail != NULL)
+	if (tty->buf.tail != NULL) {
 		tty->buf.tail->active = 0;
+		tty->buf.tail->commit = tty->buf.tail->used;
+	}
 	spin_unlock_irqrestore(&tty->buf.lock, flags);
 
 	if (tty->low_latency)
diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h
index 3aed37314ab8..e87c32a5c86a 100644
--- a/include/linux/kbd_kern.h
+++ b/include/linux/kbd_kern.h
@@ -153,8 +153,10 @@ static inline void con_schedule_flip(struct tty_struct *t)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&t->buf.lock, flags);
-	if (t->buf.tail != NULL)
+	if (t->buf.tail != NULL) {
 		t->buf.tail->active = 0;
+		t->buf.tail->commit = t->buf.tail->used;
+	}
 	spin_unlock_irqrestore(&t->buf.lock, flags);
 	schedule_work(&t->buf.work);
 }
diff --git a/include/linux/tty.h b/include/linux/tty.h
index a7bd3b4558d2..f45cd74e6f24 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -58,6 +58,8 @@ struct tty_buffer {
 	int used;
 	int size;
 	int active;
+	int commit;
+	int read;
 	/* Data points here */
 	unsigned long data[0];
 };
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index 82961eb19888..222faf97d5f9 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -29,8 +29,10 @@ _INLINE_ void tty_schedule_flip(struct tty_struct *tty)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&tty->buf.lock, flags);
-	if (tty->buf.tail != NULL)
+	if (tty->buf.tail != NULL) {
 		tty->buf.tail->active = 0;
+		tty->buf.tail->commit = tty->buf.tail->used;
+	}
 	spin_unlock_irqrestore(&tty->buf.lock, flags);
 	schedule_delayed_work(&tty->buf.work, 1);
 }
-- 
cgit v1.2.3-71-gd317


From cff2b760096d1e6feaa31948e7af4abbefe47822 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Sat, 11 Feb 2006 17:55:47 -0800
Subject: [PATCH] fstatat64 support

The *at patches introduced fstatat and, due to insufficient research, I
used the newfstat functions generally as the guideline.  The result is that
on 32-bit platforms we don't have all the information needed to implement
fstatat64.

This patch modifies the code to pass up 64-bit information if
__ARCH_WANT_STAT64 is defined.  I renamed the syscall entry point to make
this clear.  Other archs will continue to use the existing code.  On x86-64
the compat code is implemented using a new sys32_ function.  This is what
is done for the other stat syscalls as well.

This patch might break some other archs (those which define
__ARCH_WANT_STAT64 and which already wired up the syscall).  Yet others
might need changes to accommodate the compatibility mode.  I really don't
want to do that work because all this stat handling is a mess (more so in
glibc, but the kernel is also affected).  It should be done by the arch
maintainers.  I'll provide some stand-alone test shortly.  Those who are
eager could compile glibc and run 'make check' (no installation needed).

The patch below has been tested on x86 and x86-64.

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/syscall_table.S |  2 +-
 arch/x86_64/ia32/ia32entry.S     |  2 +-
 arch/x86_64/ia32/sys_ia32.c      | 22 ++++++++++++++++++++++
 fs/stat.c                        | 22 ++++++++++++++++++++++
 include/asm-i386/unistd.h        |  2 +-
 include/asm-x86_64/ia32_unistd.h |  2 +-
 include/linux/syscalls.h         |  2 ++
 7 files changed, 50 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index 5a8b3fb6d27b..ac687d00a1ce 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -299,7 +299,7 @@ ENTRY(sys_call_table)
 	.long sys_mknodat
 	.long sys_fchownat
 	.long sys_futimesat
-	.long sys_newfstatat		/* 300 */
+	.long sys_fstatat64		/* 300 */
 	.long sys_unlinkat
 	.long sys_renameat
 	.long sys_linkat
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index ada4535d0161..00dee176c08e 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -677,7 +677,7 @@ ia32_sys_call_table:
 	.quad sys_mknodat
 	.quad sys_fchownat
 	.quad compat_sys_futimesat
-	.quad compat_sys_newfstatat	/* 300 */
+	.quad sys32_fstatat		/* 300 */
 	.quad sys_unlinkat
 	.quad sys_renameat
 	.quad sys_linkat
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index 54481af5344a..2bc55af95419 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -180,6 +180,28 @@ sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
 	return ret;
 }
 
+asmlinkage long
+sys32_fstatat(unsigned int dfd, char __user *filename,
+	      struct stat64 __user* statbuf, int flag)
+{
+	struct kstat stat;
+	int error = -EINVAL;
+
+	if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+		goto out;
+
+	if (flag & AT_SYMLINK_NOFOLLOW)
+		error = vfs_lstat_fd(dfd, filename, &stat);
+	else
+		error = vfs_stat_fd(dfd, filename, &stat);
+
+	if (!error)
+		error = cp_stat64(statbuf, &stat);
+
+out:
+	return error;
+}
+
 /*
  * Linux/i386 didn't use to be able to handle more than
  * 4 system call parameters, so these system calls used a memory
diff --git a/fs/stat.c b/fs/stat.c
index 24211b030f39..9948cc1685a4 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -261,6 +261,7 @@ asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf)
 	return error;
 }
 
+#ifndef __ARCH_WANT_STAT64
 asmlinkage long sys_newfstatat(int dfd, char __user *filename,
 				struct stat __user *statbuf, int flag)
 {
@@ -281,6 +282,7 @@ asmlinkage long sys_newfstatat(int dfd, char __user *filename,
 out:
 	return error;
 }
+#endif
 
 asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
 {
@@ -395,6 +397,26 @@ asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user * statbuf)
 	return error;
 }
 
+asmlinkage long sys_fstatat64(int dfd, char __user *filename,
+			       struct stat64 __user *statbuf, int flag)
+{
+	struct kstat stat;
+	int error = -EINVAL;
+
+	if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+		goto out;
+
+	if (flag & AT_SYMLINK_NOFOLLOW)
+		error = vfs_lstat_fd(dfd, filename, &stat);
+	else
+		error = vfs_stat_fd(dfd, filename, &stat);
+
+	if (!error)
+		error = cp_new_stat64(&stat, statbuf);
+
+out:
+	return error;
+}
 #endif /* __ARCH_WANT_STAT64 */
 
 void inode_add_bytes(struct inode *inode, loff_t bytes)
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index cf6f2cd9c514..dc81a55dd94d 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -305,7 +305,7 @@
 #define __NR_mknodat		297
 #define __NR_fchownat		298
 #define __NR_futimesat		299
-#define __NR_newfstatat		300
+#define __NR_fstatat64		300
 #define __NR_unlinkat		301
 #define __NR_renameat		302
 #define __NR_linkat		303
diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h
index 20468983d453..eeb2bcd635de 100644
--- a/include/asm-x86_64/ia32_unistd.h
+++ b/include/asm-x86_64/ia32_unistd.h
@@ -305,7 +305,7 @@
 #define __NR_ia32_mknodat		297
 #define __NR_ia32_fchownat		298
 #define __NR_ia32_futimesat		299
-#define __NR_ia32_newfstatat		300
+#define __NR_ia32_fstatat64		300
 #define __NR_ia32_unlinkat		301
 #define __NR_ia32_renameat		302
 #define __NR_ia32_linkat		303
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3877209d23c3..d73501ba7e44 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -557,6 +557,8 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
 			   int mode);
 asmlinkage long sys_newfstatat(int dfd, char __user *filename,
 			       struct stat __user *statbuf, int flag);
+asmlinkage long sys_fstatat64(int dfd, char __user *filename,
+			       struct stat64 __user *statbuf, int flag);
 asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf,
 			       int bufsiz);
 asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename,
-- 
cgit v1.2.3-71-gd317


From 643a654540579b0dcc7a206a4a7475276a41aff0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 11 Feb 2006 17:55:52 -0800
Subject: [PATCH] select: fix returned timeval

With David Woodhouse <dwmw2@infradead.org>

select() presently has a habit of increasing the value of the user's
`timeout' argument on return.

We were writing back a timeout larger than the original.  We _deliberately_
round up, since we know we must wait at _least_ as long as the caller asks
us to.

The patch adds a couple of helper functions for magnitude comparison of
timespecs and of timevals, and uses them to prevent the various poll and
select functions from returning a timeout which is larger than the one which
was passed in.

The patch also fixes a bug in compat_sys_pselect7(): it was adding the new
timeout value to the old one and was returning that.  It should just return
the new timeout value.

(We have various handy timespec/timeval-to-from-nsec conversion functions in
time.h.  But this code open-codes it all).

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andi Kleen <ak@muc.de>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: george anzinger <george@mvista.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c            | 37 +++++++++++++++++++++++++------------
 fs/select.c            | 32 +++++++++++++++++++++++---------
 include/linux/compat.h | 20 ++++++++++++++++++++
 include/linux/time.h   | 25 ++++++++++++++++++++++++-
 4 files changed, 92 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/compat.c b/fs/compat.c
index 70c5af4cc270..a2ba78bdf7f7 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1751,11 +1751,15 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 	ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
 
 	if (tvp) {
+		struct compat_timeval rtv;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
-		tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
-		tv.tv_sec = timeout;
-		if (copy_to_user(tvp, &tv, sizeof(tv))) {
+		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
+		rtv.tv_sec = timeout;
+		if (compat_timeval_compare(&rtv, &tv) < 0)
+			rtv = tv;
+		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
 sticky:
 			/*
 			 * If an application puts its timeval in read-only
@@ -1822,13 +1826,17 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
 
 	if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
-		ts.tv_sec += timeout / HZ;
-		ts.tv_nsec += (timeout % HZ) * (1000000000/HZ);
-		if (ts.tv_nsec >= 1000000000) {
-			ts.tv_sec++;
-			ts.tv_nsec -= 1000000000;
+		struct compat_timespec rts;
+
+		rts.tv_sec = timeout / HZ;
+		rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
+		if (rts.tv_nsec >= NSEC_PER_SEC) {
+			rts.tv_sec++;
+			rts.tv_nsec -= NSEC_PER_SEC;
 		}
-		(void)copy_to_user(tsp, &ts, sizeof(ts));
+		if (compat_timespec_compare(&rts, &ts) < 0)
+			rts = ts;
+		copy_to_user(tsp, &rts, sizeof(rts));
 	}
 
 	if (ret == -ERESTARTNOHAND) {
@@ -1918,12 +1926,17 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
 	if (tsp && timeout >= 0) {
+		struct compat_timespec rts;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
 		/* Yes, we know it's actually an s64, but it's also positive. */
-		ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
-		ts.tv_sec = timeout;
-		if (copy_to_user(tsp, &ts, sizeof(ts))) {
+		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
+					1000;
+		rts.tv_sec = timeout;
+		if (compat_timespec_compare(&rts, &ts) < 0)
+			rts = ts;
+		if (copy_to_user(tsp, &rts, sizeof(rts))) {
 sticky:
 			/*
 			 * If an application puts its timeval in read-only
diff --git a/fs/select.c b/fs/select.c
index bc60a3e14ef3..6ce68a9c8976 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -398,11 +398,15 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 	ret = core_sys_select(n, inp, outp, exp, &timeout);
 
 	if (tvp) {
+		struct timeval rtv;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
-		tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
-		tv.tv_sec = timeout;
-		if (copy_to_user(tvp, &tv, sizeof(tv))) {
+		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
+		rtv.tv_sec = timeout;
+		if (timeval_compare(&rtv, &tv) < 0)
+			rtv = tv;
+		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
 sticky:
 			/*
 			 * If an application puts its timeval in read-only
@@ -460,11 +464,16 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
 	ret = core_sys_select(n, inp, outp, exp, &timeout);
 
 	if (tsp) {
+		struct timespec rts;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
-		ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
-		ts.tv_sec = timeout;
-		if (copy_to_user(tsp, &ts, sizeof(ts))) {
+		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
+						1000;
+		rts.tv_sec = timeout;
+		if (timespec_compare(&rts, &ts) < 0)
+			rts = ts;
+		if (copy_to_user(tsp, &rts, sizeof(rts))) {
 sticky:
 			/*
 			 * If an application puts its timeval in read-only
@@ -758,12 +767,17 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
 	if (tsp && timeout >= 0) {
+		struct timespec rts;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
 		/* Yes, we know it's actually an s64, but it's also positive. */
-		ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
-		ts.tv_sec = timeout;
-		if (copy_to_user(tsp, &ts, sizeof(ts))) {
+		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
+						1000;
+		rts.tv_sec = timeout;
+		if (timespec_compare(&rts, &ts) < 0)
+			rts = ts;
+		if (copy_to_user(tsp, &rts, sizeof(rts))) {
 		sticky:
 			/*
 			 * If an application puts its timeval in read-only
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f9ca534787e2..c9ab2a26348c 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -161,5 +161,25 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
 
+static inline int compat_timeval_compare(struct compat_timeval *lhs,
+					struct compat_timeval *rhs)
+{
+	if (lhs->tv_sec < rhs->tv_sec)
+		return -1;
+	if (lhs->tv_sec > rhs->tv_sec)
+		return 1;
+	return lhs->tv_usec - rhs->tv_usec;
+}
+
+static inline int compat_timespec_compare(struct compat_timespec *lhs,
+					struct compat_timespec *rhs)
+{
+	if (lhs->tv_sec < rhs->tv_sec)
+		return -1;
+	if (lhs->tv_sec > rhs->tv_sec)
+		return 1;
+	return lhs->tv_nsec - rhs->tv_nsec;
+}
+
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
diff --git a/include/linux/time.h b/include/linux/time.h
index 7b4dc36532bb..d9cdba54b789 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -33,11 +33,34 @@ struct timezone {
 #define NSEC_PER_SEC		1000000000L
 #define NSEC_PER_USEC		1000L
 
-static __inline__ int timespec_equal(struct timespec *a, struct timespec *b)
+static inline int timespec_equal(struct timespec *a, struct timespec *b)
 {
 	return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
 }
 
+/*
+ * lhs < rhs:  return <0
+ * lhs == rhs: return 0
+ * lhs > rhs:  return >0
+ */
+static inline int timespec_compare(struct timespec *lhs, struct timespec *rhs)
+{
+	if (lhs->tv_sec < rhs->tv_sec)
+		return -1;
+	if (lhs->tv_sec > rhs->tv_sec)
+		return 1;
+	return lhs->tv_nsec - rhs->tv_nsec;
+}
+
+static inline int timeval_compare(struct timeval *lhs, struct timeval *rhs)
+{
+	if (lhs->tv_sec < rhs->tv_sec)
+		return -1;
+	if (lhs->tv_sec > rhs->tv_sec)
+		return 1;
+	return lhs->tv_usec - rhs->tv_usec;
+}
+
 extern unsigned long mktime(const unsigned int year, const unsigned int mon,
 			    const unsigned int day, const unsigned int hour,
 			    const unsigned int min, const unsigned int sec);
-- 
cgit v1.2.3-71-gd317


From bc7fc0601b3eb2254f080492f3fd69e319ed32d0 Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Sat, 11 Feb 2006 17:56:07 -0800
Subject: [PATCH] nvidiafb: Add support for Geforce4 MX 4000

Add support for Geforce4 MX 4000 (0x185)

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/video/nvidia/nvidia.c | 2 ++
 include/linux/pci_ids.h       | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c
index dbcb8962e57d..a7c4e5e8ead6 100644
--- a/drivers/video/nvidia/nvidia.c
+++ b/drivers/video/nvidia/nvidia.c
@@ -138,6 +138,8 @@ static struct pci_device_id nvidiafb_pci_tbl[] = {
 	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_420_8X,
 	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_4000,
+	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_448_GO,
 	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE4_488_GO,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 7a61ccdcbc4b..82b83da25d77 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1087,6 +1087,7 @@
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_440_8X 0x0181
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_440SE_8X 0x0182
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_420_8X 0x0183
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE4_MX_4000   0x0185
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_448_GO    0x0186
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_488_GO    0x0187
 #define PCI_DEVICE_ID_NVIDIA_QUADRO4_580_XGL    0x0188
-- 
cgit v1.2.3-71-gd317


From 7c8903f6373f9abecf060bad53ca36bc4ac037f2 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 14 Feb 2006 13:53:03 -0800
Subject: [PATCH] jbd: revert checkpoint list changes

This patch reverts commit f93ea411b73594f7d144855fd34278bcf34a9afc:
  [PATCH] jbd: split checkpoint lists

This broke journal_flush() for OCFS2, which is its method of being sure
that metadata is sent to disk for another node.

It also reverts two related commits, 8d3c7fce2d20ecc3264c8d8c91ae3beacdeaed1b
and 43c3e6f5abdf6acac9b90c86bf03f995bf7d3d92, with the subjects:
  [PATCH] jbd: log_do_checkpoint fix
  [PATCH] jbd: remove_transaction fix

These seem to be incremental bugfixes on the original patch and as such are
no longer needed.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Cc: Jan Kara <jack@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/checkpoint.c | 418 ++++++++++++++++++++++------------------------------
 fs/jbd/commit.c     |   3 +-
 include/linux/jbd.h |   8 +-
 3 files changed, 179 insertions(+), 250 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e6265a0b56b8..543ed543d1e5 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,75 +24,29 @@
 #include <linux/slab.h>
 
 /*
- * Unlink a buffer from a transaction checkpoint list.
+ * Unlink a buffer from a transaction.
  *
  * Called with j_list_lock held.
  */
 
-static void __buffer_unlink_first(struct journal_head *jh)
+static inline void __buffer_unlink(struct journal_head *jh)
 {
 	transaction_t *transaction;
 
 	transaction = jh->b_cp_transaction;
+	jh->b_cp_transaction = NULL;
 
 	jh->b_cpnext->b_cpprev = jh->b_cpprev;
 	jh->b_cpprev->b_cpnext = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh) {
+	if (transaction->t_checkpoint_list == jh)
 		transaction->t_checkpoint_list = jh->b_cpnext;
-		if (transaction->t_checkpoint_list == jh)
-			transaction->t_checkpoint_list = NULL;
-	}
-}
-
-/*
- * Unlink a buffer from a transaction checkpoint(io) list.
- *
- * Called with j_list_lock held.
- */
-
-static inline void __buffer_unlink(struct journal_head *jh)
-{
-	transaction_t *transaction;
-
-	transaction = jh->b_cp_transaction;
-
-	__buffer_unlink_first(jh);
-	if (transaction->t_checkpoint_io_list == jh) {
-		transaction->t_checkpoint_io_list = jh->b_cpnext;
-		if (transaction->t_checkpoint_io_list == jh)
-			transaction->t_checkpoint_io_list = NULL;
-	}
-}
-
-/*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
-	transaction_t *transaction;
-
-	transaction = jh->b_cp_transaction;
-	__buffer_unlink_first(jh);
-
-	if (!transaction->t_checkpoint_io_list) {
-		jh->b_cpnext = jh->b_cpprev = jh;
-	} else {
-		jh->b_cpnext = transaction->t_checkpoint_io_list;
-		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
-		jh->b_cpprev->b_cpnext = jh;
-		jh->b_cpnext->b_cpprev = jh;
-	}
-	transaction->t_checkpoint_io_list = jh;
+	if (transaction->t_checkpoint_list == jh)
+		transaction->t_checkpoint_list = NULL;
 }
 
 /*
  * Try to release a checkpointed buffer from its transaction.
- * Returns 1 if we released it and 2 if we also released the
- * whole transaction.
- *
+ * Returns 1 if we released it.
  * Requires j_list_lock
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
@@ -103,11 +57,12 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		ret = __journal_remove_checkpoint(jh) + 1;
+		__journal_remove_checkpoint(jh);
 		jbd_unlock_bh_state(bh);
 		journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
+		ret = 1;
 	} else {
 		jbd_unlock_bh_state(bh);
 	}
@@ -162,53 +117,83 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
 }
 
 /*
- * Clean up transaction's list of buffers submitted for io.
- * We wait for any pending IO to complete and remove any clean
- * buffers. Note that we take the buffers in the opposite ordering
- * from the one in which they were submitted for IO.
+ * Clean up a transaction's checkpoint list.
+ *
+ * We wait for any pending IO to complete and make sure any clean
+ * buffers are removed from the transaction.
+ *
+ * Return 1 if we performed any actions which might have destroyed the
+ * checkpoint.  (journal_remove_checkpoint() deletes the transaction when
+ * the last checkpoint buffer is cleansed)
  *
  * Called with j_list_lock held.
  */
-
-static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
+static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
 {
-	struct journal_head *jh;
+	struct journal_head *jh, *next_jh, *last_jh;
 	struct buffer_head *bh;
-	tid_t this_tid;
-	int released = 0;
-
-	this_tid = transaction->t_tid;
-restart:
-	/* Didn't somebody clean up the transaction in the meanwhile */
-	if (journal->j_checkpoint_transactions != transaction ||
-		transaction->t_tid != this_tid)
-		return;
-	while (!released && transaction->t_checkpoint_io_list) {
-		jh = transaction->t_checkpoint_io_list;
+	int ret = 0;
+
+	assert_spin_locked(&journal->j_list_lock);
+	jh = transaction->t_checkpoint_list;
+	if (!jh)
+		return 0;
+
+	last_jh = jh->b_cpprev;
+	next_jh = jh;
+	do {
+		jh = next_jh;
 		bh = jh2bh(jh);
-		if (!jbd_trylock_bh_state(bh)) {
-			jbd_sync_bh(journal, bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart;
-		}
 		if (buffer_locked(bh)) {
 			atomic_inc(&bh->b_count);
 			spin_unlock(&journal->j_list_lock);
-			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
 			/* the journal_head may have gone by now */
 			BUFFER_TRACE(bh, "brelse");
 			__brelse(bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart;
+			goto out_return_1;
 		}
+
 		/*
-		 * Now in whatever state the buffer currently is, we know that
-		 * it has been written out and so we can drop it from the list
+		 * This is foul
 		 */
-		released = __journal_remove_checkpoint(jh);
-		jbd_unlock_bh_state(bh);
-	}
+		if (!jbd_trylock_bh_state(bh)) {
+			jbd_sync_bh(journal, bh);
+			goto out_return_1;
+		}
+
+		if (jh->b_transaction != NULL) {
+			transaction_t *t = jh->b_transaction;
+			tid_t tid = t->t_tid;
+
+			spin_unlock(&journal->j_list_lock);
+			jbd_unlock_bh_state(bh);
+			log_start_commit(journal, tid);
+			log_wait_commit(journal, tid);
+			goto out_return_1;
+		}
+
+		/*
+		 * AKPM: I think the buffer_jbddirty test is redundant - it
+		 * shouldn't have NULL b_transaction?
+		 */
+		next_jh = jh->b_cpnext;
+		if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
+			BUFFER_TRACE(bh, "remove from checkpoint");
+			__journal_remove_checkpoint(jh);
+			jbd_unlock_bh_state(bh);
+			journal_remove_journal_head(bh);
+			__brelse(bh);
+			ret = 1;
+		} else {
+			jbd_unlock_bh_state(bh);
+		}
+	} while (jh != last_jh);
+
+	return ret;
+out_return_1:
+	spin_lock(&journal->j_list_lock);
+	return 1;
 }
 
 #define NR_BATCH	64
@@ -218,7 +203,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 {
 	int i;
 
+	spin_unlock(&journal->j_list_lock);
 	ll_rw_block(SWRITE, *batch_count, bhs);
+	spin_lock(&journal->j_list_lock);
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = bhs[i];
 		clear_buffer_jwrite(bh);
@@ -234,46 +221,19 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  * Return 1 if something happened which requires us to abort the current
  * scan of the checkpoint list.  
  *
- * Called with j_list_lock held and drops it if 1 is returned
+ * Called with j_list_lock held.
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
-static int __process_buffer(journal_t *journal, struct journal_head *jh,
-			struct buffer_head **bhs, int *batch_count)
+static int __flush_buffer(journal_t *journal, struct journal_head *jh,
+			struct buffer_head **bhs, int *batch_count,
+			int *drop_count)
 {
 	struct buffer_head *bh = jh2bh(jh);
 	int ret = 0;
 
-	if (buffer_locked(bh)) {
-		get_bh(bh);
-		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
-		wait_on_buffer(bh);
-		/* the journal_head may have gone by now */
-		BUFFER_TRACE(bh, "brelse");
-		put_bh(bh);
-		ret = 1;
-	}
-	else if (jh->b_transaction != NULL) {
-		transaction_t *t = jh->b_transaction;
-		tid_t tid = t->t_tid;
+	if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
+		J_ASSERT_JH(jh, jh->b_transaction == NULL);
 
-		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
-		log_start_commit(journal, tid);
-		log_wait_commit(journal, tid);
-		ret = 1;
-	}
-	else if (!buffer_dirty(bh)) {
-		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
-		BUFFER_TRACE(bh, "remove from checkpoint");
-		__journal_remove_checkpoint(jh);
-		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
-		journal_remove_journal_head(bh);
-		put_bh(bh);
-		ret = 1;
-	}
-	else {
 		/*
 		 * Important: we are about to write the buffer, and
 		 * possibly block, while still holding the journal lock.
@@ -286,30 +246,45 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		J_ASSERT_BH(bh, !buffer_jwrite(bh));
 		set_buffer_jwrite(bh);
 		bhs[*batch_count] = bh;
-		__buffer_relink_io(jh);
 		jbd_unlock_bh_state(bh);
 		(*batch_count)++;
 		if (*batch_count == NR_BATCH) {
-			spin_unlock(&journal->j_list_lock);
 			__flush_batch(journal, bhs, batch_count);
 			ret = 1;
 		}
+	} else {
+		int last_buffer = 0;
+		if (jh->b_cpnext == jh) {
+			/* We may be about to drop the transaction.  Tell the
+			 * caller that the lists have changed.
+			 */
+			last_buffer = 1;
+		}
+		if (__try_to_free_cp_buf(jh)) {
+			(*drop_count)++;
+			ret = last_buffer;
+		}
 	}
 	return ret;
 }
 
 /*
- * Perform an actual checkpoint. We take the first transaction on the
- * list of transactions to be checkpointed and send all its buffers
- * to disk. We submit larger chunks of data at once.
+ * Perform an actual checkpoint.  We don't write out only enough to
+ * satisfy the current blocked requests: rather we submit a reasonably
+ * sized chunk of the outstanding data to disk at once for
+ * efficiency.  __log_wait_for_space() will retry if we didn't free enough.
  * 
+ * However, we _do_ take into account the amount requested so that once
+ * the IO has been queued, we can return as soon as enough of it has
+ * completed to disk.
+ *
  * The journal should be locked before calling this function.
  */
 int log_do_checkpoint(journal_t *journal)
 {
-	transaction_t *transaction;
-	tid_t this_tid;
 	int result;
+	int batch_count = 0;
+	struct buffer_head *bhs[NR_BATCH];
 
 	jbd_debug(1, "Start checkpoint\n");
 
@@ -324,70 +299,79 @@ int log_do_checkpoint(journal_t *journal)
 		return result;
 
 	/*
-	 * OK, we need to start writing disk blocks.  Take one transaction
-	 * and write it.
+	 * OK, we need to start writing disk blocks.  Try to free up a
+	 * quarter of the log in a single checkpoint if we can.
 	 */
-	spin_lock(&journal->j_list_lock);
-	if (!journal->j_checkpoint_transactions)
-		goto out;
-	transaction = journal->j_checkpoint_transactions;
-	this_tid = transaction->t_tid;
-restart:
 	/*
-	 * If someone cleaned up this transaction while we slept, we're
-	 * done (maybe it's a new transaction, but it fell at the same
-	 * address).
+	 * AKPM: check this code.  I had a feeling a while back that it
+	 * degenerates into a busy loop at unmount time.
 	 */
- 	if (journal->j_checkpoint_transactions == transaction &&
-			transaction->t_tid == this_tid) {
-		int batch_count = 0;
-		struct buffer_head *bhs[NR_BATCH];
-		struct journal_head *jh;
-		int retry = 0;
-
-		while (!retry && transaction->t_checkpoint_list) {
+	spin_lock(&journal->j_list_lock);
+	while (journal->j_checkpoint_transactions) {
+		transaction_t *transaction;
+		struct journal_head *jh, *last_jh, *next_jh;
+		int drop_count = 0;
+		int cleanup_ret, retry = 0;
+		tid_t this_tid;
+
+		transaction = journal->j_checkpoint_transactions;
+		this_tid = transaction->t_tid;
+		jh = transaction->t_checkpoint_list;
+		last_jh = jh->b_cpprev;
+		next_jh = jh;
+		do {
 			struct buffer_head *bh;
 
-			jh = transaction->t_checkpoint_list;
+			jh = next_jh;
+			next_jh = jh->b_cpnext;
 			bh = jh2bh(jh);
 			if (!jbd_trylock_bh_state(bh)) {
 				jbd_sync_bh(journal, bh);
+				spin_lock(&journal->j_list_lock);
 				retry = 1;
 				break;
 			}
-			retry = __process_buffer(journal, jh, bhs,
-						&batch_count);
-			if (!retry &&
-			    lock_need_resched(&journal->j_list_lock)) {
-				spin_unlock(&journal->j_list_lock);
+			retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
+			if (cond_resched_lock(&journal->j_list_lock)) {
 				retry = 1;
 				break;
 			}
-		}
+		} while (jh != last_jh && !retry);
 
 		if (batch_count) {
-			if (!retry) {
-				spin_unlock(&journal->j_list_lock);
-				retry = 1;
-			}
 			__flush_batch(journal, bhs, &batch_count);
+			retry = 1;
 		}
 
-		if (retry) {
-			spin_lock(&journal->j_list_lock);
-			goto restart;
-		}
 		/*
-		 * Now we have cleaned up the first transaction's checkpoint
-		 * list.  Let's clean up the second one.
+		 * If someone cleaned up this transaction while we slept, we're
+		 * done
+		 */
+		if (journal->j_checkpoint_transactions != transaction)
+			break;
+		if (retry)
+			continue;
+		/*
+		 * Maybe it's a new transaction, but it fell at the same
+		 * address
 		 */
-		__wait_cp_io(journal, transaction);
+		if (transaction->t_tid != this_tid)
+			continue;
+		/*
+		 * We have walked the whole transaction list without
+		 * finding anything to write to disk.  We had better be
+		 * able to make some progress or we are in trouble.
+		 */
+		cleanup_ret = __cleanup_transaction(journal, transaction);
+		J_ASSERT(drop_count != 0 || cleanup_ret != 0);
+		if (journal->j_checkpoint_transactions != transaction)
+			break;
 	}
-out:
 	spin_unlock(&journal->j_list_lock);
 	result = cleanup_journal_tail(journal);
 	if (result < 0)
 		return result;
+
 	return 0;
 }
 
@@ -471,53 +455,6 @@ int cleanup_journal_tail(journal_t *journal)
 
 /* Checkpoint list management */
 
-/*
- * journal_clean_one_cp_list
- *
- * Find all the written-back checkpoint buffers in the given list and release them.
- *
- * Called with the journal locked.
- * Called with j_list_lock held.
- * Returns number of bufers reaped (for debug)
- */
-
-static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
-{
-	struct journal_head *last_jh;
-	struct journal_head *next_jh = jh;
-	int ret, freed = 0;
-
-	*released = 0;
-	if (!jh)
-		return 0;
-
- 	last_jh = jh->b_cpprev;
-	do {
-		jh = next_jh;
-		next_jh = jh->b_cpnext;
-		/* Use trylock because of the ranking */
-		if (jbd_trylock_bh_state(jh2bh(jh))) {
-			ret = __try_to_free_cp_buf(jh);
-			if (ret) {
-				freed++;
-				if (ret == 2) {
-					*released = 1;
-					return freed;
-				}
-			}
-		}
-		/*
-		 * This function only frees up some memory if possible so we
-		 * dont have an obligation to finish processing. Bail out if
-		 * preemption requested:
-		 */
-		if (need_resched())
-			return freed;
-	} while (jh != last_jh);
-
-	return freed;
-}
-
 /*
  * journal_clean_checkpoint_list
  *
@@ -525,38 +462,46 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
  *
  * Called with the journal locked.
  * Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
+ * Returns number of buffers reaped (for debug)
  */
 
 int __journal_clean_checkpoint_list(journal_t *journal)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
-	int ret = 0, released;
+	int ret = 0;
 
 	transaction = journal->j_checkpoint_transactions;
-	if (!transaction)
+	if (transaction == 0)
 		goto out;
 
 	last_transaction = transaction->t_cpprev;
 	next_transaction = transaction;
 	do {
+		struct journal_head *jh;
+
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_list, &released);
-		if (need_resched())
-			goto out;
-		if (released)
-			continue;
-		/*
-		 * It is essential that we are as careful as in the case of
-		 * t_checkpoint_list with removing the buffer from the list as
-		 * we can possibly see not yet submitted buffers on io_list
-		 */
-		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_io_list, &released);
-		if (need_resched())
-			goto out;
+		jh = transaction->t_checkpoint_list;
+		if (jh) {
+			struct journal_head *last_jh = jh->b_cpprev;
+			struct journal_head *next_jh = jh;
+
+			do {
+				jh = next_jh;
+				next_jh = jh->b_cpnext;
+				/* Use trylock because of the ranking */
+				if (jbd_trylock_bh_state(jh2bh(jh)))
+					ret += __try_to_free_cp_buf(jh);
+				/*
+				 * This function only frees up some memory
+				 * if possible so we don't have an obligation
+				 * to finish processing. Bail out if preemption
+				 * requested:
+				 */
+				if (need_resched())
+					goto out;
+			} while (jh != last_jh);
+		}
 	} while (transaction != last_transaction);
 out:
 	return ret;
@@ -571,22 +516,18 @@ out:
  * buffer updates committed in that transaction have safely been stored
  * elsewhere on disk.  To achieve this, all of the buffers in a
  * transaction need to be maintained on the transaction's checkpoint
- * lists until they have been rewritten, at which point this function is
+ * list until they have been rewritten, at which point this function is
  * called to remove the buffer from the existing transaction's
- * checkpoint lists.
- *
- * The function returns 1 if it frees the transaction, 0 otherwise.
+ * checkpoint list.
  *
  * This function is called with the journal locked.
  * This function is called with j_list_lock held.
- * This function is called with jbd_lock_bh_state(jh2bh(jh))
  */
 
-int __journal_remove_checkpoint(struct journal_head *jh)
+void __journal_remove_checkpoint(struct journal_head *jh)
 {
 	transaction_t *transaction;
 	journal_t *journal;
-	int ret = 0;
 
 	JBUFFER_TRACE(jh, "entry");
 
@@ -597,10 +538,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
 	journal = transaction->t_journal;
 
 	__buffer_unlink(jh);
-	jh->b_cp_transaction = NULL;
 
-	if (transaction->t_checkpoint_list != NULL ||
-	    transaction->t_checkpoint_io_list != NULL)
+	if (transaction->t_checkpoint_list != NULL)
 		goto out;
 	JBUFFER_TRACE(jh, "transaction has no more buffers");
 
@@ -626,10 +565,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
 	/* Just in case anybody was waiting for more transactions to be
            checkpointed... */
 	wake_up(&journal->j_wait_logspace);
-	ret = 1;
 out:
 	JBUFFER_TRACE(jh, "exit");
-	return ret;
 }
 
 /*
@@ -691,7 +628,6 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 	J_ASSERT(transaction->t_shadow_list == NULL);
 	J_ASSERT(transaction->t_log_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_list == NULL);
-	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 	J_ASSERT(transaction->t_updates == 0);
 	J_ASSERT(journal->j_committing_transaction != transaction);
 	J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 29e62d98bae6..002ad2bbc769 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -829,8 +829,7 @@ restart_loop:
 	journal->j_committing_transaction = NULL;
 	spin_unlock(&journal->j_state_lock);
 
-	if (commit_transaction->t_checkpoint_list == NULL &&
-	    commit_transaction->t_checkpoint_io_list == NULL) {
+	if (commit_transaction->t_checkpoint_list == NULL) {
 		__journal_drop_transaction(journal, commit_transaction);
 	} else {
 		if (journal->j_checkpoint_transactions == NULL) {
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 0fe4aa891ddc..41ee79962bb2 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -497,12 +497,6 @@ struct transaction_s
 	 */
 	struct journal_head	*t_checkpoint_list;
 
-	/*
-	 * Doubly-linked circular list of all buffers submitted for IO while
-	 * checkpointing. [j_list_lock]
-	 */
-	struct journal_head	*t_checkpoint_io_list;
-
 	/*
 	 * Doubly-linked circular list of temporary buffers currently undergoing
 	 * IO in the log [j_list_lock]
@@ -852,7 +846,7 @@ extern void journal_commit_transaction(journal_t *);
 
 /* Checkpoint list management */
 int __journal_clean_checkpoint_list(journal_t *journal);
-int __journal_remove_checkpoint(struct journal_head *);
+void __journal_remove_checkpoint(struct journal_head *);
 void __journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 /* Buffer IO */
-- 
cgit v1.2.3-71-gd317


From 5ac5f9d1ce8492163dbde5d357dc5d03becf7e36 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 14 Feb 2006 13:53:04 -0800
Subject: [PATCH] NLM: Fix the NLM_GRANTED callback checks

If 2 threads attached to the same process are blocking on different locks on
different files (maybe even on different servers) but have the same lock
arguments (i.e.  same offset+length - actually quite common, since most
processes try to lock the entire file) then the first GRANTED call that wakes
one up will also wake the other.

Currently when the NLM_GRANTED callback comes in, lockd walks the list of
blocked locks in search of a match to the lock that the NLM server has
granted.  Although it checks the lock pid, start and end, it fails to check
the filehandle and the server address.

By checking the filehandle and server IP address, we ensure that this only
happens if the locks truly are referencing the same file.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/clntlock.c         | 27 +++++++++++++++++----------
 fs/lockd/svc4proc.c         |  2 +-
 fs/lockd/svcproc.c          |  2 +-
 include/linux/lockd/lockd.h |  6 +++---
 4 files changed, 22 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 3eaf6e701087..da6354baa0b8 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -111,9 +111,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
 /*
  * The server lockd has called us back to tell us the lock was granted
  */
-u32
-nlmclnt_grant(struct nlm_lock *lock)
+u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
 {
+	const struct file_lock *fl = &lock->fl;
+	const struct nfs_fh *fh = &lock->fh;
 	struct nlm_wait	*block;
 	u32 res = nlm_lck_denied;
 
@@ -122,14 +123,20 @@ nlmclnt_grant(struct nlm_lock *lock)
 	 * Warning: must not use cookie to match it!
 	 */
 	list_for_each_entry(block, &nlm_blocked, b_list) {
-		if (nlm_compare_locks(block->b_lock, &lock->fl)) {
-			/* Alright, we found a lock. Set the return status
-			 * and wake up the caller
-			 */
-			block->b_status = NLM_LCK_GRANTED;
-			wake_up(&block->b_wait);
-			res = nlm_granted;
-		}
+		struct file_lock *fl_blocked = block->b_lock;
+
+		if (!nlm_compare_locks(fl_blocked, fl))
+			continue;
+		if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
+			continue;
+		if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_dentry->d_inode) ,fh) != 0)
+			continue;
+		/* Alright, we found a lock. Set the return status
+		 * and wake up the caller
+		 */
+		block->b_status = NLM_LCK_GRANTED;
+		wake_up(&block->b_wait);
+		res = nlm_granted;
 	}
 	return res;
 }
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4063095d849e..b10f913aa06a 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -228,7 +228,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
-	resp->status = nlmclnt_grant(&argp->lock);
+	resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
 	dprintk("lockd: GRANTED       status %d\n", ntohl(resp->status));
 	return rpc_success;
 }
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3bc437e0cf5b..35681d9cf1fc 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -256,7 +256,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
-	resp->status = nlmclnt_grant(&argp->lock);
+	resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
 	dprintk("lockd: GRANTED       status %d\n", ntohl(resp->status));
 	return rpc_success;
 }
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 920766cea79c..ef21ed296039 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -149,7 +149,7 @@ struct nlm_rqst * nlmclnt_alloc_call(void);
 int		  nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
 void		  nlmclnt_finish_block(struct nlm_rqst *req);
 long		  nlmclnt_block(struct nlm_rqst *req, long timeout);
-u32		  nlmclnt_grant(struct nlm_lock *);
+u32		  nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *);
 void		  nlmclnt_recovery(struct nlm_host *, u32);
 int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
 int		  nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *);
@@ -204,7 +204,7 @@ nlmsvc_file_inode(struct nlm_file *file)
  * Compare two host addresses (needs modifying for ipv6)
  */
 static __inline__ int
-nlm_cmp_addr(struct sockaddr_in *sin1, struct sockaddr_in *sin2)
+nlm_cmp_addr(const struct sockaddr_in *sin1, const struct sockaddr_in *sin2)
 {
 	return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
 }
@@ -214,7 +214,7 @@ nlm_cmp_addr(struct sockaddr_in *sin1, struct sockaddr_in *sin2)
  * When the second lock is of type F_UNLCK, this acts like a wildcard.
  */
 static __inline__ int
-nlm_compare_locks(struct file_lock *fl1, struct file_lock *fl2)
+nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2)
 {
 	return	fl1->fl_pid   == fl2->fl_pid
 	     && fl1->fl_start == fl2->fl_start
-- 
cgit v1.2.3-71-gd317


From d6077cb80cde4506720f9165eba99ee07438513f Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Tue, 14 Feb 2006 13:53:10 -0800
Subject: [PATCH] sched: revert "filter affine wakeups"

Revert commit d7102e95b7b9c00277562c29aad421d2d521c5f6:

    [PATCH] sched: filter affine wakeups

Apparently caused more than 10% performance regression for aim7 benchmark.
The setup in use is 16-cpu HP rx8620, 64Gb of memory and 12 MSA1000s with 144
disks.  Each disk is 72Gb with a single ext3 filesystem (courtesy of HP, who
supplied benchmark results).

The problem is, for aim7, the wake-up pattern is random, but it still needs
load balancing action in the wake-up path to achieve best performance.  With
the above commit, lack of load balancing hurts that workload.

However, for workloads like database transaction processing, the requirement
is exactly opposite.  In the wake up path, best performance is achieved with
absolutely zero load balancing.  We simply wake up the process on the CPU that
it previously ran on.  Worst performance is obtained when we do load
balancing at wake up.

There isn't an easy way to auto-detect the workload characteristics.  Ingo's
earlier patch, which detects an idle CPU and decides whether or not to load
balance, doesn't perform well with aim7 either, since all CPUs are busy (it
causes an even bigger performance regression).

Revert commit d7102e95b7b9c00277562c29aad421d2d521c5f6, which causes more
than 10% performance regression with aim7.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h |  5 +----
 kernel/sched.c        | 10 +---------
 2 files changed, 2 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c1da0269a18..b6f51e3a38ec 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -697,11 +697,8 @@ struct task_struct {
 
 	int lock_depth;		/* BKL lock depth */
 
-#if defined(CONFIG_SMP)
-	int last_waker_cpu;	/* CPU that last woke this task up */
-#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	int oncpu;
-#endif
 #endif
 	int prio, static_prio;
 	struct list_head run_list;
diff --git a/kernel/sched.c b/kernel/sched.c
index 87d93be336a1..66d957227de9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1204,9 +1204,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
 		}
 	}
 
-	if (p->last_waker_cpu != this_cpu)
-		goto out_set_cpu;
-
 	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
 
@@ -1277,8 +1274,6 @@ out_set_cpu:
 		cpu = task_cpu(p);
 	}
 
-	p->last_waker_cpu = this_cpu;
-
 out_activate:
 #endif /* CONFIG_SMP */
 	if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1360,12 +1355,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
-#if defined(CONFIG_SMP)
-	p->last_waker_cpu = cpu;
-#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	p->oncpu = 0;
 #endif
-#endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
-- 
cgit v1.2.3-71-gd317


From ee68cea2c26b7a8222f9020f54d22c6067011e8b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Feb 2006 01:34:23 -0800
Subject: [NETFILTER]: Fix xfrm lookup after SNAT

To find out if a packet needs to be handled by IPsec after SNAT, packets
are currently rerouted in POST_ROUTING and a new xfrm lookup is done. This
breaks SNAT of non-unicast packets to non-local addresses because the
packet is routed as incoming packet and no neighbour entry is bound to the
dst_entry. In general, it seems to be a bad idea to replace the dst_entry
after the packet was already sent to the output routine because its state
might not match what's expected.

This patch changes the xfrm lookup in POST_ROUTING to re-use the original
dst_entry without routing the packet again. This means no policy routing
can be used for transport mode transforms (which keep the original route)
when packets are SNATed to match the policy, but it looks like the best
we can do for now.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4.h         |  2 +-
 net/ipv4/netfilter.c                   | 41 ++++++++++++++++++++++++++++++++++
 net/ipv4/netfilter/ip_nat_standalone.c |  6 ++---
 3 files changed, 45 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index fdc4a9527343..43c09d790b83 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -79,7 +79,7 @@ enum nf_ip_hook_priorities {
 
 #ifdef __KERNEL__
 extern int ip_route_me_harder(struct sk_buff **pskb);
-
+extern int ip_xfrm_me_harder(struct sk_buff **pskb);
 #endif /*__KERNEL__*/
 
 #endif /*__LINUX_IP_NETFILTER_H*/
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 52a3d7c57907..ed42cdc57cd9 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -78,6 +78,47 @@ int ip_route_me_harder(struct sk_buff **pskb)
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
+#ifdef CONFIG_XFRM
+int ip_xfrm_me_harder(struct sk_buff **pskb)
+{
+	struct flowi fl;
+	unsigned int hh_len;
+	struct dst_entry *dst;
+
+	if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED)
+		return 0;
+	if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0)
+		return -1;
+
+	dst = (*pskb)->dst;
+	if (dst->xfrm)
+		dst = ((struct xfrm_dst *)dst)->route;
+	dst_hold(dst);
+
+	if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0)
+		return -1;
+
+	dst_release((*pskb)->dst);
+	(*pskb)->dst = dst;
+
+	/* Change in oif may mean change in hh_len. */
+	hh_len = (*pskb)->dst->dev->hard_header_len;
+	if (skb_headroom(*pskb) < hh_len) {
+		struct sk_buff *nskb;
+
+		nskb = skb_realloc_headroom(*pskb, hh_len);
+		if (!nskb)
+			return -1;
+		if ((*pskb)->sk)
+			skb_set_owner_w(nskb, (*pskb)->sk);
+		kfree_skb(*pskb);
+		*pskb = nskb;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ip_xfrm_me_harder);
+#endif
+
 void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
 EXPORT_SYMBOL(ip_nat_decode_session);
 
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 92c54999a19d..7c3f7d380240 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -235,19 +235,19 @@ ip_nat_out(unsigned int hooknum,
 		return NF_ACCEPT;
 
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+#ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN
 	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 
 		if (ct->tuplehash[dir].tuple.src.ip !=
 		    ct->tuplehash[!dir].tuple.dst.ip
-#ifdef CONFIG_XFRM
 		    || ct->tuplehash[dir].tuple.src.u.all !=
 		       ct->tuplehash[!dir].tuple.dst.u.all
-#endif
 		    )
-			return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+			return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
 	}
+#endif
 	return ret;
 }
 
-- 
cgit v1.2.3-71-gd317


From 5ecfbae093f0c37311e89b29bfc0c9d586eace87 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 15 Feb 2006 22:50:10 +0300
Subject: [PATCH] fix zap_thread's ptrace related problems

1. The tracee can go from ptrace_stop() to do_signal_stop()
   after __ptrace_unlink(p).

2. It is unsafe to __ptrace_unlink(p) while p->parent may wait
   for tasklist_lock in ptrace_detach().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c              |  2 +-
 include/linux/ptrace.h |  1 +
 kernel/ptrace.c        | 25 +++++++++++++++----------
 3 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 055378d2513e..0e1c95074d42 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1403,7 +1403,7 @@ static void zap_threads (struct mm_struct *mm)
 		do_each_thread(g,p) {
 			if (mm == p->mm && p != tsk &&
 			    p->ptrace && p->parent->mm == mm) {
-				__ptrace_unlink(p);
+				__ptrace_detach(p, 0);
 			}
 		} while_each_thread(g,p);
 		write_unlock_irq(&tasklist_lock);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 9d5cd106b344..0d36750fc0f1 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -84,6 +84,7 @@ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __us
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
 extern int ptrace_attach(struct task_struct *tsk);
 extern int ptrace_detach(struct task_struct *, unsigned int);
+extern void __ptrace_detach(struct task_struct *, unsigned int);
 extern void ptrace_disable(struct task_struct *);
 extern int ptrace_check_attach(struct task_struct *task, int kill);
 extern int ptrace_request(struct task_struct *child, long request, long addr, long data);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index d2cf144d0af5..d95a72c9279d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child)
  */
 void __ptrace_unlink(task_t *child)
 {
-	if (!child->ptrace)
-		BUG();
+	BUG_ON(!child->ptrace);
+
 	child->ptrace = 0;
 	if (!list_empty(&child->ptrace_list)) {
 		list_del_init(&child->ptrace_list);
@@ -184,22 +184,27 @@ bad:
 	return retval;
 }
 
+void __ptrace_detach(struct task_struct *child, unsigned int data)
+{
+	child->exit_code = data;
+	/* .. re-parent .. */
+	__ptrace_unlink(child);
+	/* .. and wake it up. */
+	if (child->exit_state != EXIT_ZOMBIE)
+		wake_up_process(child);
+}
+
 int ptrace_detach(struct task_struct *child, unsigned int data)
 {
 	if (!valid_signal(data))
-		return	-EIO;
+		return -EIO;
 
 	/* Architecture-specific hardware disable .. */
 	ptrace_disable(child);
 
-	/* .. re-parent .. */
-	child->exit_code = data;
-
 	write_lock_irq(&tasklist_lock);
-	__ptrace_unlink(child);
-	/* .. and wake it up. */
-	if (child->exit_state != EXIT_ZOMBIE)
-		wake_up_process(child);
+	if (child->ptrace)
+		__ptrace_detach(child, data);
 	write_unlock_irq(&tasklist_lock);
 
 	return 0;
-- 
cgit v1.2.3-71-gd317


From 48d5cad87c3a4998d0bda16ccfb5c60dfe4de5fb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Feb 2006 15:10:22 -0800
Subject: [XFRM]: Fix SNAT-related crash in xfrm4_output_finish

When a packet matching an IPsec policy is SNATed so it doesn't match any
policy anymore, it loses its xfrm bundle, which makes xfrm4_output_finish
crash because of a NULL pointer dereference.

This patch directs these packets to the original output path instead. Since
the packets have already passed the POST_ROUTING hook, but need to start at
the beginning of the original output path which includes another
POST_ROUTING invocation, a flag is added to the IPCB to indicate that the
packet was rerouted and doesn't need to pass the POST_ROUTING hook again.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 19 +++++++++++++++----
 include/net/ip.h          |  1 +
 include/net/xfrm.h        |  1 -
 net/ipv4/ip_gre.c         |  3 ++-
 net/ipv4/ip_output.c      | 16 ++++++++++------
 net/ipv4/ipip.c           |  3 ++-
 net/ipv4/xfrm4_output.c   | 13 ++++++++++---
 7 files changed, 40 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 4cf6088625c1..3ca3d9ee78a9 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -184,8 +184,11 @@ static inline int nf_hook_thresh(int pf, unsigned int hook,
 				 struct sk_buff **pskb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
-				 int (*okfn)(struct sk_buff *), int thresh)
+				 int (*okfn)(struct sk_buff *), int thresh,
+				 int cond)
 {
+	if (!cond)
+		return 1;
 #ifndef CONFIG_NETFILTER_DEBUG
 	if (list_empty(&nf_hooks[pf][hook]))
 		return 1;
@@ -197,7 +200,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
-	return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN);
+	return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1);
 }
                    
 /* Activate hook; either okfn or kfree_skb called, unless a hook
@@ -224,7 +227,13 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
 
 #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)	       \
 ({int __ret;								       \
-if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\
+if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\
+	__ret = (okfn)(skb);						       \
+__ret;})
+
+#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond)		       \
+({int __ret;								       \
+if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
 	__ret = (okfn)(skb);						       \
 __ret;})
 
@@ -295,11 +304,13 @@ extern struct proc_dir_entry *proc_net_netfilter;
 
 #else /* !CONFIG_NETFILTER */
 #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
+#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
 static inline int nf_hook_thresh(int pf, unsigned int hook,
 				 struct sk_buff **pskb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
-				 int (*okfn)(struct sk_buff *), int thresh)
+				 int (*okfn)(struct sk_buff *), int thresh,
+				 int cond)
 {
 	return okfn(*pskb);
 }
diff --git a/include/net/ip.h b/include/net/ip.h
index 8de0697b364c..fab3d5b3ab1c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -41,6 +41,7 @@ struct inet_skb_parm
 #define IPSKB_XFRM_TUNNEL_SIZE	2
 #define IPSKB_XFRM_TRANSFORMED	4
 #define IPSKB_FRAG_COMPLETE	8
+#define IPSKB_REROUTED		16
 };
 
 struct ipcm_cookie
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d09ca0e7d139..d6111a2f0a23 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -866,7 +866,6 @@ extern int xfrm_state_mtu(struct xfrm_state *x, int mtu);
 extern int xfrm_init_state(struct xfrm_state *x);
 extern int xfrm4_rcv(struct sk_buff *skb);
 extern int xfrm4_output(struct sk_buff *skb);
-extern int xfrm4_output_finish(struct sk_buff *skb);
 extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler);
 extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler);
 extern int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index abe23923e4e7..9981dcd68f11 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -830,7 +830,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->h.raw = skb->nh.raw;
 	skb->nh.raw = skb_push(skb, gre_hlen);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
+			      IPSKB_REROUTED);
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3324fbfe528a..57d290d89ec2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -207,8 +207,10 @@ static inline int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
-	if (skb->dst->xfrm != NULL)
-		return xfrm4_output_finish(skb);
+	if (skb->dst->xfrm != NULL) {
+		IPCB(skb)->flags |= IPSKB_REROUTED;
+		return dst_output(skb);
+	}
 #endif
 	if (skb->len > dst_mtu(skb->dst) &&
 	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
@@ -271,8 +273,9 @@ int ip_mc_output(struct sk_buff *skb)
 				newskb->dev, ip_dev_loopback_xmit);
 	}
 
-	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
-		       ip_finish_output);
+	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
+			    ip_finish_output,
+			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
 int ip_output(struct sk_buff *skb)
@@ -284,8 +287,9 @@ int ip_output(struct sk_buff *skb)
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
 
-	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
-		       ip_finish_output);
+	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
+		            ip_finish_output,
+			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e5cbe72c6b80..03d13742a4b8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -622,7 +622,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->h.raw = skb->nh.raw;
 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
+			      IPSKB_REROUTED);
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d4df0ddd424b..32ad229b4fed 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -152,10 +152,16 @@ error_nolock:
 	goto out_exit;
 }
 
-int xfrm4_output_finish(struct sk_buff *skb)
+static int xfrm4_output_finish(struct sk_buff *skb)
 {
 	int err;
 
+#ifdef CONFIG_NETFILTER
+	if (!skb->dst->xfrm) {
+		IPCB(skb)->flags |= IPSKB_REROUTED;
+		return dst_output(skb);
+	}
+#endif
 	while (likely((err = xfrm4_output_one(skb)) == 0)) {
 		nf_reset(skb);
 
@@ -178,6 +184,7 @@ int xfrm4_output_finish(struct sk_buff *skb)
 
 int xfrm4_output(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
-		       xfrm4_output_finish);
+	return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
+			    xfrm4_output_finish,
+			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
-- 
cgit v1.2.3-71-gd317


From 9c92d3486434e7310cb288587953e2dae4a79701 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Feb 2006 15:18:19 -0800
Subject: [NETFILTER]: Don't invoke okfn in CONFIG_NETFILTER=n variant of
 nf_hook()

nf_hook() is supposed to call the netfilter hook and return control of the
packet back to the caller in case it may pass, the okfn is only used for
queueing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 3ca3d9ee78a9..468896939843 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -318,7 +318,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
-	return okfn(*pskb);
+	return 1;
 }
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
 struct flowi;
-- 
cgit v1.2.3-71-gd317


From b2ee9dbfad14ba8e34a589d552ddc67300a26bec Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Wed, 15 Feb 2006 15:17:40 -0800
Subject: [PATCH] hrtimer: fix multiple macro argument expansion

For two macros the arguments were expanded twice, change them to inline
functions to avoid it.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ktime.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 6aca67a569a2..f3dec45ef874 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -96,10 +96,16 @@ static inline ktime_t ktime_set(const long secs, const unsigned long nsecs)
 		({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; })
 
 /* convert a timespec to ktime_t format: */
-#define timespec_to_ktime(ts)		ktime_set((ts).tv_sec, (ts).tv_nsec)
+static inline ktime_t timespec_to_ktime(struct timespec ts)
+{
+	return ktime_set(ts.tv_sec, ts.tv_nsec);
+}
 
 /* convert a timeval to ktime_t format: */
-#define timeval_to_ktime(tv)		ktime_set((tv).tv_sec, (tv).tv_usec * 1000)
+static inline ktime_t timeval_to_ktime(struct timeval tv)
+{
+	return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC);
+}
 
 /* Map the ktime_t to timespec conversion to ns_to_timespec function */
 #define ktime_to_timespec(kt)		ns_to_timespec((kt).tv64)
-- 
cgit v1.2.3-71-gd317


From a62eaf151d9cb478d127cfbc2e93c498869785b0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 16 Feb 2006 23:41:58 +0100
Subject: [PATCH] x86_64: Add boot option to disable randomized mappings and
 cleanup

AMD SimNow!'s JIT doesn't like them at all in the guest. For distribution
installation it's easiest if it's a boot time option.

Also I moved the variable to a more appropriate place and made
it independent of sysctl.

And marked it __read_mostly, which it is.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/kernel-parameters.txt |  3 +++
 arch/i386/kernel/cpu/transmeta.c    |  1 +
 include/linux/kernel.h              |  6 ------
 include/linux/mm.h                  |  2 ++
 kernel/sysctl.c                     |  2 --
 mm/memory.c                         | 10 ++++++++++
 6 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ac75b57edf2e..b874771385cd 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1638,6 +1638,9 @@ running once the system is up.
 			Format:
 			<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
 
+	norandmaps	Don't use address space randomization
+			Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space
+
 
 ______________________________________________________________________
 Changelog:
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index bdbeb77f4e22..7214c9b577ab 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -1,4 +1,5 @@
 #include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/init.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index b49affa0ac5a..3b507bf05d09 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -326,12 +326,6 @@ struct sysinfo {
 /* Force a compilation error if condition is true */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 
-#ifdef CONFIG_SYSCTL
-extern int randomize_va_space;
-#else
-#define randomize_va_space 1
-#endif
-
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 75e9f0724997..26e1663a5cbe 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1051,5 +1051,7 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 void drop_pagecache(void);
 void drop_slab(void);
 
+extern int randomize_va_space;
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 71dd6f62efec..7654d55c47f5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -126,8 +126,6 @@ extern int sysctl_hz_timer;
 extern int acct_parm[];
 #endif
 
-int randomize_va_space = 1;
-
 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
 		       ctl_table *, void **);
 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
diff --git a/mm/memory.c b/mm/memory.c
index 2bee1f21aa8a..9abc6008544b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -82,6 +82,16 @@ EXPORT_SYMBOL(num_physpages);
 EXPORT_SYMBOL(high_memory);
 EXPORT_SYMBOL(vmalloc_earlyreserve);
 
+int randomize_va_space __read_mostly = 1;
+
+static int __init disable_randmaps(char *s)
+{
+	randomize_va_space = 0;
+	return 0;
+}
+__setup("norandmaps", disable_randmaps);
+
+
 /*
  * If a p?d_bad entry is found while walking page tables, report
  * the error, before resetting entry to p?d_none.  Usually (but
-- 
cgit v1.2.3-71-gd317


From 726c14bf499e91e7ede4f1728830aba05c675061 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 17 Feb 2006 10:30:23 +1100
Subject: [PATCH] Provide an interface for getting the current tick length

This provides an interface for arch code to find out how many
nanoseconds are going to be added on to xtime by the next call to
do_timer.  The value returned is a fixed-point number in 52.12 format
in nanoseconds.  The reason for this format is that it gives the
full precision that the timekeeping code is using internally.

The motivation for this is to fix a problem that has arisen on 32-bit
powerpc in that the value returned by do_gettimeofday drifts apart
from xtime if NTP is being used.  PowerPC is now using a lockless
do_gettimeofday based on reading the timebase register and performing
some simple arithmetic.  (This method of getting the time is also
exported to userspace via the VDSO.)  However, the factor and offset
it uses were calculated based on the nominal tick length and weren't
being adjusted when NTP varied the tick length.

Note that 64-bit powerpc has had the lockless do_gettimeofday for a
long time now.  It also had an extremely hairy routine that got called
from the 32-bit compat routine for adjtimex, which adjusted the
factor and offset according to what it thought the timekeeping code
was going to do.  Not only was this only called if a 32-bit task did
adjtimex (i.e. not if a 64-bit task did adjtimex), it was also
duplicating computations from kernel/timer.c and it wasn't clear that
it was (still) correct.

The simple solution is to ask the timekeeping code how long the
current jiffy will be on each timer interrupt, after calling
do_timer.  If this jiffy will be a different length from the last one,
we then need to compute new values for the factor and offset used in
the lockless do_gettimeofday.  In this way we can keep xtime and
do_gettimeofday in sync, even when NTP is varying the tick length.

Note that when adjtimex varies the tick length, it almost always
introduces the variation from the next tick on.  The only case I could
see where adjtimex would vary the length of the current tick is when
an old-style adjtime adjustment is being cancelled.  (It's not clear
to me why the adjustment has to be cancelled immediately rather than
from the next tick on.)  Thus I don't see any real need for a hook in
adjtimex; the rare case of an old-style adjustment being cancelled can
be fixed up at the next tick.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: john stultz <johnstul@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/timex.h |  3 +++
 kernel/timer.c        | 39 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 37 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 04a4a8cb4ed3..b7ca1204e42a 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -345,6 +345,9 @@ time_interpolator_reset(void)
 
 #endif /* !CONFIG_TIME_INTERPOLATION */
 
+/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
+extern u64 current_tick_length(void);
+
 #endif /* KERNEL */
 
 #endif /* LINUX_TIMEX_H */
diff --git a/kernel/timer.c b/kernel/timer.c
index b9dad3994676..fe3a9a9f8328 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -717,12 +717,16 @@ static void second_overflow(void)
 #endif
 }
 
-/* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
+/*
+ * Returns how many microseconds we need to add to xtime this tick
+ * in doing an adjustment requested with adjtime.
+ */
+static long adjtime_adjustment(void)
 {
-	long time_adjust_step, delta_nsec;
+	long time_adjust_step;
 
-	if ((time_adjust_step = time_adjust) != 0 ) {
+	time_adjust_step = time_adjust;
+	if (time_adjust_step) {
 		/*
 		 * We are doing an adjtime thing.  Prepare time_adjust_step to
 		 * be within bounds.  Note that a positive time_adjust means we
@@ -733,10 +737,19 @@ static void update_wall_time_one_tick(void)
 		 */
 		time_adjust_step = min(time_adjust_step, (long)tickadj);
 		time_adjust_step = max(time_adjust_step, (long)-tickadj);
+	}
+	return time_adjust_step;
+}
 
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+	long time_adjust_step, delta_nsec;
+
+	time_adjust_step = adjtime_adjustment();
+	if (time_adjust_step)
 		/* Reduce by this step the amount of time left  */
 		time_adjust -= time_adjust_step;
-	}
 	delta_nsec = tick_nsec + time_adjust_step * 1000;
 	/*
 	 * Advance the phase, once it gets to one microsecond, then
@@ -758,6 +771,22 @@ static void update_wall_time_one_tick(void)
 	}
 }
 
+/*
+ * Return how long ticks are at the moment, that is, how much time
+ * update_wall_time_one_tick will add to xtime next time we call it
+ * (assuming no calls to do_adjtimex in the meantime).
+ * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
+ * bits to the right of the binary point.
+ * This function has no side-effects.
+ */
+u64 current_tick_length(void)
+{
+	long delta_nsec;
+
+	delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
+	return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
+}
+
 /*
  * Using a loop looks inefficient, but "ticks" is
  * usually just one (we shouldn't be losing ticks,
-- 
cgit v1.2.3-71-gd317


From cc1887f3d8ae8ea61efa1a75af8ec0467b9dd546 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 20 Feb 2006 23:48:38 +0900
Subject: [PATCH] libata: fix qc->n_elem == 0 case handling in ata_qc_next_sg

This patch makes ata_for_each_sg() start with pad_sgent when
qc->n_elem is zero.  Previously, ata_for_each_sg() unconditionally
started with qc->__sg, handing the first sg to the fill_sg() routines
even when the entry was invalid.  While at it, unwind the ?: in
ata_qc_next_sg() into an if statement.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 include/linux/libata.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 9e5db2949c58..c91be5e64ede 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -556,6 +556,16 @@ ata_sg_is_last(struct scatterlist *sg, struct ata_queued_cmd *qc)
 	return 0;
 }
 
+static inline struct scatterlist *
+ata_qc_first_sg(struct ata_queued_cmd *qc)
+{
+	if (qc->n_elem)
+		return qc->__sg;
+	if (qc->pad_len)
+		return &qc->pad_sgent;
+	return NULL;
+}
+
 static inline struct scatterlist *
 ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc)
 {
@@ -563,11 +573,13 @@ ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc)
 		return NULL;
 	if (++sg - qc->__sg < qc->n_elem)
 		return sg;
-	return qc->pad_len ? &qc->pad_sgent : NULL;
+	if (qc->pad_len)
+		return &qc->pad_sgent;
+	return NULL;
 }
 
 #define ata_for_each_sg(sg, qc) \
-	for (sg = qc->__sg; sg; sg = ata_qc_next_sg(sg, qc))
+	for (sg = ata_qc_first_sg(qc); sg; sg = ata_qc_next_sg(sg, qc))
 
 static inline unsigned int ata_tag_valid(unsigned int tag)
 {
-- 
cgit v1.2.3-71-gd317


From 9b0f8b040acd8dfd23860754c0d09ff4f44e2cbc Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Mon, 20 Feb 2006 18:27:52 -0800
Subject: [PATCH] Terminate process that fails on a constrained allocation

Some allocations are restricted to a limited set of nodes (due to memory
policies or cpuset constraints).  If the page allocator is not able to find
enough memory then that does not mean that overall system memory is low.

In particular going postal and more or less randomly shooting at processes
is not likely going to help the situation but may just lead to suicide (the
whole system coming down).

It is better to signal to the process that no memory exists given the
constraints that the process (or the configuration of the process) has
placed on the allocation behavior.  The process may be killed but then the
sysadmin or developer can investigate the situation.  The solution is
similar to what we do when running out of hugepages.

This patch adds a check before we kill processes.  At that point
performance considerations do not matter much so we just scan the zonelist
and reconstruct a list of nodes.  If the list of nodes does not contain all
online nodes then this is a constrained allocation and we should kill the
current process.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/sysrq.c |   2 +-
 include/linux/swap.h |   2 +-
 mm/oom_kill.c        | 103 ++++++++++++++++++++++++++++++++++++++-------------
 mm/page_alloc.c      |   2 +-
 4 files changed, 81 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 5765f672e853..d58f82318853 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -243,7 +243,7 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(void *ignored)
 {
-	out_of_memory(GFP_KERNEL, 0);
+	out_of_memory(&NODE_DATA(0)->node_zonelists[ZONE_NORMAL], GFP_KERNEL, 0);
 }
 
 static DECLARE_WORK(moom_work, moom_callback, NULL);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f3e17d5963c3..d572b19afb7d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -147,7 +147,7 @@ struct swap_list_t {
 #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
 
 /* linux/mm/oom_kill.c */
-extern void out_of_memory(gfp_t gfp_mask, int order);
+extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
 
 /* linux/mm/memory.c */
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 949eba1d5ba3..8123fad5a485 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -132,6 +132,36 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	return points;
 }
 
+/*
+ * Types of limitations to the nodes from which allocations may occur
+ */
+#define CONSTRAINT_NONE 1
+#define CONSTRAINT_MEMORY_POLICY 2
+#define CONSTRAINT_CPUSET 3
+
+/*
+ * Determine the type of allocation constraint.
+ */
+static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
+{
+#ifdef CONFIG_NUMA
+	struct zone **z;
+	nodemask_t nodes = node_online_map;
+
+	for (z = zonelist->zones; *z; z++)
+		if (cpuset_zone_allowed(*z, gfp_mask))
+			node_clear((*z)->zone_pgdat->node_id,
+					nodes);
+		else
+			return CONSTRAINT_CPUSET;
+
+	if (!nodes_empty(nodes))
+		return CONSTRAINT_MEMORY_POLICY;
+#endif
+
+	return CONSTRAINT_NONE;
+}
+
 /*
  * Simple selection loop. We chose the process with the highest
  * number of 'points'. We expect the caller will lock the tasklist.
@@ -184,7 +214,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
  * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
  * we select a process with CAP_SYS_RAW_IO set).
  */
-static void __oom_kill_task(task_t *p)
+static void __oom_kill_task(task_t *p, const char *message)
 {
 	if (p->pid == 1) {
 		WARN_ON(1);
@@ -200,8 +230,8 @@ static void __oom_kill_task(task_t *p)
 		return;
 	}
 	task_unlock(p);
-	printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n",
-							p->pid, p->comm);
+	printk(KERN_ERR "%s: Killed process %d (%s).\n",
+				message, p->pid, p->comm);
 
 	/*
 	 * We give our sacrificial lamb high priority and access to
@@ -214,7 +244,7 @@ static void __oom_kill_task(task_t *p)
 	force_sig(SIGKILL, p);
 }
 
-static struct mm_struct *oom_kill_task(task_t *p)
+static struct mm_struct *oom_kill_task(task_t *p, const char *message)
 {
 	struct mm_struct *mm = get_task_mm(p);
 	task_t * g, * q;
@@ -226,21 +256,21 @@ static struct mm_struct *oom_kill_task(task_t *p)
 		return NULL;
 	}
 
-	__oom_kill_task(p);
+	__oom_kill_task(p, message);
 	/*
 	 * kill all processes that share the ->mm (i.e. all threads),
 	 * but are in a different thread group
 	 */
 	do_each_thread(g, q)
 		if (q->mm == mm && q->tgid != p->tgid)
-			__oom_kill_task(q);
+			__oom_kill_task(q, message);
 	while_each_thread(g, q);
 
 	return mm;
 }
 
 static struct mm_struct *oom_kill_process(struct task_struct *p,
-					  unsigned long points)
+				unsigned long points, const char *message)
 {
  	struct mm_struct *mm;
 	struct task_struct *c;
@@ -253,11 +283,11 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
 		c = list_entry(tsk, struct task_struct, sibling);
 		if (c->mm == p->mm)
 			continue;
-		mm = oom_kill_task(c);
+		mm = oom_kill_task(c, message);
 		if (mm)
 			return mm;
 	}
-	return oom_kill_task(p);
+	return oom_kill_task(p, message);
 }
 
 /**
@@ -268,10 +298,10 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(gfp_t gfp_mask, int order)
+void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
 	struct mm_struct *mm = NULL;
-	task_t * p;
+	task_t *p;
 	unsigned long points;
 
 	if (printk_ratelimit()) {
@@ -283,25 +313,48 @@ void out_of_memory(gfp_t gfp_mask, int order)
 
 	cpuset_lock();
 	read_lock(&tasklist_lock);
+
+	/*
+	 * Check if there were limitations on the allocation (only relevant for
+	 * NUMA) that may require different handling.
+	 */
+	switch (constrained_alloc(zonelist, gfp_mask)) {
+	case CONSTRAINT_MEMORY_POLICY:
+		mm = oom_kill_process(current, points,
+				"No available memory (MPOL_BIND)");
+		break;
+
+	case CONSTRAINT_CPUSET:
+		mm = oom_kill_process(current, points,
+				"No available memory in cpuset");
+		break;
+
+	case CONSTRAINT_NONE:
 retry:
-	p = select_bad_process(&points);
+		/*
+		 * Rambo mode: Shoot down a process and hope it solves whatever
+		 * issues we may have.
+		 */
+		p = select_bad_process(&points);
 
-	if (PTR_ERR(p) == -1UL)
-		goto out;
+		if (PTR_ERR(p) == -1UL)
+			goto out;
 
-	/* Found nothing?!?! Either we hang forever, or we panic. */
-	if (!p) {
-		read_unlock(&tasklist_lock);
-		cpuset_unlock();
-		panic("Out of memory and no killable processes...\n");
-	}
+		/* Found nothing?!?! Either we hang forever, or we panic. */
+		if (!p) {
+			read_unlock(&tasklist_lock);
+			cpuset_unlock();
+			panic("Out of memory and no killable processes...\n");
+		}
 
-	mm = oom_kill_process(p, points);
-	if (!mm)
-		goto retry;
+		mm = oom_kill_process(p, points, "Out of memory");
+		if (!mm)
+			goto retry;
+
+		break;
+	}
 
- out:
-	read_unlock(&tasklist_lock);
+out:
 	cpuset_unlock();
 	if (mm)
 		mmput(mm);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 208812b25597..791690d7d3fa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1015,7 +1015,7 @@ rebalance:
 		if (page)
 			goto got_pg;
 
-		out_of_memory(gfp_mask, order);
+		out_of_memory(zonelist, gfp_mask, order);
 		goto restart;
 	}
 
-- 
cgit v1.2.3-71-gd317


From c255d844dd73616f23e4b4733edcc2e5fa4042b2 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Mon, 20 Feb 2006 18:27:58 -0800
Subject: [PATCH] suspend-to-ram: allow video options to be set at runtime

Currently, acpi video options can only be set on the kernel command line.
That's a little inflexible; I'd like a userland s2ram application that just
works, and modifying the kernel command line according to a whitelist is not
fun.  It is better to just allow the s2ram application to set video options
just before suspend (according to the whitelist).

This implements a sysctl to allow setting suspend video options without reboot.

(akpm: Documentation updates for this new sysctl are pending..)

Signed-off-by: Pavel Machek <pavel@suse.cz>
Cc: "Brown, Len" <len.brown@intel.com>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/sysctl/kernel.txt | 10 ++++++++++
 include/linux/acpi.h            |  3 ++-
 include/linux/sysctl.h          |  1 +
 kernel/sysctl.c                 | 16 ++++++++++++----
 4 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 9f11d36a8c10..b0c7ab93dcb9 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -16,6 +16,7 @@ before actually making adjustments.
 
 Currently, these files might (depending on your configuration)
 show up in /proc/sys/kernel:
+- acpi_video_flags
 - acct
 - core_pattern
 - core_uses_pid
@@ -57,6 +58,15 @@ show up in /proc/sys/kernel:
 
 ==============================================================
 
+acpi_video_flags:
+
+flags
+
+See Doc*/kernel/power/video.txt, it allows mode of video boot to be
+set during run time.
+
+==============================================================
+
 acct:
 
 highwater lowwater frequency
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 84d3d9f034ce..d3bc25e6d27d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -427,7 +427,8 @@ extern int acpi_mp_config;
 extern struct acpi_table_mcfg_config *pci_mmcfg_config;
 extern int pci_mmcfg_config_num;
 
-extern int sbf_port ;
+extern int sbf_port;
+extern unsigned long acpi_video_flags;
 
 #else	/* !CONFIG_ACPI */
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 32a4139c4ad8..0e92bf7ec28e 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -146,6 +146,7 @@ enum
 	KERN_RANDOMIZE=68, /* int: randomize virtual address space */
 	KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */
 	KERN_SPIN_RETRY=70,	/* int: number of spinlock retries */
+	KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
 };
 
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7654d55c47f5..ebc41bf22f1e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -44,14 +44,12 @@
 #include <linux/limits.h>
 #include <linux/dcache.h>
 #include <linux/syscalls.h>
+#include <linux/nfs_fs.h>
+#include <linux/acpi.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 
-#ifdef CONFIG_ROOT_NFS
-#include <linux/nfs_fs.h>
-#endif
-
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
@@ -655,6 +653,16 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#endif
+#ifdef CONFIG_ACPI_SLEEP
+	{
+		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
+		.procname	= "acpi_video_flags",
+		.data		= &acpi_video_flags,
+		.maxlen		= sizeof (unsigned long),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{ .ctl_name = 0 }
 };
-- 
cgit v1.2.3-71-gd317


From 7a9166e3b037296366cea6f3c97f705d33e209e6 Mon Sep 17 00:00:00 2001
From: Luke Yang <luke.adi@gmail.com>
Date: Mon, 20 Feb 2006 18:28:07 -0800
Subject: [PATCH] Fix undefined symbols for nommu architecture

Signed-off-by: Luke Yang <luke.adi@gmail.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 4 ++++
 kernel/sysctl.c    | 2 ++
 mm/nommu.c         | 2 ++
 3 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 26e1663a5cbe..498ff8778fb6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1051,7 +1051,11 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 void drop_pagecache(void);
 void drop_slab(void);
 
+#ifndef CONFIG_MMU
+#define randomize_va_space 0
+#else
 extern int randomize_va_space;
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ebc41bf22f1e..c05a2b7125e1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -636,6 +636,7 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#if defined(CONFIG_MMU)
 	{
 		.ctl_name	= KERN_RANDOMIZE,
 		.procname	= "randomize_va_space",
@@ -644,6 +645,7 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#endif
 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
 	{
 		.ctl_name	= KERN_SPIN_RETRY,
diff --git a/mm/nommu.c b/mm/nommu.c
index c10262d68232..99d21020ec9d 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -57,6 +57,8 @@ EXPORT_SYMBOL(vmalloc);
 EXPORT_SYMBOL(vfree);
 EXPORT_SYMBOL(vmalloc_to_page);
 EXPORT_SYMBOL(vmalloc_32);
+EXPORT_SYMBOL(vmap);
+EXPORT_SYMBOL(vunmap);
 
 /*
  * Handle all mappings that got truncated by a "truncate()"
-- 
cgit v1.2.3-71-gd317


From 7fd105e758c8d746d57ab7e77f100e096bf153c8 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 20 Feb 2006 18:28:08 -0800
Subject: [PATCH] Fix compile for CONFIG_SYSVIPC=n or CONFIG_SYSCTL=n

The compat syscalls are added to sys_ni.c since they are not defined if the
above CONFIG options are off.  Also, nfs would not build with CONFIG_SYSCTL
off.

Noticed by Arthur Othieno.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfs_fs.h | 2 +-
 kernel/sys_ni.c        | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 547d649b274e..b4dc6e2e10c9 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -398,7 +398,7 @@ extern struct inode_operations nfs_symlink_inode_operations;
 extern int nfs_register_sysctl(void);
 extern void nfs_unregister_sysctl(void);
 #else
-#define nfs_register_sysctl() do { } while(0)
+#define nfs_register_sysctl() 0
 #define nfs_unregister_sysctl() do { } while(0)
 #endif
 
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 17313b99e53d..1067090db6b1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -104,6 +104,8 @@ cond_syscall(sys_setreuid16);
 cond_syscall(sys_setuid16);
 cond_syscall(sys_vm86old);
 cond_syscall(sys_vm86);
+cond_syscall(compat_sys_ipc);
+cond_syscall(compat_sys_sysctl);
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);
-- 
cgit v1.2.3-71-gd317


From 5bd546aa78b5d74f3162815e41940f862215d9e3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Fri, 17 Feb 2006 20:23:29 +0000
Subject: [MMC] Fix mmc_cmd_type() mask

It's MMC_CMD_MASK not MMC_CMD_TYPE.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/mmc/mmc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index f38872abc126..bdc556d88498 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -49,7 +49,7 @@ struct mmc_command {
 /*
  * These are the command types.
  */
-#define mmc_cmd_type(cmd)	((cmd)->flags & MMC_CMD_TYPE)
+#define mmc_cmd_type(cmd)	((cmd)->flags & MMC_CMD_MASK)
 
 	unsigned int		retries;	/* max number of retries */
 	unsigned int		error;		/* command error */
-- 
cgit v1.2.3-71-gd317


From fa675765afed59bb89adba3369094ebd428b930b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 22 Feb 2006 09:39:02 -0800
Subject: Revert mount/umount uevent removal

This change reverts the 033b96fd30db52a710d97b06f87d16fc59fee0f1 commit
from Kay Sievers that removed the mount/umount uevents from the kernel.
Some older versions of HAL still depend on these events to detect when a
new device has been mounted.  These events are not correctly emitted,
and are broken by design, and so, should not be relied upon by any
future program.  Instead, the /proc/mounts file should be polled to
properly detect this kind of event.

A feature-removal-schedule.txt entry has been added, noting when this
interface will be removed from the kernel.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/feature-removal-schedule.txt |  9 +++++++++
 fs/super.c                                 | 15 ++++++++++++++-
 include/linux/kobject.h                    |  6 ++++--
 lib/kobject_uevent.c                       |  4 ++++
 4 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index b730d765b525..be5ae600f533 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -171,3 +171,12 @@ Why:	The ISA interface is faster and should be always available. The I2C
 	probing is also known to cause trouble in at least one case (see
 	bug #5889.)
 Who:	Jean Delvare <khali@linux-fr.org>
+
+---------------------------
+
+What:	mount/umount uevents
+When:	February 2007
+Why:	These events are not correct, and do not properly let userspace know
+	when a file system has been mounted or unmounted.  Userspace should
+	poll the /proc/mounts file instead to detect this properly.
+Who:	Greg Kroah-Hartman <gregkh@suse.de>
diff --git a/fs/super.c b/fs/super.c
index 30294218fa63..e20b5580afd5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -666,6 +666,16 @@ static int test_bdev_super(struct super_block *s, void *data)
 	return (void *)s->s_bdev == data;
 }
 
+static void bdev_uevent(struct block_device *bdev, enum kobject_action action)
+{
+	if (bdev->bd_disk) {
+		if (bdev->bd_part)
+			kobject_uevent(&bdev->bd_part->kobj, action);
+		else
+			kobject_uevent(&bdev->bd_disk->kobj, action);
+	}
+}
+
 struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int))
@@ -707,8 +717,10 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 			up_write(&s->s_umount);
 			deactivate_super(s);
 			s = ERR_PTR(error);
-		} else
+		} else {
 			s->s_flags |= MS_ACTIVE;
+			bdev_uevent(bdev, KOBJ_MOUNT);
+		}
 	}
 
 	return s;
@@ -724,6 +736,7 @@ void kill_block_super(struct super_block *sb)
 {
 	struct block_device *bdev = sb->s_bdev;
 
+	bdev_uevent(bdev, KOBJ_UMOUNT);
 	generic_shutdown_super(sb);
 	sync_blockdev(bdev);
 	close_bdev_excl(bdev);
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 2a8d8da70961..c374b5fa8d3b 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -41,8 +41,10 @@ enum kobject_action {
 	KOBJ_ADD	= (__force kobject_action_t) 0x01,	/* exclusive to core */
 	KOBJ_REMOVE	= (__force kobject_action_t) 0x02,	/* exclusive to core */
 	KOBJ_CHANGE	= (__force kobject_action_t) 0x03,	/* device state change */
-	KOBJ_OFFLINE	= (__force kobject_action_t) 0x04,	/* device offline */
-	KOBJ_ONLINE	= (__force kobject_action_t) 0x05,	/* device online */
+	KOBJ_MOUNT	= (__force kobject_action_t) 0x04,	/* mount event for block devices (broken) */
+	KOBJ_UMOUNT	= (__force kobject_action_t) 0x05,	/* umount event for block devices (broken) */
+	KOBJ_OFFLINE	= (__force kobject_action_t) 0x06,	/* device offline */
+	KOBJ_ONLINE	= (__force kobject_action_t) 0x07,	/* device online */
 };
 
 struct kobject {
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 1b1985c136ec..086a0c6e888e 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -38,6 +38,10 @@ static char *action_to_string(enum kobject_action action)
 		return "remove";
 	case KOBJ_CHANGE:
 		return "change";
+	case KOBJ_MOUNT:
+		return "mount";
+	case KOBJ_UMOUNT:
+		return "umount";
 	case KOBJ_OFFLINE:
 		return "offline";
 	case KOBJ_ONLINE:
-- 
cgit v1.2.3-71-gd317


From 85edae14e4ee5e68cf037e9e4bca7498ea16874d Mon Sep 17 00:00:00 2001
From: Michal Janusz Miroslaw <M.Miroslaw@elka.pw.edu.pl>
Date: Thu, 23 Feb 2006 09:49:35 +0000
Subject: [SERIAL] Trivial comment fix: include/linux/serial_reg.h

Trivial comment fix for include/linux/serial_reg.h

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/serial_reg.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h
index 6a2bb955844b..3c8a6aa77415 100644
--- a/include/linux/serial_reg.h
+++ b/include/linux/serial_reg.h
@@ -247,10 +247,10 @@
 #define UART_CTR	0xFF
 
 /*
- * The 16C950 Additional Control Reigster
+ * The 16C950 Additional Control Register
  */
 #define UART_ACR_RXDIS	0x01	/* Receiver disable */
-#define UART_ACR_TXDIS	0x02	/* Receiver disable */
+#define UART_ACR_TXDIS	0x02	/* Transmitter disable */
 #define UART_ACR_DSRFC	0x04	/* DSR Flow Control */
 #define UART_ACR_TLENB	0x20	/* 950 trigger levels enable */
 #define UART_ACR_ICRRD	0x40	/* ICR Read enable */
-- 
cgit v1.2.3-71-gd317


From c04030e16dbea2f7581f82cc6688695927f6ac5b Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Fri, 24 Feb 2006 13:04:21 -0800
Subject: [PATCH] flags parameter for linkat

I'm currently at the POSIX meeting and one thing covered was the
incompatibility of Linux's link() with the POSIX definition: when the old
name is a symlink, Linux does not follow it, whereas POSIX requires it does.

Even if somebody thinks this is a good default behavior we cannot change this
because it would break the ABI.  But the fact remains that some application
might want this behavior.

We have one chance to help implementing this without breaking the behavior.
 For this we could use the new linkat interface which would need a new
flags parameter.  If the new parameter is AT_SYMLINK_FOLLOW the new
behavior could be invoked.

I do not want to introduce such a patch now.  But we could add the
parameter now, just don't use it.  The patch below would do this.  Can we
get this late patch applied before the release more or less fixes the
syscall API?

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/mips/kernel/scall32-o32.S    | 2 +-
 arch/s390/kernel/compat_wrapper.S | 1 +
 fs/namei.c                        | 8 ++++++--
 include/linux/syscalls.h          | 2 +-
 4 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index d83e033dbc87..2f2dc54b2e26 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -626,7 +626,7 @@ einval:	li	v0, -EINVAL
 	sys	sys_fstatat64		4
 	sys	sys_unlinkat		3
 	sys	sys_renameat		4	/* 4295 */
-	sys	sys_linkat		4
+	sys	sys_linkat		5
 	sys	sys_symlinkat		3
 	sys	sys_readlinkat		4
 	sys	sys_fchmodat		3
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 615964cca15f..50e80138e7ad 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1552,6 +1552,7 @@ sys_linkat_wrapper:
 	llgtr	%r3,%r3			# const char *
 	lgfr	%r4,%r4			# int
 	llgtr	%r5,%r5			# const char *
+	lgfr	%r6,%r6			# int
 	jg	sys_linkat
 
 	.globl sys_symlinkat_wrapper
diff --git a/fs/namei.c b/fs/namei.c
index e28de846c591..557dcf395ca1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2224,13 +2224,17 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
  * and other special files.  --ADM
  */
 asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
-			   int newdfd, const char __user *newname)
+			   int newdfd, const char __user *newname,
+			   int flags)
 {
 	struct dentry *new_dentry;
 	struct nameidata nd, old_nd;
 	int error;
 	char * to;
 
+	if (flags != 0)
+		return -EINVAL;
+
 	to = getname(newname);
 	if (IS_ERR(to))
 		return PTR_ERR(to);
@@ -2263,7 +2267,7 @@ exit:
 
 asmlinkage long sys_link(const char __user *oldname, const char __user *newname)
 {
-	return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname);
+	return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
 
 /*
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d73501ba7e44..b9ea44ac0ddb 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -543,7 +543,7 @@ asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag);
 asmlinkage long sys_symlinkat(const char __user * oldname,
 			      int newdfd, const char __user * newname);
 asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
-			   int newdfd, const char __user *newname);
+			   int newdfd, const char __user *newname, int flags);
 asmlinkage long sys_renameat(int olddfd, const char __user * oldname,
 			     int newdfd, const char __user * newname);
 asmlinkage long sys_futimesat(int dfd, char __user *filename,
-- 
cgit v1.2.3-71-gd317


From bafac2a512bf4fd2ce7520f3976ce8aab4435f74 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 27 Feb 2006 13:04:17 -0800
Subject: [NETFILTER]: Restore {ipt,ip6t,ebt}_LOG compatibility

The nfnetlink_log infrastructure changes broke compatibility of the LOG
targets. They currently use whatever log backend was registered first,
which means that if ipt_ULOG was loaded first, no messages will be printed
to the ring buffer anymore.

Restore compatibility by using the old log functions by default and only use
the nf_log backend if the user explicitly said so.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_bridge/ebt_log.h | 1 +
 include/linux/netfilter_ipv4/ipt_LOG.h   | 3 ++-
 include/linux/netfilter_ipv6/ip6t_LOG.h  | 3 ++-
 net/bridge/netfilter/ebt_log.c           | 7 ++++++-
 net/ipv4/netfilter/ipt_LOG.c             | 7 ++++++-
 net/ipv6/netfilter/ip6t_LOG.c            | 7 ++++++-
 6 files changed, 23 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h
index 358fbc84fb59..96e231ae7554 100644
--- a/include/linux/netfilter_bridge/ebt_log.h
+++ b/include/linux/netfilter_bridge/ebt_log.h
@@ -3,6 +3,7 @@
 
 #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */
 #define EBT_LOG_ARP 0x02
+#define EBT_LOG_NFLOG 0x04
 #define EBT_LOG_MASK (EBT_LOG_IP | EBT_LOG_ARP)
 #define EBT_LOG_PREFIX_SIZE 30
 #define EBT_LOG_WATCHER "log"
diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h
index 22d16177319b..892f9a33fea8 100644
--- a/include/linux/netfilter_ipv4/ipt_LOG.h
+++ b/include/linux/netfilter_ipv4/ipt_LOG.h
@@ -6,7 +6,8 @@
 #define IPT_LOG_TCPOPT		0x02	/* Log TCP options */
 #define IPT_LOG_IPOPT		0x04	/* Log IP options */
 #define IPT_LOG_UID		0x08	/* Log UID owning local socket */
-#define IPT_LOG_MASK		0x0f
+#define IPT_LOG_NFLOG		0x10	/* Log using nf_log backend */
+#define IPT_LOG_MASK		0x1f
 
 struct ipt_log_info {
 	unsigned char level;
diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h
index 9008ff5c40ae..060c1a1c6c60 100644
--- a/include/linux/netfilter_ipv6/ip6t_LOG.h
+++ b/include/linux/netfilter_ipv6/ip6t_LOG.h
@@ -6,7 +6,8 @@
 #define IP6T_LOG_TCPOPT		0x02	/* Log TCP options */
 #define IP6T_LOG_IPOPT		0x04	/* Log IP options */
 #define IP6T_LOG_UID		0x08	/* Log UID owning local socket */
-#define IP6T_LOG_MASK		0x0f
+#define IP6T_LOG_NFLOG		0x10	/* Log using nf_log backend */
+#define IP6T_LOG_MASK		0x1f
 
 struct ip6t_log_info {
 	unsigned char level;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 0128fbbe2328..288ff1d4ccc4 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -166,7 +166,12 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
 	li.u.log.level = info->loglevel;
 	li.u.log.logflags = info->bitmask;
 
-	nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, info->prefix);
+	if (info->bitmask & EBT_LOG_NFLOG)
+		nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li,
+		              info->prefix);
+	else
+		ebt_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li,
+		               info->prefix);
 }
 
 static struct ebt_watcher log =
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 6606ddb66a29..cc27545ff97f 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -425,7 +425,12 @@ ipt_log_target(struct sk_buff **pskb,
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix);
+	if (loginfo->logflags & IPT_LOG_NFLOG)
+		nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li,
+		              loginfo->prefix);
+	else
+		ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li,
+		               loginfo->prefix);
 
 	return IPT_CONTINUE;
 }
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 77c725832dec..6b930efa9fb9 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -436,7 +436,12 @@ ip6t_log_target(struct sk_buff **pskb,
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix);
+	if (loginfo->logflags & IP6T_LOG_NFLOG)
+		nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li,
+		              loginfo->prefix);
+	else
+		ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li,
+		                loginfo->prefix);
 
 	return IP6T_CONTINUE;
 }
-- 
cgit v1.2.3-71-gd317


From d2b176ed878d4d5fcc0bd35656dfd373f3702af9 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Tue, 28 Feb 2006 09:42:23 -0800
Subject: [IA64] sysctl option to silence unaligned trap warnings

Allow sysadmin to disable all warnings about userland apps
making unaligned accesses by using:
 # echo 1 > /proc/sys/kernel/ignore-unaligned-usertrap
Rather than having to use prctl on a process by process basis.

Default behaviour leaves the warnings enabled.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/unaligned.c | 31 ++++++++++++++++++++++++++++---
 include/linux/sysctl.h       |  1 +
 kernel/sysctl.c              | 14 ++++++++++++++
 3 files changed, 43 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 112913896844..1e357550c776 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -52,6 +52,15 @@ dump (const char *str, void *vp, size_t len)
 #define IA64_FIRST_ROTATING_FR	32
 #define SIGN_EXT9		0xffffffffffffff00ul
 
+/*
+ *  sysctl settable hook which tells the kernel whether to honor the
+ *  IA64_THREAD_UAC_NOPRINT prctl.  Because this is user settable, we want
+ *  to allow the super user to enable/disable this for security reasons
+ *  (i.e. don't allow attacker to fill up logs with unaligned accesses).
+ */
+int no_unaligned_warning;
+static int noprint_warning;
+
 /*
  * For M-unit:
  *
@@ -1324,8 +1333,9 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 		if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
 			goto force_sigbus;
 
-		if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
-		    && within_logging_rate_limit())
+		if (!no_unaligned_warning &&
+		    !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
+		    within_logging_rate_limit())
 		{
 			char buf[200];	/* comm[] is at most 16 bytes... */
 			size_t len;
@@ -1340,7 +1350,22 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 			if (user_mode(regs))
 				tty_write_message(current->signal->tty, buf);
 			buf[len-1] = '\0';	/* drop '\r' */
-			printk(KERN_WARNING "%s", buf);	/* watch for command names containing %s */
+			/* watch for command names containing %s */
+			printk(KERN_WARNING "%s", buf);
+		} else {
+			if (no_unaligned_warning && !noprint_warning) {
+				noprint_warning = 1;
+				printk(KERN_WARNING "%s(%d) encountered an "
+				       "unaligned exception which required\n"
+				       "kernel assistance, which degrades "
+				       "the performance of the application.\n"
+				       "Unaligned exception warnings have "
+				       "been disabled by the system "
+				       "administrator\n"
+				       "echo 0 > /proc/sys/kernel/ignore-"
+				       "unaligned-usertrap to re-enable\n",
+				       current->comm, current->pid);
+			}
 		}
 	} else {
 		if (within_logging_rate_limit())
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 0e92bf7ec28e..bac61db26456 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -147,6 +147,7 @@ enum
 	KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */
 	KERN_SPIN_RETRY=70,	/* int: number of spinlock retries */
 	KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
+	KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
 };
 
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c05a2b7125e1..acf6c1550f27 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -124,6 +124,10 @@ extern int sysctl_hz_timer;
 extern int acct_parm[];
 #endif
 
+#ifdef CONFIG_IA64
+extern int no_unaligned_warning;
+#endif
+
 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
 		       ctl_table *, void **);
 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
@@ -665,6 +669,16 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#endif
+#ifdef CONFIG_IA64
+	{
+		.ctl_name	= KERN_IA64_UNALIGNED,
+		.procname	= "ignore-unaligned-usertrap",
+		.data		= &no_unaligned_warning,
+		.maxlen		= sizeof (int),
+	 	.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{ .ctl_name = 0 }
 };
-- 
cgit v1.2.3-71-gd317


From 0551fbd29e16fccd46e41b7d01bf0f8f39b14212 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 28 Feb 2006 16:59:19 -0800
Subject: [PATCH] Add mm->task_size and fix powerpc vdso

This patch adds mm->task_size to keep track of the task size of a given mm
and uses that to fix the powerpc vdso so that it uses the mm task size to
decide what pages to fault in instead of the current thread flags (which
broke when ptracing).

(akpm: I expect that mm_struct.task_size will become the way in which we
finally sort out the confusion between 32-bit processes and 32-bit mm's.  It
may need tweaks, but at this stage this patch is powerpc-only.)

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/powerpc/kernel/vdso.c | 4 ++--
 fs/exec.c                  | 6 ++++++
 include/linux/sched.h      | 5 +++--
 3 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index f0c47dab0903..04f7df39ffbb 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -182,8 +182,8 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
 	unsigned long offset = address - vma->vm_start;
 	struct page *pg;
 #ifdef CONFIG_PPC64
-	void *vbase = test_thread_flag(TIF_32BIT) ?
-		vdso32_kbase : vdso64_kbase;
+	void *vbase = (vma->vm_mm->task_size > TASK_SIZE_USER32) ?
+		vdso64_kbase : vdso32_kbase;
 #else
 	void *vbase = vdso32_kbase;
 #endif
diff --git a/fs/exec.c b/fs/exec.c
index 0e1c95074d42..0b515ac53134 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -885,6 +885,12 @@ int flush_old_exec(struct linux_binprm * bprm)
 	current->flags &= ~PF_RANDOMIZE;
 	flush_thread();
 
+	/* Set the new mm task size. We have to do that late because it may
+	 * depend on TIF_32BIT which is only updated in flush_thread() on
+	 * some architectures like powerpc
+	 */
+	current->mm->task_size = TASK_SIZE;
+
 	if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || 
 	    file_permission(bprm->file, MAY_READ) ||
 	    (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b6f51e3a38ec..ff2e09c953b9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -298,8 +298,9 @@ struct mm_struct {
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
 	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
-        unsigned long mmap_base;		/* base of mmap area */
-        unsigned long cached_hole_size;         /* if non-zero, the largest hole below free_area_cache */
+	unsigned long mmap_base;		/* base of mmap area */
+	unsigned long task_size;		/* size of task vm space */
+	unsigned long cached_hole_size;         /* if non-zero, the largest hole below free_area_cache */
 	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
 	pgd_t * pgd;
 	atomic_t mm_users;			/* How many users with user space? */
-- 
cgit v1.2.3-71-gd317


From 3af1efe8a301f5b1c813f5f761cb1e10d6175605 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Thu, 2 Mar 2006 13:25:26 -0500
Subject: [PATCH] reiserfs: fix unaligned bitmap usage

The bitmaps associated with generation numbers for directory entries
are declared as an array of ints. On some platforms, this causes alignment
exceptions.

The following patch uses the standard bitmap declaration macros to
declare the bitmaps, fixing the problem.

Originally from Takashi Iwai.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Acked-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/namei.c         | 8 ++++----
 include/linux/reiserfs_fs.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index c8123308e060..284f7852de8b 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -247,7 +247,7 @@ static int linear_search_in_dir_item(struct cpu_key *key,
 		/* mark, that this generation number is used */
 		if (de->de_gen_number_bit_string)
 			set_bit(GET_GENERATION_NUMBER(deh_offset(deh)),
-				(unsigned long *)de->de_gen_number_bit_string);
+				de->de_gen_number_bit_string);
 
 		// calculate pointer to name and namelen
 		de->de_entry_num = i;
@@ -431,7 +431,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
 	struct reiserfs_de_head *deh;
 	INITIALIZE_PATH(path);
 	struct reiserfs_dir_entry de;
-	int bit_string[MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1];
+	DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1);
 	int gen_number;
 	char small_buf[32 + DEH_SIZE];	/* 48 bytes now and we avoid kmalloc
 					   if we create file with short name */
@@ -486,7 +486,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
 
 	/* find the proper place for the new entry */
 	memset(bit_string, 0, sizeof(bit_string));
-	de.de_gen_number_bit_string = (char *)bit_string;
+	de.de_gen_number_bit_string = bit_string;
 	retval = reiserfs_find_entry(dir, name, namelen, &path, &de);
 	if (retval != NAME_NOT_FOUND) {
 		if (buffer != small_buf)
@@ -508,7 +508,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
 	}
 
 	gen_number =
-	    find_first_zero_bit((unsigned long *)bit_string,
+	    find_first_zero_bit(bit_string,
 				MAX_GENERATION_NUMBER + 1);
 	if (gen_number > MAX_GENERATION_NUMBER) {
 		/* there is no free generation number */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 7d51149bd793..dad78cecfd20 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1052,7 +1052,7 @@ struct reiserfs_dir_entry {
 	int de_entrylen;
 	int de_namelen;
 	char *de_name;
-	char *de_gen_number_bit_string;
+	unsigned long *de_gen_number_bit_string;
 
 	__u32 de_dir_id;
 	__u32 de_objectid;
-- 
cgit v1.2.3-71-gd317


From 1e4b27df55166ce3b276f55bab223fa4ae8c5525 Mon Sep 17 00:00:00 2001
From: Karsten Keil <kkeil@suse.de>
Date: Mon, 6 Mar 2006 15:42:37 -0800
Subject: [PATCH] i4l: add new PCI IDs for HFC-S PCI

Add new PCI IDs for HFC-S PCI based ISDN TA 'Primux II S0' and 'Primux II S0 NT'
from Gerdes AG

Signed-off-by: Martin Bachem <info@colognechip.com>
Signed-off-by: Karsten Keil <kkeil@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/isdn/hisax/config.c  | 2 ++
 drivers/isdn/hisax/hfc_pci.c | 2 ++
 include/linux/pci_ids.h      | 2 ++
 3 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c
index 8159bcecd0c2..df9d65201819 100644
--- a/drivers/isdn/hisax/config.c
+++ b/drivers/isdn/hisax/config.c
@@ -1929,6 +1929,8 @@ static struct pci_device_id hisax_pci_tbl[] __initdata = {
 	{PCI_VENDOR_ID_CCD,      PCI_DEVICE_ID_CCD_B00B,         PCI_ANY_ID, PCI_ANY_ID},
 	{PCI_VENDOR_ID_CCD,      PCI_DEVICE_ID_CCD_B00C,         PCI_ANY_ID, PCI_ANY_ID},
 	{PCI_VENDOR_ID_CCD,      PCI_DEVICE_ID_CCD_B100,         PCI_ANY_ID, PCI_ANY_ID},
+	{PCI_VENDOR_ID_CCD,      PCI_DEVICE_ID_CCD_B700,         PCI_ANY_ID, PCI_ANY_ID},
+	{PCI_VENDOR_ID_CCD,      PCI_DEVICE_ID_CCD_B701,         PCI_ANY_ID, PCI_ANY_ID},
 	{PCI_VENDOR_ID_ABOCOM,   PCI_DEVICE_ID_ABOCOM_2BD1,      PCI_ANY_ID, PCI_ANY_ID},
 	{PCI_VENDOR_ID_ASUSTEK,  PCI_DEVICE_ID_ASUSTEK_0675,     PCI_ANY_ID, PCI_ANY_ID},
 	{PCI_VENDOR_ID_BERKOM,   PCI_DEVICE_ID_BERKOM_T_CONCEPT, PCI_ANY_ID, PCI_ANY_ID},
diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c
index 4866fc32d8d9..91d25acb5ede 100644
--- a/drivers/isdn/hisax/hfc_pci.c
+++ b/drivers/isdn/hisax/hfc_pci.c
@@ -51,6 +51,8 @@ static const PCI_ENTRY id_list[] =
 	{PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B00B, "Billion", "B00B"},
 	{PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B00C, "Billion", "B00C"},
 	{PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B100, "Seyeon", "B100"},
+	{PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B700, "Primux II S0", "B700"},
+	{PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B701, "Primux II S0 NT", "B701"},
 	{PCI_VENDOR_ID_ABOCOM, PCI_DEVICE_ID_ABOCOM_2BD1, "Abocom/Magitek", "2BD1"},
 	{PCI_VENDOR_ID_ASUSTEK, PCI_DEVICE_ID_ASUSTEK_0675, "Asuscom/Askey", "675"},
 	{PCI_VENDOR_ID_BERKOM, PCI_DEVICE_ID_BERKOM_T_CONCEPT, "German telekom", "T-Concept"},
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 82b83da25d77..1709b5009d2e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1752,6 +1752,8 @@
 #define PCI_DEVICE_ID_CCD_B00B		0xb00b
 #define PCI_DEVICE_ID_CCD_B00C		0xb00c
 #define PCI_DEVICE_ID_CCD_B100		0xb100
+#define PCI_DEVICE_ID_CCD_B700		0xb700
+#define PCI_DEVICE_ID_CCD_B701		0xb701
 
 #define PCI_VENDOR_ID_EXAR		0x13a8
 #define PCI_DEVICE_ID_EXAR_XR17C152	0x0152
-- 
cgit v1.2.3-71-gd317


From 69239749e1ac4f3496906aa4267cb9f61ce52c9c Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 6 Mar 2006 15:42:45 -0800
Subject: [PATCH] fix next_timer_interrupt() for hrtimer

Also from Thomas Gleixner <tglx@linutronix.de>

Function next_timer_interrupt() got broken with a recent patch
6ba1b91213e81aa92b5cf7539f7d2a94ff54947c as sys_nanosleep() was moved to
hrtimer.  This broke things as next_timer_interrupt() did not check hrtimer
tree for next event.

Function next_timer_interrupt() is needed with dyntick (CONFIG_NO_IDLE_HZ,
VST) implementations, as the system can be in idle when next hrtimer event
was supposed to happen.  At least ARM and S390 currently use
next_timer_interrupt().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/arm/kernel/time.c  | 10 ++++++----
 include/linux/hrtimer.h |  4 ++++
 kernel/hrtimer.c        | 35 +++++++++++++++++++++++++++++++++++
 kernel/timer.c          | 16 ++++++++++++++++
 4 files changed, 61 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index d7d932c02866..d6bd435a6857 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -422,12 +422,14 @@ static int timer_dyn_tick_disable(void)
 void timer_dyn_reprogram(void)
 {
 	struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick;
+	unsigned long next, seq;
 
-	if (dyn_tick) {
-		write_seqlock(&xtime_lock);
-		if (dyn_tick->state & DYN_TICK_ENABLED)
+	if (dyn_tick && (dyn_tick->state & DYN_TICK_ENABLED)) {
+		next = next_timer_interrupt();
+		do {
+			seq = read_seqbegin(&xtime_lock);
 			dyn_tick->reprogram(next_timer_interrupt() - jiffies);
-		write_sequnlock(&xtime_lock);
+		} while (read_seqretry(&xtime_lock, seq));
 	}
 }
 
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 6361544bb6ae..6401c31d6add 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -116,6 +116,10 @@ extern int hrtimer_try_to_cancel(struct hrtimer *timer);
 extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
 extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
 
+#ifdef CONFIG_NO_IDLE_HZ
+extern ktime_t hrtimer_get_next_event(void);
+#endif
+
 static inline int hrtimer_active(const struct hrtimer *timer)
 {
 	return timer->state == HRTIMER_PENDING;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 5ae51f1bc7c8..14bc9cfa6399 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -505,6 +505,41 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 	return rem;
 }
 
+#ifdef CONFIG_NO_IDLE_HZ
+/**
+ * hrtimer_get_next_event - get the time until next expiry event
+ *
+ * Returns the delta to the next expiry event or KTIME_MAX if no timer
+ * is pending.
+ */
+ktime_t hrtimer_get_next_event(void)
+{
+	struct hrtimer_base *base = __get_cpu_var(hrtimer_bases);
+	ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
+		struct hrtimer *timer;
+
+		spin_lock_irqsave(&base->lock, flags);
+		if (!base->first) {
+			spin_unlock_irqrestore(&base->lock, flags);
+			continue;
+		}
+		timer = rb_entry(base->first, struct hrtimer, node);
+		delta.tv64 = timer->expires.tv64;
+		spin_unlock_irqrestore(&base->lock, flags);
+		delta = ktime_sub(delta, base->get_time());
+		if (delta.tv64 < mindelta.tv64)
+			mindelta.tv64 = delta.tv64;
+	}
+	if (mindelta.tv64 < 0)
+		mindelta.tv64 = 0;
+	return mindelta;
+}
+#endif
+
 /**
  * hrtimer_init - initialize a timer to the given clock
  *
diff --git a/kernel/timer.c b/kernel/timer.c
index fc6646fd5aab..8256f3f5ec0d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -489,9 +489,21 @@ unsigned long next_timer_interrupt(void)
 	struct list_head *list;
 	struct timer_list *nte;
 	unsigned long expires;
+	unsigned long hr_expires = MAX_JIFFY_OFFSET;
+	ktime_t hr_delta;
 	tvec_t *varray[4];
 	int i, j;
 
+	hr_delta = hrtimer_get_next_event();
+	if (hr_delta.tv64 != KTIME_MAX) {
+		struct timespec tsdelta;
+		tsdelta = ktime_to_timespec(hr_delta);
+		hr_expires = timespec_to_jiffies(&tsdelta);
+		if (hr_expires < 3)
+			return hr_expires + jiffies;
+	}
+	hr_expires += jiffies;
+
 	base = &__get_cpu_var(tvec_bases);
 	spin_lock(&base->t_base.lock);
 	expires = base->timer_jiffies + (LONG_MAX >> 1);
@@ -542,6 +554,10 @@ found:
 		}
 	}
 	spin_unlock(&base->t_base.lock);
+
+	if (time_before(hr_expires, expires))
+		return hr_expires;
+
 	return expires;
 }
 #endif
-- 
cgit v1.2.3-71-gd317


From 78679302fe428f4f3dc853a51ee24f306010d874 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 6 Mar 2006 15:42:49 -0800
Subject: [PATCH] memory-hotplug compile fix

include/linux/memory_hotplug.h:53: warning: 'struct page' declared inside parameter list

(akpm: I tossed in a couple more possibly-needed-sometime struct decls too)

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/memory_hotplug.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 01f03bc06eff..968b1aa3732c 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -6,6 +6,10 @@
 #include <linux/mmzone.h>
 #include <linux/notifier.h>
 
+struct page;
+struct zone;
+struct pglist_data;
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * pgdat resizing functions
-- 
cgit v1.2.3-71-gd317


From a615fa83959896f8eac76c235953fb164cd1a9b9 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Mon, 6 Mar 2006 15:42:50 -0800
Subject: [PATCH] Increase max kmalloc size for very large systems

Systems with extremely large numbers of nodes or cpus need to kmalloc
structures larger than is currently supported.  This patch increases the
maximum supported size for very large systems.

This patch should have no effect on current systems.

(akpm: why not just use alloc_pages() for sysfs_cpus?)

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kmalloc_sizes.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h
index d82d4c05c12d..bda23e00ed71 100644
--- a/include/linux/kmalloc_sizes.h
+++ b/include/linux/kmalloc_sizes.h
@@ -19,8 +19,10 @@
 	CACHE(32768)
 	CACHE(65536)
 	CACHE(131072)
-#ifndef CONFIG_MMU
+#if (NR_CPUS > 512) || (MAX_NUMNODES > 256) || !defined(CONFIG_MMU)
 	CACHE(262144)
+#endif
+#ifndef CONFIG_MMU
 	CACHE(524288)
 	CACHE(1048576)
 #ifdef CONFIG_LARGE_ALLOCS
-- 
cgit v1.2.3-71-gd317


From a19cbd4bf258840ade3b6ee9e9256006d0644e09 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Wed, 8 Mar 2006 14:03:09 -0800
Subject: Mark the pipe file operations static

They aren't used (nor even really usable) outside of pipe.c anyway

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/pipe.c          | 6 +++---
 include/linux/fs.h | 3 ---
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pipe.c b/fs/pipe.c
index d722579df79a..8aada8e426f4 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -605,7 +605,7 @@ struct file_operations rdwr_fifo_fops = {
 	.fasync		= pipe_rdwr_fasync,
 };
 
-struct file_operations read_pipe_fops = {
+static struct file_operations read_pipe_fops = {
 	.llseek		= no_llseek,
 	.read		= pipe_read,
 	.readv		= pipe_readv,
@@ -617,7 +617,7 @@ struct file_operations read_pipe_fops = {
 	.fasync		= pipe_read_fasync,
 };
 
-struct file_operations write_pipe_fops = {
+static struct file_operations write_pipe_fops = {
 	.llseek		= no_llseek,
 	.read		= bad_pipe_r,
 	.write		= pipe_write,
@@ -629,7 +629,7 @@ struct file_operations write_pipe_fops = {
 	.fasync		= pipe_write_fasync,
 };
 
-struct file_operations rdwr_pipe_fops = {
+static struct file_operations rdwr_pipe_fops = {
 	.llseek		= no_llseek,
 	.read		= pipe_read,
 	.readv		= pipe_readv,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e059da947007..0cc34b1c42c9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1418,9 +1418,6 @@ extern int is_bad_inode(struct inode *);
 extern struct file_operations read_fifo_fops;
 extern struct file_operations write_fifo_fops;
 extern struct file_operations rdwr_fifo_fops;
-extern struct file_operations read_pipe_fops;
-extern struct file_operations write_pipe_fops;
-extern struct file_operations rdwr_pipe_fops;
 
 extern int fs_may_remount_ro(struct super_block *);
 
-- 
cgit v1.2.3-71-gd317


From e2bab3d92486fb781f4d06f56339264ed1492392 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 7 Mar 2006 21:55:31 -0800
Subject: [PATCH] percpu_counter_sum()

Implement percpu_counter_sum().  This is a more accurate but slower version of
percpu_counter_read_positive().

We need this for Alex's speedup-ext3_statfs patch and for the nr_file
accounting fix.  Otherwise these things would be too inaccurate on large CPU
counts.

Cc: Ravikiran G Thirumalai <kiran@scalex86.org>
Cc: Alex Tomas <alex@clusterfs.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/percpu_counter.h |  6 ++++++
 mm/swap.c                      | 25 +++++++++++++++++++++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index bd6708e2c027..682525511c9e 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -39,6 +39,7 @@ static inline void percpu_counter_destroy(struct percpu_counter *fbc)
 }
 
 void percpu_counter_mod(struct percpu_counter *fbc, long amount);
+long percpu_counter_sum(struct percpu_counter *fbc);
 
 static inline long percpu_counter_read(struct percpu_counter *fbc)
 {
@@ -92,6 +93,11 @@ static inline long percpu_counter_read_positive(struct percpu_counter *fbc)
 	return fbc->count;
 }
 
+static inline long percpu_counter_sum(struct percpu_counter *fbc)
+{
+	return percpu_counter_read_positive(fbc);
+}
+
 #endif	/* CONFIG_SMP */
 
 static inline void percpu_counter_inc(struct percpu_counter *fbc)
diff --git a/mm/swap.c b/mm/swap.c
index cce3dda59c59..e9ec06d845e8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -489,13 +489,34 @@ void percpu_counter_mod(struct percpu_counter *fbc, long amount)
 	if (count >= FBC_BATCH || count <= -FBC_BATCH) {
 		spin_lock(&fbc->lock);
 		fbc->count += count;
+		*pcount = 0;
 		spin_unlock(&fbc->lock);
-		count = 0;
+	} else {
+		*pcount = count;
 	}
-	*pcount = count;
 	put_cpu();
 }
 EXPORT_SYMBOL(percpu_counter_mod);
+
+/*
+ * Add up all the per-cpu counts, return the result.  This is a more accurate
+ * but much slower version of percpu_counter_read_positive()
+ */
+long percpu_counter_sum(struct percpu_counter *fbc)
+{
+	long ret;
+	int cpu;
+
+	spin_lock(&fbc->lock);
+	ret = fbc->count;
+	for_each_cpu(cpu) {
+		long *pcount = per_cpu_ptr(fbc->counters, cpu);
+		ret += *pcount;
+	}
+	spin_unlock(&fbc->lock);
+	return ret < 0 ? 0 : ret;
+}
+EXPORT_SYMBOL(percpu_counter_sum);
 #endif
 
 /*
-- 
cgit v1.2.3-71-gd317


From 21a1ea9eb40411d4ee29448c53b9e4c0654d6ceb Mon Sep 17 00:00:00 2001
From: Dipankar Sarma <dipankar@in.ibm.com>
Date: Tue, 7 Mar 2006 21:55:33 -0800
Subject: [PATCH] rcu batch tuning

This patch adds new tunables for RCU queue and finished batches.  There are
two types of controls - number of completed RCU updates invoked in a batch
(blimit) and monitoring for high rate of incoming RCUs on a cpu (qhimark,
qlowmark).

By default, the per-cpu batch limit is set to a small value.  If the input
RCU rate exceeds the high watermark, we do two things - force quiescent
state on all cpus and set the batch limit of the CPU to INTMAX.  Setting
batch limit to INTMAX forces all finished RCUs to be processed in one shot.
 If we have more than INTMAX RCUs queued up, then we have bigger problems
anyway.  Once the incoming queued RCUs fall below the low watermark, the
batch limit is set to the default.

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/kernel-parameters.txt | 13 +++++++
 include/linux/rcupdate.h            |  6 ++-
 kernel/rcupdate.c                   | 76 ++++++++++++++++++++++++++++---------
 3 files changed, 76 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 75205391b335..bad5987c4727 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1284,6 +1284,19 @@ running once the system is up.
 			New name for the ramdisk parameter.
 			See Documentation/ramdisk.txt.
 
+	rcu.blimit=	[KNL,BOOT] Set maximum number of finished
+			RCU callbacks to process in one batch.
+
+	rcu.qhimark=	[KNL,BOOT] Set threshold of queued
+			RCU callbacks over which batch limiting is disabled.
+
+	rcu.qlowmark=	[KNL,BOOT] Set threshold of queued
+			RCU callbacks below which batch limiting is re-enabled.
+
+	rcu.rsinterval=	[KNL,BOOT,SMP] Set the number of additional
+			RCU callbacks to be queued before forcing reschedule
+			on all cpus.
+
 	rdinit=		[KNL]
 			Format: <full_path>
 			Run specified binary instead of /init from the ramdisk,
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b87aefa082e2..c2ec6c77874e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -98,13 +98,17 @@ struct rcu_data {
 	long  	       	batch;           /* Batch # for current RCU batch */
 	struct rcu_head *nxtlist;
 	struct rcu_head **nxttail;
-	long            count; /* # of queued items */
+	long            qlen; 	 	 /* # of queued callbacks */
 	struct rcu_head *curlist;
 	struct rcu_head **curtail;
 	struct rcu_head *donelist;
 	struct rcu_head **donetail;
+	long		blimit;		 /* Upper limit on a processed batch */
 	int cpu;
 	struct rcu_head barrier;
+#ifdef CONFIG_SMP
+	long		last_rs_qlen;	 /* qlen during the last resched */
+#endif
 };
 
 DECLARE_PER_CPU(struct rcu_data, rcu_data);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 0cf8146bd585..8cf15a569fcd 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -67,7 +67,43 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 
 /* Fake initialization required by compiler */
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
-static int maxbatch = 10000;
+static int blimit = 10;
+static int qhimark = 10000;
+static int qlowmark = 100;
+#ifdef CONFIG_SMP
+static int rsinterval = 1000;
+#endif
+
+static atomic_t rcu_barrier_cpu_count;
+static struct semaphore rcu_barrier_sema;
+static struct completion rcu_barrier_completion;
+
+#ifdef CONFIG_SMP
+static void force_quiescent_state(struct rcu_data *rdp,
+			struct rcu_ctrlblk *rcp)
+{
+	int cpu;
+	cpumask_t cpumask;
+	set_need_resched();
+	if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
+		rdp->last_rs_qlen = rdp->qlen;
+		/*
+		 * Don't send IPI to itself. With irqs disabled,
+		 * rdp->cpu is the current cpu.
+		 */
+		cpumask = rcp->cpumask;
+		cpu_clear(rdp->cpu, cpumask);
+		for_each_cpu_mask(cpu, cpumask)
+			smp_send_reschedule(cpu);
+	}
+}
+#else
+static inline void force_quiescent_state(struct rcu_data *rdp,
+			struct rcu_ctrlblk *rcp)
+{
+	set_need_resched();
+}
+#endif
 
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
@@ -92,17 +128,13 @@ void fastcall call_rcu(struct rcu_head *head,
 	rdp = &__get_cpu_var(rcu_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
-
-	if (unlikely(++rdp->count > 10000))
-		set_need_resched();
-
+	if (unlikely(++rdp->qlen > qhimark)) {
+		rdp->blimit = INT_MAX;
+		force_quiescent_state(rdp, &rcu_ctrlblk);
+	}
 	local_irq_restore(flags);
 }
 
-static atomic_t rcu_barrier_cpu_count;
-static struct semaphore rcu_barrier_sema;
-static struct completion rcu_barrier_completion;
-
 /**
  * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -131,12 +163,12 @@ void fastcall call_rcu_bh(struct rcu_head *head,
 	rdp = &__get_cpu_var(rcu_bh_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
-	rdp->count++;
-/*
- *  Should we directly call rcu_do_batch() here ?
- *  if (unlikely(rdp->count > 10000))
- *      rcu_do_batch(rdp);
- */
+
+	if (unlikely(++rdp->qlen > qhimark)) {
+		rdp->blimit = INT_MAX;
+		force_quiescent_state(rdp, &rcu_bh_ctrlblk);
+	}
+
 	local_irq_restore(flags);
 }
 
@@ -199,10 +231,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		next = rdp->donelist = list->next;
 		list->func(list);
 		list = next;
-		rdp->count--;
-		if (++count >= maxbatch)
+		rdp->qlen--;
+		if (++count >= rdp->blimit)
 			break;
 	}
+	if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
+		rdp->blimit = blimit;
 	if (!rdp->donelist)
 		rdp->donetail = &rdp->donelist;
 	else
@@ -473,6 +507,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
 	rdp->quiescbatch = rcp->completed;
 	rdp->qs_pending = 0;
 	rdp->cpu = cpu;
+	rdp->blimit = blimit;
 }
 
 static void __devinit rcu_online_cpu(int cpu)
@@ -567,7 +602,12 @@ void synchronize_kernel(void)
 	synchronize_rcu();
 }
 
-module_param(maxbatch, int, 0);
+module_param(blimit, int, 0);
+module_param(qhimark, int, 0);
+module_param(qlowmark, int, 0);
+#ifdef CONFIG_SMP
+module_param(rsinterval, int, 0);
+#endif
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 EXPORT_SYMBOL(call_rcu);  /* WARNING: GPL-only in April 2006. */
 EXPORT_SYMBOL(call_rcu_bh);  /* WARNING: GPL-only in April 2006. */
-- 
cgit v1.2.3-71-gd317


From 529bf6be5c04f2e869d07bfdb122e9fd98ade714 Mon Sep 17 00:00:00 2001
From: Dipankar Sarma <dipankar@in.ibm.com>
Date: Tue, 7 Mar 2006 21:55:35 -0800
Subject: [PATCH] fix file counting

I have benchmarked this on an x86_64 NUMA system and see no significant
performance difference on kernbench.  Tested on both x86_64 and powerpc.

The way we do file struct accounting is not very suitable for batched
freeing.  For scalability reasons, file accounting was
constructor/destructor based.  This meant that nr_files was decremented
only when the object was removed from the slab cache.  This is susceptible
to slab fragmentation.  With RCU based file structure, consequent batched
freeing and a test program like Serge's, we just speed this up and end up
with a very fragmented slab -

llm22:~ # cat /proc/sys/fs/file-nr
587730  0       758844

At the same time, I see only 2000+ objects in the filp cache.  The following
patch fixes this problem.

This patch changes the file counting by removing the filp_count_lock.
Instead we use a separate percpu counter, nr_files, for now and all
accesses to it are through get_nr_files() api.  In the sysctl handler for
nr_files, we populate files_stat.nr_files before returning to user.

Counting files as and when they are created and destroyed (as opposed to
inside slab) allows us to correctly count open files with RCU.

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c          |  2 +-
 fs/file_table.c      | 87 +++++++++++++++++++++++++++++++++-------------------
 include/linux/file.h |  2 --
 include/linux/fs.h   |  1 +
 kernel/sysctl.c      |  5 ++-
 net/unix/af_unix.c   |  2 +-
 6 files changed, 62 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index a173bba32666..11dc83092d4a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1736,7 +1736,7 @@ void __init vfs_caches_init(unsigned long mempages)
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	dcache_init(mempages);
 	inode_init(mempages);
diff --git a/fs/file_table.c b/fs/file_table.c
index 768b58167543..44fabeaa9415 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -5,6 +5,7 @@
  *  Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/config.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/file.h>
@@ -19,52 +20,67 @@
 #include <linux/capability.h>
 #include <linux/cdev.h>
 #include <linux/fsnotify.h>
+#include <linux/sysctl.h>
+#include <linux/percpu_counter.h>
+
+#include <asm/atomic.h>
 
 /* sysctl tunables... */
 struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-EXPORT_SYMBOL(files_stat); /* Needed by unix.o */
-
 /* public. Not pretty! */
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
 
-static DEFINE_SPINLOCK(filp_count_lock);
+static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 
-/* slab constructors and destructors are called from arbitrary
- * context and must be fully threaded - use a local spinlock
- * to protect files_stat.nr_files
- */
-void filp_ctor(void *objp, struct kmem_cache *cachep, unsigned long cflags)
+static inline void file_free_rcu(struct rcu_head *head)
 {
-	if ((cflags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR) {
-		unsigned long flags;
-		spin_lock_irqsave(&filp_count_lock, flags);
-		files_stat.nr_files++;
-		spin_unlock_irqrestore(&filp_count_lock, flags);
-	}
+	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
+	kmem_cache_free(filp_cachep, f);
 }
 
-void filp_dtor(void *objp, struct kmem_cache *cachep, unsigned long dflags)
+static inline void file_free(struct file *f)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&filp_count_lock, flags);
-	files_stat.nr_files--;
-	spin_unlock_irqrestore(&filp_count_lock, flags);
+	percpu_counter_dec(&nr_files);
+	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
 
-static inline void file_free_rcu(struct rcu_head *head)
+/*
+ * Return the total number of open files in the system
+ */
+static int get_nr_files(void)
 {
-	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
-	kmem_cache_free(filp_cachep, f);
+	return percpu_counter_read_positive(&nr_files);
 }
 
-static inline void file_free(struct file *f)
+/*
+ * Return the maximum number of open files in the system
+ */
+int get_max_files(void)
 {
-	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
+	return files_stat.max_files;
 }
+EXPORT_SYMBOL_GPL(get_max_files);
+
+/*
+ * Handle nr_files sysctl
+ */
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+int proc_nr_files(ctl_table *table, int write, struct file *filp,
+                     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	files_stat.nr_files = get_nr_files();
+	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+}
+#else
+int proc_nr_files(ctl_table *table, int write, struct file *filp,
+                     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+#endif
 
 /* Find an unused file structure and return a pointer to it.
  * Returns NULL, if there are no more free file structures or
@@ -78,14 +94,20 @@ struct file *get_empty_filp(void)
 	/*
 	 * Privileged users can go above max_files
 	 */
-	if (files_stat.nr_files >= files_stat.max_files &&
-				!capable(CAP_SYS_ADMIN))
-		goto over;
+	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
+		/*
+		 * percpu_counters are inaccurate.  Do an expensive check before
+		 * we go and fail.
+		 */
+		if (percpu_counter_sum(&nr_files) >= files_stat.max_files)
+			goto over;
+	}
 
 	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
 	if (f == NULL)
 		goto fail;
 
+	percpu_counter_inc(&nr_files);
 	memset(f, 0, sizeof(*f));
 	if (security_file_alloc(f))
 		goto fail_sec;
@@ -101,10 +123,10 @@ struct file *get_empty_filp(void)
 
 over:
 	/* Ran out of filps - report that */
-	if (files_stat.nr_files > old_max) {
+	if (get_nr_files() > old_max) {
 		printk(KERN_INFO "VFS: file-max limit %d reached\n",
-					files_stat.max_files);
-		old_max = files_stat.nr_files;
+					get_max_files());
+		old_max = get_nr_files();
 	}
 	goto fail;
 
@@ -276,4 +298,5 @@ void __init files_init(unsigned long mempages)
 	if (files_stat.max_files < NR_FILE)
 		files_stat.max_files = NR_FILE;
 	files_defer_init();
+	percpu_counter_init(&nr_files);
 } 
diff --git a/include/linux/file.h b/include/linux/file.h
index 418b6101b59a..9901b850f2e4 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -60,8 +60,6 @@ extern void put_filp(struct file *);
 extern int get_unused_fd(void);
 extern void FASTCALL(put_unused_fd(unsigned int fd));
 struct kmem_cache;
-extern void filp_ctor(void * objp, struct kmem_cache *cachep, unsigned long cflags);
-extern void filp_dtor(void * objp, struct kmem_cache *cachep, unsigned long dflags);
 
 extern struct file ** alloc_fd_array(int);
 extern void free_fd_array(struct file **, int);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0cc34b1c42c9..51c0c93bdf93 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -35,6 +35,7 @@ struct files_stat_struct {
 	int max_files;		/* tunable */
 };
 extern struct files_stat_struct files_stat;
+extern int get_max_files(void);
 
 struct inodes_stat_t {
 	int nr_inodes;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index de2d9109194e..32b48e8ee36e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -50,6 +50,9 @@
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 
+extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
+                     void __user *buffer, size_t *lenp, loff_t *ppos);
+
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
@@ -943,7 +946,7 @@ static ctl_table fs_table[] = {
 		.data		= &files_stat,
 		.maxlen		= 3*sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_nr_files,
 	},
 	{
 		.ctl_name	= FS_MAXFILE,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1b5989b1b670..c323cc6a28b0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -547,7 +547,7 @@ static struct sock * unix_create1(struct socket *sock)
 	struct sock *sk = NULL;
 	struct unix_sock *u;
 
-	if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
+	if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
 		goto out;
 
 	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
-- 
cgit v1.2.3-71-gd317


From 0ef675d491bd65028fa838015ebc6ce8abefab6f Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Thu, 9 Mar 2006 17:33:38 -0800
Subject: [PATCH] mtd: 64 bit fixes

Fix some bugs in mtd/jffs2 on 64bit platform.

The MEMGETBADBLOCK/MEMSETBADBLOCK ioctl are not listed in compat_ioctl.h.

And some variables in jffs2 are declared as uint32_t but used to hold
size_t values.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jffs2/nodelist.c          | 3 ++-
 fs/jffs2/readinode.c         | 2 +-
 include/linux/compat_ioctl.h | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index b635e167a3fa..d4d0c41490cd 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -406,7 +406,8 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 	int err = 0, pointed = 0;
 	struct jffs2_eraseblock *jeb;
 	unsigned char *buffer;
-	uint32_t crc, ofs, retlen, len;
+	uint32_t crc, ofs, len;
+	size_t retlen;
 
 	BUG_ON(tn->csize == 0);
 
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 5f0652df5d47..f1695642d0f7 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -112,7 +112,7 @@ static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_r
  * 	    negative error code on failure.
  */
 static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
-				struct jffs2_raw_dirent *rd, uint32_t read, struct jffs2_full_dirent **fdp,
+				struct jffs2_raw_dirent *rd, size_t read, struct jffs2_full_dirent **fdp,
 				uint32_t *latest_mctime, uint32_t *mctime_ver)
 {
 	struct jffs2_full_dirent *fd;
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index 8fad50f8e389..ae7dfb790df3 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -696,6 +696,8 @@ COMPATIBLE_IOCTL(MEMLOCK)
 COMPATIBLE_IOCTL(MEMUNLOCK)
 COMPATIBLE_IOCTL(MEMGETREGIONCOUNT)
 COMPATIBLE_IOCTL(MEMGETREGIONINFO)
+COMPATIBLE_IOCTL(MEMGETBADBLOCK)
+COMPATIBLE_IOCTL(MEMSETBADBLOCK)
 /* NBD */
 ULONG_IOCTL(NBD_SET_SOCK)
 ULONG_IOCTL(NBD_SET_BLKSIZE)
-- 
cgit v1.2.3-71-gd317


From 8fce4d8e3b9e3cf47cc8afeb6077e22ab795d989 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Thu, 9 Mar 2006 17:33:54 -0800
Subject: [PATCH] slab: Node rotor for freeing alien caches and remote per cpu
 pages.

The cache reaper currently tries to free all alien caches and all remote
per cpu pages in each pass of cache_reap.  For machines with a large number
of nodes (such as Altix) this may lead to sporadic delays of around 10ms.
Interrupts are disabled while reclaiming, creating unacceptable delays.

This patch changes that behavior by adding a per cpu reap_node variable.
Instead of attempting to free all caches, we free only one alien cache and
the per cpu pages from one remote node.  That reduces the time spent in
cache_reap.  However, doing so will lengthen the time it takes to
completely drain all remote per cpu pagesets and all alien caches.  The
time needed will grow with the number of nodes in the system.  All caches
are drained when they overflow their respective capacity.  So the drawback
here is only that a bit of memory may be wasted for awhile longer.

Details:

1. Rename drain_remote_pages to drain_node_pages to allow the specification
   of the node to drain of pcp pages.

2. Add additional functions init_reap_node, next_reap_node for NUMA
   that manage a per cpu reap_node counter.

3. Add a reap_alien function that reaps only from the current reap_node.

For us this seems to be a critical issue.  Holdoffs of an average of ~7ms
cause some HPC benchmarks to slow down significantly.  For example, NAS
parallel slows down dramatically.  NAS parallel has a 12-16 seconds runtime w/o rotor
compared to 5.8 secs with the rotor patches.  It gets down to 5.05 secs with
the additional interrupt holdoff reductions.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h |  4 ++--
 mm/page_alloc.c     | 17 +++++++-------
 mm/slab.c           | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 72 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 20f9148e38d9..7851e6b520cf 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -157,9 +157,9 @@ extern void FASTCALL(free_cold_page(struct page *page));
 
 void page_alloc_init(void);
 #ifdef CONFIG_NUMA
-void drain_remote_pages(void);
+void drain_node_pages(int node);
 #else
-static inline void drain_remote_pages(void) { };
+static inline void drain_node_pages(int node) { };
 #endif
 
 #endif /* __LINUX_GFP_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 791690d7d3fa..234bd4895d14 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -590,21 +590,20 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 }
 
 #ifdef CONFIG_NUMA
-/* Called from the slab reaper to drain remote pagesets */
-void drain_remote_pages(void)
+/*
+ * Called from the slab reaper to drain pagesets on a particular node that
+ * belong to the currently executing processor.
+ */
+void drain_node_pages(int nodeid)
 {
-	struct zone *zone;
-	int i;
+	int i, z;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	for_each_zone(zone) {
+	for (z = 0; z < MAX_NR_ZONES; z++) {
+		struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
 		struct per_cpu_pageset *pset;
 
-		/* Do not drain local pagesets */
-		if (zone->zone_pgdat->node_id == numa_node_id())
-			continue;
-
 		pset = zone_pcp(zone, smp_processor_id());
 		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
 			struct per_cpu_pages *pcp;
diff --git a/mm/slab.c b/mm/slab.c
index 61800b88e241..d0bd7f07ab04 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -789,6 +789,47 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, char *
 	dump_stack();
 }
 
+#ifdef CONFIG_NUMA
+/*
+ * Special reaping functions for NUMA systems called from cache_reap().
+ * These take care of doing round robin flushing of alien caches (containing
+ * objects freed on different nodes from which they were allocated) and the
+ * flushing of remote pcps by calling drain_node_pages.
+ */
+static DEFINE_PER_CPU(unsigned long, reap_node);
+
+static void init_reap_node(int cpu)
+{
+	int node;
+
+	node = next_node(cpu_to_node(cpu), node_online_map);
+	if (node == MAX_NUMNODES)
+		node = 0;
+
+	__get_cpu_var(reap_node) = node;
+}
+
+static void next_reap_node(void)
+{
+	int node = __get_cpu_var(reap_node);
+
+	/*
+	 * Also drain per cpu pages on remote zones
+	 */
+	if (node != numa_node_id())
+		drain_node_pages(node);
+
+	node = next_node(node, node_online_map);
+	if (unlikely(node >= MAX_NUMNODES))
+		node = first_node(node_online_map);
+	__get_cpu_var(reap_node) = node;
+}
+
+#else
+#define init_reap_node(cpu) do { } while (0)
+#define next_reap_node(void) do { } while (0)
+#endif
+
 /*
  * Initiate the reap timer running on the target CPU.  We run at around 1 to 2Hz
  * via the workqueue/eventd.
@@ -806,6 +847,7 @@ static void __devinit start_cpu_timer(int cpu)
 	 * at that time.
 	 */
 	if (keventd_up() && reap_work->func == NULL) {
+		init_reap_node(cpu);
 		INIT_WORK(reap_work, cache_reap, NULL);
 		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
 	}
@@ -884,6 +926,23 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 	}
 }
 
+/*
+ * Called from cache_reap() to regularly drain alien caches round robin.
+ */
+static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
+{
+	int node = __get_cpu_var(reap_node);
+
+	if (l3->alien) {
+		struct array_cache *ac = l3->alien[node];
+		if (ac && ac->avail) {
+			spin_lock_irq(&ac->lock);
+			__drain_alien_cache(cachep, ac, node);
+			spin_unlock_irq(&ac->lock);
+		}
+	}
+}
+
 static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
 {
 	int i = 0;
@@ -902,6 +961,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al
 #else
 
 #define drain_alien_cache(cachep, alien) do { } while (0)
+#define reap_alien(cachep, l3) do { } while (0)
 
 static inline struct array_cache **alloc_alien_cache(int node, int limit)
 {
@@ -3497,8 +3557,7 @@ static void cache_reap(void *unused)
 		check_irq_on();
 
 		l3 = searchp->nodelists[numa_node_id()];
-		if (l3->alien)
-			drain_alien_cache(searchp, l3->alien);
+		reap_alien(searchp, l3);
 		spin_lock_irq(&l3->list_lock);
 
 		drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3548,7 +3607,7 @@ static void cache_reap(void *unused)
 	}
 	check_irq_on();
 	mutex_unlock(&cache_chain_mutex);
-	drain_remote_pages();
+	next_reap_node();
 	/* Setup the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
 }
-- 
cgit v1.2.3-71-gd317


From 0adb25d2e71ab047423d6fc63d5d184590d0a66f Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@openvz.org>
Date: Sat, 11 Mar 2006 03:27:13 -0800
Subject: [PATCH] ext3: ext3_symlink should use GFP_NOFS allocations inside

This patch fixes illegal __GFP_FS allocation inside ext3 transaction in
ext3_symlink().  Such allocation may re-enter ext3 code from
try_to_free_pages.  But JBD/ext3 code keeps a pointer to current journal
handle in task_struct and, hence, is not reentrant.

This bug led to "Assertion failure in journal_dirty_metadata()" messages.

http://bugzilla.openvz.org/show_bug.cgi?id=115

Signed-off-by: Andrey Savochkin <saw@saw.sw.com.sg>
Signed-off-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/namei.c    |  3 ++-
 fs/namei.c         | 13 +++++++++++--
 include/linux/fs.h |  2 ++
 3 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 8bd8ac077704..b8f5cd1e540d 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2141,7 +2141,8 @@ retry:
 		 * We have a transaction open.  All is sweetness.  It also sets
 		 * i_size in generic_commit_write().
 		 */
-		err = page_symlink(inode, symname, l);
+		err = __page_symlink(inode, symname, l,
+				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
 		if (err) {
 			ext3_dec_count(handle, inode);
 			ext3_mark_inode_dirty(handle, inode);
diff --git a/fs/namei.c b/fs/namei.c
index 557dcf395ca1..8dc2b038d5d9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2613,13 +2613,15 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
 	}
 }
 
-int page_symlink(struct inode *inode, const char *symname, int len)
+int __page_symlink(struct inode *inode, const char *symname, int len,
+		gfp_t gfp_mask)
 {
 	struct address_space *mapping = inode->i_mapping;
-	struct page *page = grab_cache_page(mapping, 0);
+	struct page *page;
 	int err = -ENOMEM;
 	char *kaddr;
 
+	page = find_or_create_page(mapping, 0, gfp_mask);
 	if (!page)
 		goto fail;
 	err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
@@ -2654,6 +2656,12 @@ fail:
 	return err;
 }
 
+int page_symlink(struct inode *inode, const char *symname, int len)
+{
+	return __page_symlink(inode, symname, len,
+			mapping_gfp_mask(inode->i_mapping));
+}
+
 struct inode_operations page_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
@@ -2672,6 +2680,7 @@ EXPORT_SYMBOL(lookup_one_len);
 EXPORT_SYMBOL(page_follow_link_light);
 EXPORT_SYMBOL(page_put_link);
 EXPORT_SYMBOL(page_readlink);
+EXPORT_SYMBOL(__page_symlink);
 EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(page_symlink_inode_operations);
 EXPORT_SYMBOL(path_lookup);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51c0c93bdf93..128d0082522c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1664,6 +1664,8 @@ extern int vfs_follow_link(struct nameidata *, const char *);
 extern int page_readlink(struct dentry *, char __user *, int);
 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
 extern void page_put_link(struct dentry *, struct nameidata *, void *);
+extern int __page_symlink(struct inode *inode, const char *symname, int len,
+		gfp_t gfp_mask);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern struct inode_operations page_symlink_inode_operations;
 extern int generic_readlink(struct dentry *, char __user *, int);
-- 
cgit v1.2.3-71-gd317


From 7cd9013be6c22f3ff6f777354f766c8c0b955e17 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 11 Mar 2006 03:27:18 -0800
Subject: [PATCH] remove __put_task_struct_cb export again

The patch '[PATCH] RCU signal handling' [1] added an export for
__put_task_struct_cb, a put_task_struct helper newly introduced in that
patch.  But put_task_struct couldn't be used from modules previously, as
__put_task_struct wasn't exported.  There are no callers of it in modular
code, and it shouldn't be exported because we don't want drivers to hold
references to task_structs.

This patch removes the export and folds __put_task_struct into
__put_task_struct_cb as there's no other caller.

[1] http://www2.kernel.org/git/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=e56d090310d7625ecb43a1eeebd479f04affb48b

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 1 -
 kernel/fork.c         | 4 +++-
 kernel/sched.c        | 7 -------
 3 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff2e09c953b9..62e6314382f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -892,7 +892,6 @@ static inline int pid_alive(struct task_struct *p)
 }
 
 extern void free_task(struct task_struct *tsk);
-extern void __put_task_struct(struct task_struct *tsk);
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
 
 extern void __put_task_struct_cb(struct rcu_head *rhp);
diff --git a/kernel/fork.c b/kernel/fork.c
index fbea12d7a943..a8eab86de7f1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -108,8 +108,10 @@ void free_task(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(free_task);
 
-void __put_task_struct(struct task_struct *tsk)
+void __put_task_struct_cb(struct rcu_head *rhp)
 {
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
 	WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
diff --git a/kernel/sched.c b/kernel/sched.c
index e82c99f1db64..4d46e90f59c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -178,13 +178,6 @@ static unsigned int task_timeslice(task_t *p)
 #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)	\
 				< (long long) (sd)->cache_hot_time)
 
-void __put_task_struct_cb(struct rcu_head *rhp)
-{
-	__put_task_struct(container_of(rhp, struct task_struct, rcu));
-}
-
-EXPORT_SYMBOL_GPL(__put_task_struct_cb);
-
 /*
  * These are the runqueue data structures:
  */
-- 
cgit v1.2.3-71-gd317


From 4a29cc2e503b33a1e96db4c3f9a94165f153f259 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sun, 19 Mar 2006 13:21:12 -0800
Subject: [TG3]: 40-bit DMA workaround part 2

The 40-bit DMA workaround recently implemented for 5714, 5715, and
5780 needs to be expanded because there may be other tg3 devices
behind the EPB Express to PCIX bridge in the 5780 class device.

For example, some 4-port card or mother board designs have 5704 behind
the 5714.

All devices behind the EPB require the 40-bit DMA workaround.

Thanks to Chris Elmquist again for reporting the problem and testing
the patch.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c       | 52 ++++++++++++++++++++++++++++++++++++++++---------
 drivers/net/tg3.h       |  1 +
 include/linux/pci_ids.h |  1 +
 3 files changed, 45 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index b8f1524da557..caf4102b54ce 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -9552,12 +9552,36 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 		}
 	}
 
-	/* Find msi capability. */
+	/* The EPB bridge inside 5714, 5715, and 5780 cannot support
+	 * DMA addresses > 40-bit. This bridge may have other additional
+	 * 57xx devices behind it in some 4-port NIC designs for example.
+	 * Any tg3 device found behind the bridge will also need the 40-bit
+	 * DMA workaround.
+	 */
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5780 ||
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5714) {
 		tp->tg3_flags2 |= TG3_FLG2_5780_CLASS;
+		tp->tg3_flags |= TG3_FLAG_40BIT_DMA_BUG;
 		tp->msi_cap = pci_find_capability(tp->pdev, PCI_CAP_ID_MSI);
 	}
+	else {
+		struct pci_dev *bridge = NULL;
+
+		do {
+			bridge = pci_get_device(PCI_VENDOR_ID_SERVERWORKS,
+						PCI_DEVICE_ID_SERVERWORKS_EPB,
+						bridge);
+			if (bridge && bridge->subordinate &&
+			    (bridge->subordinate->number <=
+			     tp->pdev->bus->number) &&
+			    (bridge->subordinate->subordinate >=
+			     tp->pdev->bus->number)) {
+				tp->tg3_flags |= TG3_FLAG_40BIT_DMA_BUG;
+				pci_dev_put(bridge);
+				break;
+			}
+		} while (bridge);
+	}
 
 	/* Initialize misc host control in PCI block. */
 	tp->misc_host_ctrl |= (misc_ctrl_reg &
@@ -10303,7 +10327,14 @@ static int __devinit tg3_test_dma(struct tg3 *tp)
 		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) {
 			u32 ccval = (tr32(TG3PCI_CLOCK_CTRL) & 0x1f);
 
-			if (ccval == 0x6 || ccval == 0x7)
+			/* If the 5704 is behind the EPB bridge, we can
+			 * do the less restrictive ONE_DMA workaround for
+			 * better performance.
+			 */
+			if ((tp->tg3_flags & TG3_FLAG_40BIT_DMA_BUG) &&
+			    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704)
+				tp->dma_rwctrl |= 0x8000;
+			else if (ccval == 0x6 || ccval == 0x7)
 				tp->dma_rwctrl |= DMA_RWCTRL_ONE_DMA;
 
 			/* Set bit 23 to enable PCIX hw bug fix */
@@ -10759,19 +10790,20 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 		goto err_out_iounmap;
 	}
 
-	/* 5714, 5715 and 5780 cannot support DMA addresses > 40-bit.
+	/* The EPB bridge inside 5714, 5715, and 5780 and any
+	 * device behind the EPB cannot support DMA addresses > 40-bit.
 	 * On 64-bit systems with IOMMU, use 40-bit dma_mask.
 	 * On 64-bit systems without IOMMU, use 64-bit dma_mask and
 	 * do DMA address check in tg3_start_xmit().
 	 */
-	if (tp->tg3_flags2 & TG3_FLG2_5780_CLASS) {
+	if (tp->tg3_flags2 & TG3_FLG2_IS_5788)
+		persist_dma_mask = dma_mask = DMA_32BIT_MASK;
+	else if (tp->tg3_flags & TG3_FLAG_40BIT_DMA_BUG) {
 		persist_dma_mask = dma_mask = DMA_40BIT_MASK;
 #ifdef CONFIG_HIGHMEM
 		dma_mask = DMA_64BIT_MASK;
 #endif
-	} else if (tp->tg3_flags2 & TG3_FLG2_IS_5788)
-		persist_dma_mask = dma_mask = DMA_32BIT_MASK;
-	else
+	} else
 		persist_dma_mask = dma_mask = DMA_64BIT_MASK;
 
 	/* Configure DMA attributes. */
@@ -10908,8 +10940,10 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	       (tp->tg3_flags & TG3_FLAG_SPLIT_MODE) != 0,
 	       (tp->tg3_flags2 & TG3_FLG2_NO_ETH_WIRE_SPEED) == 0,
 	       (tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) != 0);
-	printk(KERN_INFO "%s: dma_rwctrl[%08x]\n",
-	       dev->name, tp->dma_rwctrl);
+	printk(KERN_INFO "%s: dma_rwctrl[%08x] dma_mask[%d-bit]\n",
+	       dev->name, tp->dma_rwctrl,
+	       (pdev->dma_mask == DMA_32BIT_MASK) ? 32 :
+	        (((u64) pdev->dma_mask == DMA_40BIT_MASK) ? 40 : 64));
 
 	return 0;
 
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 7f4b7f6ac40d..7e3b613afb29 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2163,6 +2163,7 @@ struct tg3 {
 #define TG3_FLAG_10_100_ONLY		0x01000000
 #define TG3_FLAG_PAUSE_AUTONEG		0x02000000
 #define TG3_FLAG_IN_RESET_TASK		0x04000000
+#define TG3_FLAG_40BIT_DMA_BUG		0x08000000
 #define TG3_FLAG_BROKEN_CHECKSUMS	0x10000000
 #define TG3_FLAG_GOT_SERDES_FLOWCTL	0x20000000
 #define TG3_FLAG_SPLIT_MODE		0x40000000
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 1709b5009d2e..751eea58bde8 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1365,6 +1365,7 @@
 #define PCI_DEVICE_ID_SERVERWORKS_HE	  0x0008
 #define PCI_DEVICE_ID_SERVERWORKS_LE	  0x0009
 #define PCI_DEVICE_ID_SERVERWORKS_GCNB_LE 0x0017
+#define PCI_DEVICE_ID_SERVERWORKS_EPB	  0x0103
 #define PCI_DEVICE_ID_SERVERWORKS_OSB4	  0x0200
 #define PCI_DEVICE_ID_SERVERWORKS_CSB5	  0x0201
 #define PCI_DEVICE_ID_SERVERWORKS_CSB6    0x0203
-- 
cgit v1.2.3-71-gd317