From 490dea45d00f01847ebebd007685d564aaf2cd98 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 24 Nov 2008 17:06:57 +0100 Subject: itimers: remove the per-cpu-ish-ness Either we bounce once cacheline per cpu per tick, yielding n^2 bounces or we just bounce a single.. Also, using per-cpu allocations for the thread-groups complicates the per-cpu allocator in that its currently aimed to be a fixed sized allocator and the only possible extention to that would be vmap based, which is seriously constrained on 32 bit archs. So making the per-cpu memory requirement depend on the number of processes is an issue. Lastly, it didn't deal with cpu-hotplug, although admittedly that might be fixable. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 6 ++++++ include/linux/sched.h | 29 ++++++++++++++++++----------- 2 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2f3c2d4ef73b..ea0ea1a4c36f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -48,6 +48,12 @@ extern struct fs_struct init_fs; .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ .rlim = INIT_RLIMITS, \ + .cputime = { .totals = { \ + .utime = cputime_zero, \ + .stime = cputime_zero, \ + .sum_exec_runtime = 0, \ + .lock = __SPIN_LOCK_UNLOCKED(sig.cputime.totals.lock), \ + }, }, \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index 4cae9b81a1f8..c20943eabb4c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -450,6 +450,7 @@ struct task_cputime { cputime_t utime; cputime_t stime; unsigned long long sum_exec_runtime; + spinlock_t lock; }; /* Alternate field names when used to cache expirations. */ #define prof_exp stime @@ -465,7 +466,7 @@ struct task_cputime { * used for thread group CPU clock calculations. */ struct thread_group_cputime { - struct task_cputime *totals; + struct task_cputime totals; }; /* @@ -2180,24 +2181,30 @@ static inline int spin_needbreak(spinlock_t *lock) * Thread group CPU time accounting. */ -extern int thread_group_cputime_alloc(struct task_struct *); -extern void thread_group_cputime(struct task_struct *, struct task_cputime *); - -static inline void thread_group_cputime_init(struct signal_struct *sig) +static inline +void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) { - sig->cputime.totals = NULL; + struct task_cputime *totals = &tsk->signal->cputime.totals; + unsigned long flags; + + spin_lock_irqsave(&totals->lock, flags); + *times = *totals; + spin_unlock_irqrestore(&totals->lock, flags); } -static inline int thread_group_cputime_clone_thread(struct task_struct *curr) +static inline void thread_group_cputime_init(struct signal_struct *sig) { - if (curr->signal->cputime.totals) - return 0; - return thread_group_cputime_alloc(curr); + sig->cputime.totals = (struct task_cputime){ + .utime = cputime_zero, + .stime = cputime_zero, + .sum_exec_runtime = 0, + }; + + spin_lock_init(&sig->cputime.totals.lock); } static inline void thread_group_cputime_free(struct signal_struct *sig) { - free_percpu(sig->cputime.totals); } /* -- cgit v1.2.3-71-gd317 From 2526c151c31358aec66b63921dd712bbec5ee0cb Mon Sep 17 00:00:00 2001 From: Jon Smirl Date: Fri, 9 Jan 2009 15:49:06 -0700 Subject: drivers/of: Add the of_find_i2c_device_by_node function. The of_find_i2c_device_by_node function allows you to follow a reference in the device tree to an i2c device node and then locate the linux device instantiated by the device tree. Example use: an I2S bus driver finding the i2c_device instance for a codec described by a device tree node. This was waiting for Anton's i2c patches that were just added. Signed-off-by: Jon Smirl Signed-off-by: Grant Likely --- drivers/of/of_i2c.c | 19 +++++++++++++++++++ include/linux/of_i2c.h | 3 +++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c index e1b0ad6e918f..fa65a2b2ae2e 100644 --- a/drivers/of/of_i2c.c +++ b/drivers/of/of_i2c.c @@ -66,4 +66,23 @@ void of_register_i2c_devices(struct i2c_adapter *adap, } EXPORT_SYMBOL(of_register_i2c_devices); +static int of_dev_node_match(struct device *dev, void *data) +{ + return dev_archdata_get_node(&dev->archdata) == data; +} + +/* must call put_device() when done with returned i2c_client device */ +struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) +{ + struct device *dev; + + dev = bus_find_device(&i2c_bus_type, NULL, node, + of_dev_node_match); + if (!dev) + return NULL; + + return to_i2c_client(dev); +} +EXPORT_SYMBOL(of_find_i2c_device_by_node); + MODULE_LICENSE("GPL"); diff --git a/include/linux/of_i2c.h b/include/linux/of_i2c.h index bd2a870ec296..34974b5a76f7 100644 --- a/include/linux/of_i2c.h +++ b/include/linux/of_i2c.h @@ -17,4 +17,7 @@ void of_register_i2c_devices(struct i2c_adapter *adap, struct device_node *adap_node); +/* must call put_device() when done with returned i2c_client device */ +struct i2c_client *of_find_i2c_device_by_node(struct device_node *node); + #endif /* __LINUX_OF_I2C_H */ -- cgit v1.2.3-71-gd317 From f52046b14b1e1a8a02ae48d0c69d39c5e204644f Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 9 Jan 2009 01:49:01 +0100 Subject: mfd: PCF50633 core driver This patch implements the core of the PCF50633 driver. This core driver has generic register read/write functions and does interrupt management for its sub devices. Signed-off-by: Balaji Rao Cc: Andy Green Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 9 + drivers/mfd/Makefile | 2 + drivers/mfd/pcf50633-core.c | 710 ++++++++++++++++++++++++++++++++++++++ include/linux/mfd/pcf50633/core.h | 218 ++++++++++++ 4 files changed, 939 insertions(+) create mode 100644 drivers/mfd/pcf50633-core.c create mode 100644 include/linux/mfd/pcf50633/core.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 416f9e7286ba..3ac32e45b03b 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -217,6 +217,15 @@ config MFD_WM8350_I2C I2C as the control interface. Additional options must be selected to enable support for the functionality of the chip. +config MFD_PCF50633 + tristate "Support for NXP PCF50633" + depends on I2C + help + Say yes here if you have NXP PCF50633 chip on your board. + This core driver provides register access and IRQ handling + facilities, and registers devices for the various functions + so that function-specific drivers can bind to them. + endmenu menu "Multimedia Capabilities Port drivers" diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 0c9418b36c26..23d4d10981b3 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -37,3 +37,5 @@ endif obj-$(CONFIG_UCB1400_CORE) += ucb1400_core.o obj-$(CONFIG_PMIC_DA903X) += da903x.o + +obj-$(CONFIG_MFD_PCF50633) += pcf50633-core.o \ No newline at end of file diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c new file mode 100644 index 000000000000..24508e28e3fb --- /dev/null +++ b/drivers/mfd/pcf50633-core.c @@ -0,0 +1,710 @@ +/* NXP PCF50633 Power Management Unit (PMU) driver + * + * (C) 2006-2008 by Openmoko, Inc. + * Author: Harald Welte + * Balaji Rao + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Two MBCS registers used during cold start */ +#define PCF50633_REG_MBCS1 0x4b +#define PCF50633_REG_MBCS2 0x4c +#define PCF50633_MBCS1_USBPRES 0x01 +#define PCF50633_MBCS1_ADAPTPRES 0x01 + +static int __pcf50633_read(struct pcf50633 *pcf, u8 reg, int num, u8 *data) +{ + int ret; + + ret = i2c_smbus_read_i2c_block_data(pcf->i2c_client, reg, + num, data); + if (ret < 0) + dev_err(pcf->dev, "Error reading %d regs at %d\n", num, reg); + + return ret; +} + +static int __pcf50633_write(struct pcf50633 *pcf, u8 reg, int num, u8 *data) +{ + int ret; + + ret = i2c_smbus_write_i2c_block_data(pcf->i2c_client, reg, + num, data); + if (ret < 0) + dev_err(pcf->dev, "Error writing %d regs at %d\n", num, reg); + + return ret; + +} + +/* Read a block of upto 32 regs */ +int pcf50633_read_block(struct pcf50633 *pcf, u8 reg, + int nr_regs, u8 *data) +{ + int ret; + + mutex_lock(&pcf->lock); + ret = __pcf50633_read(pcf, reg, nr_regs, data); + mutex_unlock(&pcf->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(pcf50633_read_block); + +/* Write a block of upto 32 regs */ +int pcf50633_write_block(struct pcf50633 *pcf , u8 reg, + int nr_regs, u8 *data) +{ + int ret; + + mutex_lock(&pcf->lock); + ret = __pcf50633_write(pcf, reg, nr_regs, data); + mutex_unlock(&pcf->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(pcf50633_write_block); + +u8 pcf50633_reg_read(struct pcf50633 *pcf, u8 reg) +{ + u8 val; + + mutex_lock(&pcf->lock); + __pcf50633_read(pcf, reg, 1, &val); + mutex_unlock(&pcf->lock); + + return val; +} +EXPORT_SYMBOL_GPL(pcf50633_reg_read); + +int pcf50633_reg_write(struct pcf50633 *pcf, u8 reg, u8 val) +{ + int ret; + + mutex_lock(&pcf->lock); + ret = __pcf50633_write(pcf, reg, 1, &val); + mutex_unlock(&pcf->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(pcf50633_reg_write); + +int pcf50633_reg_set_bit_mask(struct pcf50633 *pcf, u8 reg, u8 mask, u8 val) +{ + int ret; + u8 tmp; + + val &= mask; + + mutex_lock(&pcf->lock); + ret = __pcf50633_read(pcf, reg, 1, &tmp); + if (ret < 0) + goto out; + + tmp &= ~mask; + tmp |= val; + ret = __pcf50633_write(pcf, reg, 1, &tmp); + +out: + mutex_unlock(&pcf->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(pcf50633_reg_set_bit_mask); + +int pcf50633_reg_clear_bits(struct pcf50633 *pcf, u8 reg, u8 val) +{ + int ret; + u8 tmp; + + mutex_lock(&pcf->lock); + ret = __pcf50633_read(pcf, reg, 1, &tmp); + if (ret < 0) + goto out; + + tmp &= ~val; + ret = __pcf50633_write(pcf, reg, 1, &tmp); + +out: + mutex_unlock(&pcf->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(pcf50633_reg_clear_bits); + +/* sysfs attributes */ +static ssize_t show_dump_regs(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct pcf50633 *pcf = dev_get_drvdata(dev); + u8 dump[16]; + int n, n1, idx = 0; + char *buf1 = buf; + static u8 address_no_read[] = { /* must be ascending */ + PCF50633_REG_INT1, + PCF50633_REG_INT2, + PCF50633_REG_INT3, + PCF50633_REG_INT4, + PCF50633_REG_INT5, + 0 /* terminator */ + }; + + for (n = 0; n < 256; n += sizeof(dump)) { + for (n1 = 0; n1 < sizeof(dump); n1++) + if (n == address_no_read[idx]) { + idx++; + dump[n1] = 0x00; + } else + dump[n1] = pcf50633_reg_read(pcf, n + n1); + + hex_dump_to_buffer(dump, sizeof(dump), 16, 1, buf1, 128, 0); + buf1 += strlen(buf1); + *buf1++ = '\n'; + *buf1 = '\0'; + } + + return buf1 - buf; +} +static DEVICE_ATTR(dump_regs, 0400, show_dump_regs, NULL); + +static ssize_t show_resume_reason(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pcf50633 *pcf = dev_get_drvdata(dev); + int n; + + n = sprintf(buf, "%02x%02x%02x%02x%02x\n", + pcf->resume_reason[0], + pcf->resume_reason[1], + pcf->resume_reason[2], + pcf->resume_reason[3], + pcf->resume_reason[4]); + + return n; +} +static DEVICE_ATTR(resume_reason, 0400, show_resume_reason, NULL); + +static struct attribute *pcf_sysfs_entries[] = { + &dev_attr_dump_regs.attr, + &dev_attr_resume_reason.attr, + NULL, +}; + +static struct attribute_group pcf_attr_group = { + .name = NULL, /* put in device directory */ + .attrs = pcf_sysfs_entries, +}; + +int pcf50633_register_irq(struct pcf50633 *pcf, int irq, + void (*handler) (int, void *), void *data) +{ + if (irq < 0 || irq > PCF50633_NUM_IRQ || !handler) + return -EINVAL; + + if (WARN_ON(pcf->irq_handler[irq].handler)) + return -EBUSY; + + mutex_lock(&pcf->lock); + pcf->irq_handler[irq].handler = handler; + pcf->irq_handler[irq].data = data; + mutex_unlock(&pcf->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(pcf50633_register_irq); + +int pcf50633_free_irq(struct pcf50633 *pcf, int irq) +{ + if (irq < 0 || irq > PCF50633_NUM_IRQ) + return -EINVAL; + + mutex_lock(&pcf->lock); + pcf->irq_handler[irq].handler = NULL; + mutex_unlock(&pcf->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(pcf50633_free_irq); + +static int __pcf50633_irq_mask_set(struct pcf50633 *pcf, int irq, u8 mask) +{ + u8 reg, bits, tmp; + int ret = 0, idx; + + idx = irq >> 3; + reg = PCF50633_REG_INT1M + idx; + bits = 1 << (irq & 0x07); + + mutex_lock(&pcf->lock); + + if (mask) { + ret = __pcf50633_read(pcf, reg, 1, &tmp); + if (ret < 0) + goto out; + + tmp |= bits; + + ret = __pcf50633_write(pcf, reg, 1, &tmp); + if (ret < 0) + goto out; + + pcf->mask_regs[idx] &= ~bits; + pcf->mask_regs[idx] |= bits; + } else { + ret = __pcf50633_read(pcf, reg, 1, &tmp); + if (ret < 0) + goto out; + + tmp &= ~bits; + + ret = __pcf50633_write(pcf, reg, 1, &tmp); + if (ret < 0) + goto out; + + pcf->mask_regs[idx] &= ~bits; + } +out: + mutex_unlock(&pcf->lock); + + return ret; +} + +int pcf50633_irq_mask(struct pcf50633 *pcf, int irq) +{ + dev_info(pcf->dev, "Masking IRQ %d\n", irq); + + return __pcf50633_irq_mask_set(pcf, irq, 1); +} +EXPORT_SYMBOL_GPL(pcf50633_irq_mask); + +int pcf50633_irq_unmask(struct pcf50633 *pcf, int irq) +{ + dev_info(pcf->dev, "Unmasking IRQ %d\n", irq); + + return __pcf50633_irq_mask_set(pcf, irq, 0); +} +EXPORT_SYMBOL_GPL(pcf50633_irq_unmask); + +int pcf50633_irq_mask_get(struct pcf50633 *pcf, int irq) +{ + u8 reg, bits; + + reg = irq >> 3; + bits = 1 << (irq & 0x07); + + return pcf->mask_regs[reg] & bits; +} +EXPORT_SYMBOL_GPL(pcf50633_irq_mask_get); + +static void pcf50633_irq_call_handler(struct pcf50633 *pcf, int irq) +{ + if (pcf->irq_handler[irq].handler) + pcf->irq_handler[irq].handler(irq, pcf->irq_handler[irq].data); +} + +/* Maximum amount of time ONKEY is held before emergency action is taken */ +#define PCF50633_ONKEY1S_TIMEOUT 8 + +static void pcf50633_irq_worker(struct work_struct *work) +{ + struct pcf50633 *pcf; + int ret, i, j; + u8 pcf_int[5], chgstat; + + pcf = container_of(work, struct pcf50633, irq_work); + + /* Read the 5 INT regs in one transaction */ + ret = pcf50633_read_block(pcf, PCF50633_REG_INT1, + ARRAY_SIZE(pcf_int), pcf_int); + if (ret != ARRAY_SIZE(pcf_int)) { + dev_err(pcf->dev, "Error reading INT registers\n"); + + /* + * If this doesn't ACK the interrupt to the chip, we'll be + * called once again as we're level triggered. + */ + goto out; + } + + /* We immediately read the usb and adapter status. We thus make sure + * only of USBINS/USBREM IRQ handlers are called */ + if (pcf_int[0] & (PCF50633_INT1_USBINS | PCF50633_INT1_USBREM)) { + chgstat = pcf50633_reg_read(pcf, PCF50633_REG_MBCS2); + if (chgstat & (0x3 << 4)) + pcf_int[0] &= ~(1 << PCF50633_INT1_USBREM); + else + pcf_int[0] &= ~(1 << PCF50633_INT1_USBINS); + } + + /* Make sure only one of ADPINS or ADPREM is set */ + if (pcf_int[0] & (PCF50633_INT1_ADPINS | PCF50633_INT1_ADPREM)) { + chgstat = pcf50633_reg_read(pcf, PCF50633_REG_MBCS2); + if (chgstat & (0x3 << 4)) + pcf_int[0] &= ~(1 << PCF50633_INT1_ADPREM); + else + pcf_int[0] &= ~(1 << PCF50633_INT1_ADPINS); + } + + dev_dbg(pcf->dev, "INT1=0x%02x INT2=0x%02x INT3=0x%02x " + "INT4=0x%02x INT5=0x%02x\n", pcf_int[0], + pcf_int[1], pcf_int[2], pcf_int[3], pcf_int[4]); + + /* Some revisions of the chip don't have a 8s standby mode on + * ONKEY1S press. We try to manually do it in such cases. */ + if ((pcf_int[0] & PCF50633_INT1_SECOND) && pcf->onkey1s_held) { + dev_info(pcf->dev, "ONKEY1S held for %d secs\n", + pcf->onkey1s_held); + if (pcf->onkey1s_held++ == PCF50633_ONKEY1S_TIMEOUT) + if (pcf->pdata->force_shutdown) + pcf->pdata->force_shutdown(pcf); + } + + if (pcf_int[2] & PCF50633_INT3_ONKEY1S) { + dev_info(pcf->dev, "ONKEY1S held\n"); + pcf->onkey1s_held = 1 ; + + /* Unmask IRQ_SECOND */ + pcf50633_reg_clear_bits(pcf, PCF50633_REG_INT1M, + PCF50633_INT1_SECOND); + + /* Unmask IRQ_ONKEYR */ + pcf50633_reg_clear_bits(pcf, PCF50633_REG_INT2M, + PCF50633_INT2_ONKEYR); + } + + if ((pcf_int[1] & PCF50633_INT2_ONKEYR) && pcf->onkey1s_held) { + pcf->onkey1s_held = 0; + + /* Mask SECOND and ONKEYR interrupts */ + if (pcf->mask_regs[0] & PCF50633_INT1_SECOND) + pcf50633_reg_set_bit_mask(pcf, + PCF50633_REG_INT1M, + PCF50633_INT1_SECOND, + PCF50633_INT1_SECOND); + + if (pcf->mask_regs[1] & PCF50633_INT2_ONKEYR) + pcf50633_reg_set_bit_mask(pcf, + PCF50633_REG_INT2M, + PCF50633_INT2_ONKEYR, + PCF50633_INT2_ONKEYR); + } + + /* Have we just resumed ? */ + if (pcf->is_suspended) { + pcf->is_suspended = 0; + + /* Set the resume reason filtering out non resumers */ + for (i = 0; i < ARRAY_SIZE(pcf_int); i++) + pcf->resume_reason[i] = pcf_int[i] & + pcf->pdata->resumers[i]; + + /* Make sure we don't pass on any ONKEY events to + * userspace now */ + pcf_int[1] &= ~(PCF50633_INT2_ONKEYR | PCF50633_INT2_ONKEYF); + } + + for (i = 0; i < ARRAY_SIZE(pcf_int); i++) { + /* Unset masked interrupts */ + pcf_int[i] &= ~pcf->mask_regs[i]; + + for (j = 0; j < 8 ; j++) + if (pcf_int[i] & (1 << j)) + pcf50633_irq_call_handler(pcf, (i * 8) + j); + } + +out: + put_device(pcf->dev); + enable_irq(pcf->irq); +} + +static irqreturn_t pcf50633_irq(int irq, void *data) +{ + struct pcf50633 *pcf = data; + + dev_dbg(pcf->dev, "pcf50633_irq\n"); + + get_device(pcf->dev); + disable_irq(pcf->irq); + schedule_work(&pcf->irq_work); + + return IRQ_HANDLED; +} + +static void +pcf50633_client_dev_register(struct pcf50633 *pcf, const char *name, + struct platform_device **pdev) +{ + struct pcf50633_subdev_pdata *subdev_pdata; + int ret; + + *pdev = platform_device_alloc(name, -1); + if (!*pdev) { + dev_err(pcf->dev, "Falied to allocate %s\n", name); + return; + } + + subdev_pdata = kmalloc(sizeof(*subdev_pdata), GFP_KERNEL); + if (!subdev_pdata) { + dev_err(pcf->dev, "Error allocating subdev pdata\n"); + platform_device_put(*pdev); + } + + subdev_pdata->pcf = pcf; + platform_device_add_data(*pdev, subdev_pdata, sizeof(*subdev_pdata)); + + (*pdev)->dev.parent = pcf->dev; + + ret = platform_device_add(*pdev); + if (ret) { + dev_err(pcf->dev, "Failed to register %s: %d\n", name, ret); + platform_device_put(*pdev); + *pdev = NULL; + } +} + +#ifdef CONFIG_PM +static int pcf50633_suspend(struct device *dev, pm_message_t state) +{ + struct pcf50633 *pcf; + int ret = 0, i; + u8 res[5]; + + pcf = dev_get_drvdata(dev); + + /* Make sure our interrupt handlers are not called + * henceforth */ + disable_irq(pcf->irq); + + /* Make sure that any running IRQ worker has quit */ + cancel_work_sync(&pcf->irq_work); + + /* Save the masks */ + ret = pcf50633_read_block(pcf, PCF50633_REG_INT1M, + ARRAY_SIZE(pcf->suspend_irq_masks), + pcf->suspend_irq_masks); + if (ret < 0) { + dev_err(pcf->dev, "error saving irq masks\n"); + goto out; + } + + /* Write wakeup irq masks */ + for (i = 0; i < ARRAY_SIZE(res); i++) + res[i] = ~pcf->pdata->resumers[i]; + + ret = pcf50633_write_block(pcf, PCF50633_REG_INT1M, + ARRAY_SIZE(res), &res[0]); + if (ret < 0) { + dev_err(pcf->dev, "error writing wakeup irq masks\n"); + goto out; + } + + pcf->is_suspended = 1; + +out: + return ret; +} + +static int pcf50633_resume(struct device *dev) +{ + struct pcf50633 *pcf; + int ret; + + pcf = dev_get_drvdata(dev); + + /* Write the saved mask registers */ + ret = pcf50633_write_block(pcf, PCF50633_REG_INT1M, + ARRAY_SIZE(pcf->suspend_irq_masks), + pcf->suspend_irq_masks); + if (ret < 0) + dev_err(pcf->dev, "Error restoring saved suspend masks\n"); + + /* Restore regulators' state */ + + + get_device(pcf->dev); + + /* + * Clear any pending interrupts and set resume reason if any. + * This will leave with enable_irq() + */ + pcf50633_irq_worker(&pcf->irq_work); + + return 0; +} +#else +#define pcf50633_suspend NULL +#define pcf50633_resume NULL +#endif + +static int __devinit pcf50633_probe(struct i2c_client *client, + const struct i2c_device_id *ids) +{ + struct pcf50633 *pcf; + struct pcf50633_platform_data *pdata = client->dev.platform_data; + int i, ret = 0; + int version, variant; + + pcf = kzalloc(sizeof(*pcf), GFP_KERNEL); + if (!pcf) + return -ENOMEM; + + pcf->pdata = pdata; + + mutex_init(&pcf->lock); + + i2c_set_clientdata(client, pcf); + pcf->dev = &client->dev; + pcf->i2c_client = client; + pcf->irq = client->irq; + + INIT_WORK(&pcf->irq_work, pcf50633_irq_worker); + + version = pcf50633_reg_read(pcf, 0); + variant = pcf50633_reg_read(pcf, 1); + if (version < 0 || variant < 0) { + dev_err(pcf->dev, "Unable to probe pcf50633\n"); + ret = -ENODEV; + goto err; + } + + dev_info(pcf->dev, "Probed device version %d variant %d\n", + version, variant); + + /* Enable all interrupts except RTC SECOND */ + pcf->mask_regs[0] = 0x80; + pcf50633_reg_write(pcf, PCF50633_REG_INT1M, pcf->mask_regs[0]); + pcf50633_reg_write(pcf, PCF50633_REG_INT2M, 0x00); + pcf50633_reg_write(pcf, PCF50633_REG_INT3M, 0x00); + pcf50633_reg_write(pcf, PCF50633_REG_INT4M, 0x00); + pcf50633_reg_write(pcf, PCF50633_REG_INT5M, 0x00); + + /* Create sub devices */ + pcf50633_client_dev_register(pcf, "pcf50633-input", + &pcf->input_pdev); + pcf50633_client_dev_register(pcf, "pcf50633-rtc", + &pcf->rtc_pdev); + pcf50633_client_dev_register(pcf, "pcf50633-mbc", + &pcf->mbc_pdev); + pcf50633_client_dev_register(pcf, "pcf50633-adc", + &pcf->adc_pdev); + + for (i = 0; i < PCF50633_NUM_REGULATORS; i++) { + struct platform_device *pdev; + + pdev = platform_device_alloc("pcf50633-regltr", i); + if (!pdev) { + dev_err(pcf->dev, "Cannot create regulator\n"); + continue; + } + + pdev->dev.parent = pcf->dev; + pdev->dev.platform_data = &pdata->reg_init_data[i]; + pdev->dev.driver_data = pcf; + pcf->regulator_pdev[i] = pdev; + + platform_device_add(pdev); + } + + if (client->irq) { + set_irq_handler(client->irq, handle_level_irq); + ret = request_irq(client->irq, pcf50633_irq, + IRQF_TRIGGER_LOW, "pcf50633", pcf); + + if (ret) { + dev_err(pcf->dev, "Failed to request IRQ %d\n", ret); + goto err; + } + } else { + dev_err(pcf->dev, "No IRQ configured\n"); + goto err; + } + + if (enable_irq_wake(client->irq) < 0) + dev_err(pcf->dev, "IRQ %u cannot be enabled as wake-up source" + "in this hardware revision", client->irq); + + ret = sysfs_create_group(&client->dev.kobj, &pcf_attr_group); + if (ret) + dev_err(pcf->dev, "error creating sysfs entries\n"); + + if (pdata->probe_done) + pdata->probe_done(pcf); + + return 0; + +err: + kfree(pcf); + return ret; +} + +static int __devexit pcf50633_remove(struct i2c_client *client) +{ + struct pcf50633 *pcf = i2c_get_clientdata(client); + int i; + + free_irq(pcf->irq, pcf); + + platform_device_unregister(pcf->input_pdev); + platform_device_unregister(pcf->rtc_pdev); + platform_device_unregister(pcf->mbc_pdev); + platform_device_unregister(pcf->adc_pdev); + + for (i = 0; i < PCF50633_NUM_REGULATORS; i++) + platform_device_unregister(pcf->regulator_pdev[i]); + + kfree(pcf); + + return 0; +} + +static struct i2c_device_id pcf50633_id_table[] = { + {"pcf50633", 0x73}, +}; + +static struct i2c_driver pcf50633_driver = { + .driver = { + .name = "pcf50633", + .suspend = pcf50633_suspend, + .resume = pcf50633_resume, + }, + .id_table = pcf50633_id_table, + .probe = pcf50633_probe, + .remove = __devexit_p(pcf50633_remove), +}; + +static int __init pcf50633_init(void) +{ + return i2c_add_driver(&pcf50633_driver); +} + +static void __exit pcf50633_exit(void) +{ + i2c_del_driver(&pcf50633_driver); +} + +MODULE_DESCRIPTION("I2C chip driver for NXP PCF50633 PMU"); +MODULE_AUTHOR("Harald Welte "); +MODULE_LICENSE("GPL"); + +module_init(pcf50633_init); +module_exit(pcf50633_exit); diff --git a/include/linux/mfd/pcf50633/core.h b/include/linux/mfd/pcf50633/core.h new file mode 100644 index 000000000000..4455b212d75a --- /dev/null +++ b/include/linux/mfd/pcf50633/core.h @@ -0,0 +1,218 @@ +/* + * core.h -- Core driver for NXP PCF50633 + * + * (C) 2006-2008 by Openmoko, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __LINUX_MFD_PCF50633_CORE_H +#define __LINUX_MFD_PCF50633_CORE_H + +#include +#include +#include +#include +#include + +struct pcf50633; + +#define PCF50633_NUM_REGULATORS 11 + +struct pcf50633_platform_data { + struct regulator_init_data reg_init_data[PCF50633_NUM_REGULATORS]; + + char **batteries; + int num_batteries; + + /* Callbacks */ + void (*probe_done)(struct pcf50633 *); + void (*mbc_event_callback)(struct pcf50633 *, int); + void (*regulator_registered)(struct pcf50633 *, int); + void (*force_shutdown)(struct pcf50633 *); + + u8 resumers[5]; +}; + +struct pcf50633_subdev_pdata { + struct pcf50633 *pcf; +}; + +struct pcf50633_irq { + void (*handler) (int, void *); + void *data; +}; + +int pcf50633_register_irq(struct pcf50633 *pcf, int irq, + void (*handler) (int, void *), void *data); +int pcf50633_free_irq(struct pcf50633 *pcf, int irq); + +int pcf50633_irq_mask(struct pcf50633 *pcf, int irq); +int pcf50633_irq_unmask(struct pcf50633 *pcf, int irq); +int pcf50633_irq_mask_get(struct pcf50633 *pcf, int irq); + +int pcf50633_read_block(struct pcf50633 *, u8 reg, + int nr_regs, u8 *data); +int pcf50633_write_block(struct pcf50633 *pcf, u8 reg, + int nr_regs, u8 *data); +u8 pcf50633_reg_read(struct pcf50633 *, u8 reg); +int pcf50633_reg_write(struct pcf50633 *pcf, u8 reg, u8 val); + +int pcf50633_reg_set_bit_mask(struct pcf50633 *pcf, u8 reg, u8 mask, u8 val); +int pcf50633_reg_clear_bits(struct pcf50633 *pcf, u8 reg, u8 bits); + +/* Interrupt registers */ + +#define PCF50633_REG_INT1 0x02 +#define PCF50633_REG_INT2 0x03 +#define PCF50633_REG_INT3 0x04 +#define PCF50633_REG_INT4 0x05 +#define PCF50633_REG_INT5 0x06 + +#define PCF50633_REG_INT1M 0x07 +#define PCF50633_REG_INT2M 0x08 +#define PCF50633_REG_INT3M 0x09 +#define PCF50633_REG_INT4M 0x0a +#define PCF50633_REG_INT5M 0x0b + +enum { + /* Chip IRQs */ + PCF50633_IRQ_ADPINS, + PCF50633_IRQ_ADPREM, + PCF50633_IRQ_USBINS, + PCF50633_IRQ_USBREM, + PCF50633_IRQ_RESERVED1, + PCF50633_IRQ_RESERVED2, + PCF50633_IRQ_ALARM, + PCF50633_IRQ_SECOND, + PCF50633_IRQ_ONKEYR, + PCF50633_IRQ_ONKEYF, + PCF50633_IRQ_EXTON1R, + PCF50633_IRQ_EXTON1F, + PCF50633_IRQ_EXTON2R, + PCF50633_IRQ_EXTON2F, + PCF50633_IRQ_EXTON3R, + PCF50633_IRQ_EXTON3F, + PCF50633_IRQ_BATFULL, + PCF50633_IRQ_CHGHALT, + PCF50633_IRQ_THLIMON, + PCF50633_IRQ_THLIMOFF, + PCF50633_IRQ_USBLIMON, + PCF50633_IRQ_USBLIMOFF, + PCF50633_IRQ_ADCRDY, + PCF50633_IRQ_ONKEY1S, + PCF50633_IRQ_LOWSYS, + PCF50633_IRQ_LOWBAT, + PCF50633_IRQ_HIGHTMP, + PCF50633_IRQ_AUTOPWRFAIL, + PCF50633_IRQ_DWN1PWRFAIL, + PCF50633_IRQ_DWN2PWRFAIL, + PCF50633_IRQ_LEDPWRFAIL, + PCF50633_IRQ_LEDOVP, + PCF50633_IRQ_LDO1PWRFAIL, + PCF50633_IRQ_LDO2PWRFAIL, + PCF50633_IRQ_LDO3PWRFAIL, + PCF50633_IRQ_LDO4PWRFAIL, + PCF50633_IRQ_LDO5PWRFAIL, + PCF50633_IRQ_LDO6PWRFAIL, + PCF50633_IRQ_HCLDOPWRFAIL, + PCF50633_IRQ_HCLDOOVL, + + /* Always last */ + PCF50633_NUM_IRQ, +}; + +struct pcf50633 { + struct device *dev; + struct i2c_client *i2c_client; + + struct pcf50633_platform_data *pdata; + int irq; + struct pcf50633_irq irq_handler[PCF50633_NUM_IRQ]; + struct work_struct irq_work; + struct mutex lock; + + u8 mask_regs[5]; + + u8 suspend_irq_masks[5]; + u8 resume_reason[5]; + int is_suspended; + + int onkey1s_held; + + struct platform_device *rtc_pdev; + struct platform_device *mbc_pdev; + struct platform_device *adc_pdev; + struct platform_device *input_pdev; + struct platform_device *regulator_pdev[PCF50633_NUM_REGULATORS]; +}; + +enum pcf50633_reg_int1 { + PCF50633_INT1_ADPINS = 0x01, /* Adapter inserted */ + PCF50633_INT1_ADPREM = 0x02, /* Adapter removed */ + PCF50633_INT1_USBINS = 0x04, /* USB inserted */ + PCF50633_INT1_USBREM = 0x08, /* USB removed */ + /* reserved */ + PCF50633_INT1_ALARM = 0x40, /* RTC alarm time is reached */ + PCF50633_INT1_SECOND = 0x80, /* RTC periodic second interrupt */ +}; + +enum pcf50633_reg_int2 { + PCF50633_INT2_ONKEYR = 0x01, /* ONKEY rising edge */ + PCF50633_INT2_ONKEYF = 0x02, /* ONKEY falling edge */ + PCF50633_INT2_EXTON1R = 0x04, /* EXTON1 rising edge */ + PCF50633_INT2_EXTON1F = 0x08, /* EXTON1 falling edge */ + PCF50633_INT2_EXTON2R = 0x10, /* EXTON2 rising edge */ + PCF50633_INT2_EXTON2F = 0x20, /* EXTON2 falling edge */ + PCF50633_INT2_EXTON3R = 0x40, /* EXTON3 rising edge */ + PCF50633_INT2_EXTON3F = 0x80, /* EXTON3 falling edge */ +}; + +enum pcf50633_reg_int3 { + PCF50633_INT3_BATFULL = 0x01, /* Battery full */ + PCF50633_INT3_CHGHALT = 0x02, /* Charger halt */ + PCF50633_INT3_THLIMON = 0x04, + PCF50633_INT3_THLIMOFF = 0x08, + PCF50633_INT3_USBLIMON = 0x10, + PCF50633_INT3_USBLIMOFF = 0x20, + PCF50633_INT3_ADCRDY = 0x40, /* ADC result ready */ + PCF50633_INT3_ONKEY1S = 0x80, /* ONKEY pressed 1 second */ +}; + +enum pcf50633_reg_int4 { + PCF50633_INT4_LOWSYS = 0x01, + PCF50633_INT4_LOWBAT = 0x02, + PCF50633_INT4_HIGHTMP = 0x04, + PCF50633_INT4_AUTOPWRFAIL = 0x08, + PCF50633_INT4_DWN1PWRFAIL = 0x10, + PCF50633_INT4_DWN2PWRFAIL = 0x20, + PCF50633_INT4_LEDPWRFAIL = 0x40, + PCF50633_INT4_LEDOVP = 0x80, +}; + +enum pcf50633_reg_int5 { + PCF50633_INT5_LDO1PWRFAIL = 0x01, + PCF50633_INT5_LDO2PWRFAIL = 0x02, + PCF50633_INT5_LDO3PWRFAIL = 0x04, + PCF50633_INT5_LDO4PWRFAIL = 0x08, + PCF50633_INT5_LDO5PWRFAIL = 0x10, + PCF50633_INT5_LDO6PWRFAIL = 0x20, + PCF50633_INT5_HCLDOPWRFAIL = 0x40, + PCF50633_INT5_HCLDOOVL = 0x80, +}; + +/* misc. registers */ +#define PCF50633_REG_OOCSHDWN 0x0c + +/* LED registers */ +#define PCF50633_REG_LEDOUT 0x28 +#define PCF50633_REG_LEDENA 0x29 +#define PCF50633_REG_LEDCTL 0x2a +#define PCF50633_REG_LEDDIM 0x2b + +#endif + -- cgit v1.2.3-71-gd317 From 08c3e06a5eb27d43b712adef18379f8464425e71 Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 9 Jan 2009 01:49:26 +0100 Subject: mfd: PCF50633 adc driver This patch adds basic support for the PCF50633 ADC. The subtractive mode is not supported yet. Since we don't have adc subsystem, it currently lives in drivers/mfd. Signed-off-by: Balaji Rao Cc: Andy Green Acked-by: Jonathan Cameron Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 7 + drivers/mfd/Makefile | 3 +- drivers/mfd/pcf50633-adc.c | 277 +++++++++++++++++++++++++++++++++++++++ include/linux/mfd/pcf50633/adc.h | 72 ++++++++++ 4 files changed, 358 insertions(+), 1 deletion(-) create mode 100644 drivers/mfd/pcf50633-adc.c create mode 100644 include/linux/mfd/pcf50633/adc.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 3ac32e45b03b..2fa3504e165a 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -226,6 +226,13 @@ config MFD_PCF50633 facilities, and registers devices for the various functions so that function-specific drivers can bind to them. +config PCF50633_ADC + tristate "Support for NXP PCF50633 ADC" + depends on MFD_PCF50633 + help + Say yes here if you want to include support for ADC in the + NXP PCF50633 chip. + endmenu menu "Multimedia Capabilities Port drivers" diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 23d4d10981b3..138f9c4d3d7b 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -38,4 +38,5 @@ obj-$(CONFIG_UCB1400_CORE) += ucb1400_core.o obj-$(CONFIG_PMIC_DA903X) += da903x.o -obj-$(CONFIG_MFD_PCF50633) += pcf50633-core.o \ No newline at end of file +obj-$(CONFIG_MFD_PCF50633) += pcf50633-core.o +obj-$(CONFIG_PCF50633_ADC) += pcf50633-adc.o \ No newline at end of file diff --git a/drivers/mfd/pcf50633-adc.c b/drivers/mfd/pcf50633-adc.c new file mode 100644 index 000000000000..c2d05becfa97 --- /dev/null +++ b/drivers/mfd/pcf50633-adc.c @@ -0,0 +1,277 @@ +/* NXP PCF50633 ADC Driver + * + * (C) 2006-2008 by Openmoko, Inc. + * Author: Balaji Rao + * All rights reserved. + * + * Broken down from monstrous PCF50633 driver mainly by + * Harald Welte, Andy Green and Werner Almesberger + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * NOTE: This driver does not yet support subtractive ADC mode, which means + * you can do only one measurement per read request. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +struct pcf50633_adc_request { + int mux; + int avg; + int result; + void (*callback)(struct pcf50633 *, void *, int); + void *callback_param; + + /* Used in case of sync requests */ + struct completion completion; + +}; + +#define PCF50633_MAX_ADC_FIFO_DEPTH 8 + +struct pcf50633_adc { + struct pcf50633 *pcf; + + /* Private stuff */ + struct pcf50633_adc_request *queue[PCF50633_MAX_ADC_FIFO_DEPTH]; + int queue_head; + int queue_tail; + struct mutex queue_mutex; +}; + +static inline struct pcf50633_adc *__to_adc(struct pcf50633 *pcf) +{ + return platform_get_drvdata(pcf->adc_pdev); +} + +static void adc_setup(struct pcf50633 *pcf, int channel, int avg) +{ + channel &= PCF50633_ADCC1_ADCMUX_MASK; + + /* kill ratiometric, but enable ACCSW biasing */ + pcf50633_reg_write(pcf, PCF50633_REG_ADCC2, 0x00); + pcf50633_reg_write(pcf, PCF50633_REG_ADCC3, 0x01); + + /* start ADC conversion on selected channel */ + pcf50633_reg_write(pcf, PCF50633_REG_ADCC1, channel | avg | + PCF50633_ADCC1_ADCSTART | PCF50633_ADCC1_RES_10BIT); +} + +static void trigger_next_adc_job_if_any(struct pcf50633 *pcf) +{ + struct pcf50633_adc *adc = __to_adc(pcf); + int head; + + mutex_lock(&adc->queue_mutex); + + head = adc->queue_head; + + if (!adc->queue[head]) { + mutex_unlock(&adc->queue_mutex); + return; + } + mutex_unlock(&adc->queue_mutex); + + adc_setup(pcf, adc->queue[head]->mux, adc->queue[head]->avg); +} + +static int +adc_enqueue_request(struct pcf50633 *pcf, struct pcf50633_adc_request *req) +{ + struct pcf50633_adc *adc = __to_adc(pcf); + int head, tail; + + mutex_lock(&adc->queue_mutex); + + head = adc->queue_head; + tail = adc->queue_tail; + + if (adc->queue[tail]) { + mutex_unlock(&adc->queue_mutex); + return -EBUSY; + } + + adc->queue[tail] = req; + adc->queue_tail = (tail + 1) & (PCF50633_MAX_ADC_FIFO_DEPTH - 1); + + mutex_unlock(&adc->queue_mutex); + + trigger_next_adc_job_if_any(pcf); + + return 0; +} + +static void +pcf50633_adc_sync_read_callback(struct pcf50633 *pcf, void *param, int result) +{ + struct pcf50633_adc_request *req = param; + + req->result = result; + complete(&req->completion); +} + +int pcf50633_adc_sync_read(struct pcf50633 *pcf, int mux, int avg) +{ + struct pcf50633_adc_request *req; + + /* req is freed when the result is ready, in interrupt handler */ + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->mux = mux; + req->avg = avg; + req->callback = pcf50633_adc_sync_read_callback; + req->callback_param = req; + + init_completion(&req->completion); + adc_enqueue_request(pcf, req); + wait_for_completion(&req->completion); + + return req->result; +} +EXPORT_SYMBOL_GPL(pcf50633_adc_sync_read); + +int pcf50633_adc_async_read(struct pcf50633 *pcf, int mux, int avg, + void (*callback)(struct pcf50633 *, void *, int), + void *callback_param) +{ + struct pcf50633_adc_request *req; + + /* req is freed when the result is ready, in interrupt handler */ + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->mux = mux; + req->avg = avg; + req->callback = callback; + req->callback_param = callback_param; + + adc_enqueue_request(pcf, req); + + return 0; +} +EXPORT_SYMBOL_GPL(pcf50633_adc_async_read); + +static int adc_result(struct pcf50633 *pcf) +{ + u8 adcs1, adcs3; + u16 result; + + adcs1 = pcf50633_reg_read(pcf, PCF50633_REG_ADCS1); + adcs3 = pcf50633_reg_read(pcf, PCF50633_REG_ADCS3); + result = (adcs1 << 2) | (adcs3 & PCF50633_ADCS3_ADCDAT1L_MASK); + + dev_dbg(pcf->dev, "adc result = %d\n", result); + + return result; +} + +static void pcf50633_adc_irq(int irq, void *data) +{ + struct pcf50633_adc *adc = data; + struct pcf50633 *pcf = adc->pcf; + struct pcf50633_adc_request *req; + int head; + + mutex_lock(&adc->queue_mutex); + head = adc->queue_head; + + req = adc->queue[head]; + if (WARN_ON(!req)) { + dev_err(pcf->dev, "pcf50633-adc irq: ADC queue empty!\n"); + mutex_unlock(&adc->queue_mutex); + return; + } + adc->queue[head] = NULL; + adc->queue_head = (head + 1) & + (PCF50633_MAX_ADC_FIFO_DEPTH - 1); + + mutex_unlock(&adc->queue_mutex); + + req->callback(pcf, req->callback_param, adc_result(pcf)); + kfree(req); + + trigger_next_adc_job_if_any(pcf); +} + +static int __devinit pcf50633_adc_probe(struct platform_device *pdev) +{ + struct pcf50633_subdev_pdata *pdata = pdev->dev.platform_data; + struct pcf50633_adc *adc; + + adc = kzalloc(sizeof(*adc), GFP_KERNEL); + if (!adc) + return -ENOMEM; + + adc->pcf = pdata->pcf; + platform_set_drvdata(pdev, adc); + + pcf50633_register_irq(pdata->pcf, PCF50633_IRQ_ADCRDY, + pcf50633_adc_irq, adc); + + mutex_init(&adc->queue_mutex); + + return 0; +} + +static int __devexit pcf50633_adc_remove(struct platform_device *pdev) +{ + struct pcf50633_adc *adc = platform_get_drvdata(pdev); + int i, head; + + pcf50633_free_irq(adc->pcf, PCF50633_IRQ_ADCRDY); + + mutex_lock(&adc->queue_mutex); + head = adc->queue_head; + + if (WARN_ON(adc->queue[head])) + dev_err(adc->pcf->dev, + "adc driver removed with request pending\n"); + + for (i = 0; i < PCF50633_MAX_ADC_FIFO_DEPTH; i++) + kfree(adc->queue[i]); + + mutex_unlock(&adc->queue_mutex); + kfree(adc); + + return 0; +} + +static struct platform_driver pcf50633_adc_driver = { + .driver = { + .name = "pcf50633-adc", + }, + .probe = pcf50633_adc_probe, + .remove = __devexit_p(pcf50633_adc_remove), +}; + +static int __init pcf50633_adc_init(void) +{ + return platform_driver_register(&pcf50633_adc_driver); +} +module_init(pcf50633_adc_init); + +static void __exit pcf50633_adc_exit(void) +{ + platform_driver_unregister(&pcf50633_adc_driver); +} +module_exit(pcf50633_adc_exit); + +MODULE_AUTHOR("Balaji Rao "); +MODULE_DESCRIPTION("PCF50633 adc driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:pcf50633-adc"); + diff --git a/include/linux/mfd/pcf50633/adc.h b/include/linux/mfd/pcf50633/adc.h new file mode 100644 index 000000000000..56669b4183ad --- /dev/null +++ b/include/linux/mfd/pcf50633/adc.h @@ -0,0 +1,72 @@ +/* + * adc.h -- Driver for NXP PCF50633 ADC + * + * (C) 2006-2008 by Openmoko, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __LINUX_MFD_PCF50633_ADC_H +#define __LINUX_MFD_PCF50633_ADC_H + +#include +#include + +/* ADC Registers */ +#define PCF50633_REG_ADCC3 0x52 +#define PCF50633_REG_ADCC2 0x53 +#define PCF50633_REG_ADCC1 0x54 +#define PCF50633_REG_ADCS1 0x55 +#define PCF50633_REG_ADCS2 0x56 +#define PCF50633_REG_ADCS3 0x57 + +#define PCF50633_ADCC1_ADCSTART 0x01 +#define PCF50633_ADCC1_RES_10BIT 0x02 +#define PCF50633_ADCC1_AVERAGE_NO 0x00 +#define PCF50633_ADCC1_AVERAGE_4 0x04 +#define PCF50633_ADCC1_AVERAGE_8 0x08 +#define PCF50633_ADCC1_AVERAGE_16 0x0c +#define PCF50633_ADCC1_MUX_BATSNS_RES 0x00 +#define PCF50633_ADCC1_MUX_BATSNS_SUBTR 0x10 +#define PCF50633_ADCC1_MUX_ADCIN2_RES 0x20 +#define PCF50633_ADCC1_MUX_ADCIN2_SUBTR 0x30 +#define PCF50633_ADCC1_MUX_BATTEMP 0x60 +#define PCF50633_ADCC1_MUX_ADCIN1 0x70 +#define PCF50633_ADCC1_AVERAGE_MASK 0x0c +#define PCF50633_ADCC1_ADCMUX_MASK 0xf0 + +#define PCF50633_ADCC2_RATIO_NONE 0x00 +#define PCF50633_ADCC2_RATIO_BATTEMP 0x01 +#define PCF50633_ADCC2_RATIO_ADCIN1 0x02 +#define PCF50633_ADCC2_RATIO_BOTH 0x03 +#define PCF50633_ADCC2_RATIOSETTL_100US 0x04 + +#define PCF50633_ADCC3_ACCSW_EN 0x01 +#define PCF50633_ADCC3_NTCSW_EN 0x04 +#define PCF50633_ADCC3_RES_DIV_TWO 0x10 +#define PCF50633_ADCC3_RES_DIV_THREE 0x00 + +#define PCF50633_ADCS3_REF_NTCSW 0x00 +#define PCF50633_ADCS3_REF_ACCSW 0x10 +#define PCF50633_ADCS3_REF_2V0 0x20 +#define PCF50633_ADCS3_REF_VISA 0x30 +#define PCF50633_ADCS3_REF_2V0_2 0x70 +#define PCF50633_ADCS3_ADCRDY 0x80 + +#define PCF50633_ADCS3_ADCDAT1L_MASK 0x03 +#define PCF50633_ADCS3_ADCDAT2L_MASK 0x0c +#define PCF50633_ADCS3_ADCDAT2L_SHIFT 2 +#define PCF50633_ASCS3_REF_MASK 0x70 + +extern int +pcf50633_adc_async_read(struct pcf50633 *pcf, int mux, int avg, + void (*callback)(struct pcf50633 *, void *, int), + void *callback_param); +extern int +pcf50633_adc_sync_read(struct pcf50633 *pcf, int mux, int avg); + +#endif /* __LINUX_PCF50633_ADC_H */ -- cgit v1.2.3-71-gd317 From 6a3d119b4ce29cf32bfe91eb61d46e9dbd8ce38a Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 9 Jan 2009 01:49:37 +0100 Subject: mfd: PCF50633 gpio support What the PCF05633 calls as a 'GPIO' is much more than the GPIO in the linux sense and there are only 4 of them - which means, the gpiolib is not used here. Signed-off-by: Balaji Rao Cc: Andy Green Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 7 +++ drivers/mfd/Makefile | 3 +- drivers/mfd/pcf50633-gpio.c | 118 ++++++++++++++++++++++++++++++++++++++ include/linux/mfd/pcf50633/gpio.h | 52 +++++++++++++++++ 4 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 drivers/mfd/pcf50633-gpio.c create mode 100644 include/linux/mfd/pcf50633/gpio.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 2fa3504e165a..06a2b0f7737c 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -233,6 +233,13 @@ config PCF50633_ADC Say yes here if you want to include support for ADC in the NXP PCF50633 chip. +config PCF50633_GPIO + tristate "Support for NXP PCF50633 GPIO" + depends on MFD_PCF50633 + help + Say yes here if you want to include support GPIO for pins on + the PCF50633 chip. + endmenu menu "Multimedia Capabilities Port drivers" diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 138f9c4d3d7b..3afb5192e4da 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -39,4 +39,5 @@ obj-$(CONFIG_UCB1400_CORE) += ucb1400_core.o obj-$(CONFIG_PMIC_DA903X) += da903x.o obj-$(CONFIG_MFD_PCF50633) += pcf50633-core.o -obj-$(CONFIG_PCF50633_ADC) += pcf50633-adc.o \ No newline at end of file +obj-$(CONFIG_PCF50633_ADC) += pcf50633-adc.o +obj-$(CONFIG_PCF50633_GPIO) += pcf50633-gpio.o \ No newline at end of file diff --git a/drivers/mfd/pcf50633-gpio.c b/drivers/mfd/pcf50633-gpio.c new file mode 100644 index 000000000000..2fa2eca5c9cc --- /dev/null +++ b/drivers/mfd/pcf50633-gpio.c @@ -0,0 +1,118 @@ +/* NXP PCF50633 GPIO Driver + * + * (C) 2006-2008 by Openmoko, Inc. + * Author: Balaji Rao + * All rights reserved. + * + * Broken down from monstrous PCF50633 driver mainly by + * Harald Welte, Andy Green and Werner Almesberger + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include + +#include +#include + +enum pcf50633_regulator_id { + PCF50633_REGULATOR_AUTO, + PCF50633_REGULATOR_DOWN1, + PCF50633_REGULATOR_DOWN2, + PCF50633_REGULATOR_LDO1, + PCF50633_REGULATOR_LDO2, + PCF50633_REGULATOR_LDO3, + PCF50633_REGULATOR_LDO4, + PCF50633_REGULATOR_LDO5, + PCF50633_REGULATOR_LDO6, + PCF50633_REGULATOR_HCLDO, + PCF50633_REGULATOR_MEMLDO, +}; + +#define PCF50633_REG_AUTOOUT 0x1a +#define PCF50633_REG_DOWN1OUT 0x1e +#define PCF50633_REG_DOWN2OUT 0x22 +#define PCF50633_REG_MEMLDOOUT 0x26 +#define PCF50633_REG_LDO1OUT 0x2d +#define PCF50633_REG_LDO2OUT 0x2f +#define PCF50633_REG_LDO3OUT 0x31 +#define PCF50633_REG_LDO4OUT 0x33 +#define PCF50633_REG_LDO5OUT 0x35 +#define PCF50633_REG_LDO6OUT 0x37 +#define PCF50633_REG_HCLDOOUT 0x39 + +static const u8 pcf50633_regulator_registers[PCF50633_NUM_REGULATORS] = { + [PCF50633_REGULATOR_AUTO] = PCF50633_REG_AUTOOUT, + [PCF50633_REGULATOR_DOWN1] = PCF50633_REG_DOWN1OUT, + [PCF50633_REGULATOR_DOWN2] = PCF50633_REG_DOWN2OUT, + [PCF50633_REGULATOR_MEMLDO] = PCF50633_REG_MEMLDOOUT, + [PCF50633_REGULATOR_LDO1] = PCF50633_REG_LDO1OUT, + [PCF50633_REGULATOR_LDO2] = PCF50633_REG_LDO2OUT, + [PCF50633_REGULATOR_LDO3] = PCF50633_REG_LDO3OUT, + [PCF50633_REGULATOR_LDO4] = PCF50633_REG_LDO4OUT, + [PCF50633_REGULATOR_LDO5] = PCF50633_REG_LDO5OUT, + [PCF50633_REGULATOR_LDO6] = PCF50633_REG_LDO6OUT, + [PCF50633_REGULATOR_HCLDO] = PCF50633_REG_HCLDOOUT, +}; + +int pcf50633_gpio_set(struct pcf50633 *pcf, int gpio, u8 val) +{ + u8 reg; + + reg = gpio - PCF50633_GPIO1 + PCF50633_REG_GPIO1CFG; + + return pcf50633_reg_set_bit_mask(pcf, reg, 0x07, val); +} +EXPORT_SYMBOL_GPL(pcf50633_gpio_set); + +u8 pcf50633_gpio_get(struct pcf50633 *pcf, int gpio) +{ + u8 reg, val; + + reg = gpio - PCF50633_GPIO1 + PCF50633_REG_GPIO1CFG; + val = pcf50633_reg_read(pcf, reg) & 0x07; + + return val; +} +EXPORT_SYMBOL_GPL(pcf50633_gpio_get); + +int pcf50633_gpio_invert_set(struct pcf50633 *pcf, int gpio, int invert) +{ + u8 val, reg; + + reg = gpio - PCF50633_GPIO1 + PCF50633_REG_GPIO1CFG; + val = !!invert << 3; + + return pcf50633_reg_set_bit_mask(pcf, reg, 1 << 3, val); +} +EXPORT_SYMBOL_GPL(pcf50633_gpio_invert_set); + +int pcf50633_gpio_invert_get(struct pcf50633 *pcf, int gpio) +{ + u8 reg, val; + + reg = gpio - PCF50633_GPIO1 + PCF50633_REG_GPIO1CFG; + val = pcf50633_reg_read(pcf, reg); + + return val & (1 << 3); +} +EXPORT_SYMBOL_GPL(pcf50633_gpio_invert_get); + +int pcf50633_gpio_power_supply_set(struct pcf50633 *pcf, + int gpio, int regulator, int on) +{ + u8 reg, val, mask; + + /* the *ENA register is always one after the *OUT register */ + reg = pcf50633_regulator_registers[regulator] + 1; + + val = !!on << (gpio - PCF50633_GPIO1); + mask = 1 << (gpio - PCF50633_GPIO1); + + return pcf50633_reg_set_bit_mask(pcf, reg, mask, val); +} +EXPORT_SYMBOL_GPL(pcf50633_gpio_power_supply_set); diff --git a/include/linux/mfd/pcf50633/gpio.h b/include/linux/mfd/pcf50633/gpio.h new file mode 100644 index 000000000000..a42b845efc54 --- /dev/null +++ b/include/linux/mfd/pcf50633/gpio.h @@ -0,0 +1,52 @@ +/* + * gpio.h -- GPIO driver for NXP PCF50633 + * + * (C) 2006-2008 by Openmoko, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __LINUX_MFD_PCF50633_GPIO_H +#define __LINUX_MFD_PCF50633_GPIO_H + +#include + +#define PCF50633_GPIO1 1 +#define PCF50633_GPIO2 2 +#define PCF50633_GPIO3 3 +#define PCF50633_GPO 4 + +#define PCF50633_REG_GPIO1CFG 0x14 +#define PCF50633_REG_GPIO2CFG 0x15 +#define PCF50633_REG_GPIO3CFG 0x16 +#define PCF50633_REG_GPOCFG 0x17 + +#define PCF50633_GPOCFG_GPOSEL_MASK 0x07 + +enum pcf50633_reg_gpocfg { + PCF50633_GPOCFG_GPOSEL_0 = 0x00, + PCF50633_GPOCFG_GPOSEL_LED_NFET = 0x01, + PCF50633_GPOCFG_GPOSEL_SYSxOK = 0x02, + PCF50633_GPOCFG_GPOSEL_CLK32K = 0x03, + PCF50633_GPOCFG_GPOSEL_ADAPUSB = 0x04, + PCF50633_GPOCFG_GPOSEL_USBxOK = 0x05, + PCF50633_GPOCFG_GPOSEL_ACTPH4 = 0x06, + PCF50633_GPOCFG_GPOSEL_1 = 0x07, + PCF50633_GPOCFG_GPOSEL_INVERSE = 0x08, +}; + +int pcf50633_gpio_set(struct pcf50633 *pcf, int gpio, u8 val); +u8 pcf50633_gpio_get(struct pcf50633 *pcf, int gpio); + +int pcf50633_gpio_invert_set(struct pcf50633 *, int gpio, int invert); +int pcf50633_gpio_invert_get(struct pcf50633 *pcf, int gpio); + +int pcf50633_gpio_power_supply_set(struct pcf50633 *, + int gpio, int regulator, int on); +#endif /* __LINUX_MFD_PCF50633_GPIO_H */ + + -- cgit v1.2.3-71-gd317 From f5714dc97d63cc0dd1219bd0eb2e1f8df1e4347a Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 9 Jan 2009 01:50:55 +0100 Subject: power_supply: PCF50633 battery charger driver Signed-off-by: Balaji Rao Cc: Andy Green Cc: David Woodhouse Acked-by: Anton Vorontsov Signed-off-by: Samuel Ortiz --- drivers/power/Kconfig | 6 + drivers/power/Makefile | 1 + drivers/power/pcf50633-charger.c | 358 +++++++++++++++++++++++++++++++++++++++ include/linux/mfd/pcf50633/mbc.h | 134 +++++++++++++++ 4 files changed, 499 insertions(+) create mode 100644 drivers/power/pcf50633-charger.c create mode 100644 include/linux/mfd/pcf50633/mbc.h (limited to 'include/linux') diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index 668472405a57..33da1127992a 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -82,4 +82,10 @@ config BATTERY_DA9030 Say Y here to enable support for batteries charger integrated into DA9030 PMIC. +config CHARGER_PCF50633 + tristate "NXP PCF50633 MBC" + depends on MFD_PCF50633 + help + Say Y to include support for NXP PCF50633 Main Battery Charger. + endif # POWER_SUPPLY diff --git a/drivers/power/Makefile b/drivers/power/Makefile index eebb15505a40..2fcf41d13e5c 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -25,3 +25,4 @@ obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o obj-$(CONFIG_BATTERY_WM97XX) += wm97xx_battery.o obj-$(CONFIG_BATTERY_BQ27x00) += bq27x00_battery.o obj-$(CONFIG_BATTERY_DA9030) += da9030_battery.o +obj-$(CONFIG_CHARGER_PCF50633) += pcf50633-charger.o \ No newline at end of file diff --git a/drivers/power/pcf50633-charger.c b/drivers/power/pcf50633-charger.c new file mode 100644 index 000000000000..e988ec130fcd --- /dev/null +++ b/drivers/power/pcf50633-charger.c @@ -0,0 +1,358 @@ +/* NXP PCF50633 Main Battery Charger Driver + * + * (C) 2006-2008 by Openmoko, Inc. + * Author: Balaji Rao + * All rights reserved. + * + * Broken down from monstrous PCF50633 driver mainly by + * Harald Welte, Andy Green and Werner Almesberger + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct pcf50633_mbc { + struct pcf50633 *pcf; + + int adapter_active; + int adapter_online; + int usb_active; + int usb_online; + + struct power_supply usb; + struct power_supply adapter; +}; + +int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma) +{ + struct pcf50633_mbc *mbc = platform_get_drvdata(pcf->mbc_pdev); + int ret = 0; + u8 bits; + + if (ma >= 1000) + bits = PCF50633_MBCC7_USB_1000mA; + else if (ma >= 500) + bits = PCF50633_MBCC7_USB_500mA; + else if (ma >= 100) + bits = PCF50633_MBCC7_USB_100mA; + else + bits = PCF50633_MBCC7_USB_SUSPEND; + + ret = pcf50633_reg_set_bit_mask(pcf, PCF50633_REG_MBCC7, + PCF50633_MBCC7_USB_MASK, bits); + if (ret) + dev_err(pcf->dev, "error setting usb curlim to %d mA\n", ma); + else + dev_info(pcf->dev, "usb curlim to %d mA\n", ma); + + power_supply_changed(&mbc->usb); + + return ret; +} +EXPORT_SYMBOL_GPL(pcf50633_mbc_usb_curlim_set); + +int pcf50633_mbc_get_status(struct pcf50633 *pcf) +{ + struct pcf50633_mbc *mbc = platform_get_drvdata(pcf->mbc_pdev); + int status = 0; + + if (mbc->usb_online) + status |= PCF50633_MBC_USB_ONLINE; + if (mbc->usb_active) + status |= PCF50633_MBC_USB_ACTIVE; + if (mbc->adapter_online) + status |= PCF50633_MBC_ADAPTER_ONLINE; + if (mbc->adapter_active) + status |= PCF50633_MBC_ADAPTER_ACTIVE; + + return status; +} +EXPORT_SYMBOL_GPL(pcf50633_mbc_get_status); + +void pcf50633_mbc_set_status(struct pcf50633 *pcf, int what, int status) +{ + struct pcf50633_mbc *mbc = platform_get_drvdata(pcf->mbc_pdev); + + if (what & PCF50633_MBC_USB_ONLINE) + mbc->usb_online = !!status; + if (what & PCF50633_MBC_USB_ACTIVE) + mbc->usb_active = !!status; + if (what & PCF50633_MBC_ADAPTER_ONLINE) + mbc->adapter_online = !!status; + if (what & PCF50633_MBC_ADAPTER_ACTIVE) + mbc->adapter_active = !!status; +} +EXPORT_SYMBOL_GPL(pcf50633_mbc_set_status); + +static ssize_t +show_chgmode(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pcf50633_mbc *mbc = dev_get_drvdata(dev); + + u8 mbcs2 = pcf50633_reg_read(mbc->pcf, PCF50633_REG_MBCS2); + u8 chgmod = (mbcs2 & PCF50633_MBCS2_MBC_MASK); + + return sprintf(buf, "%d\n", chgmod); +} +static DEVICE_ATTR(chgmode, S_IRUGO, show_chgmode, NULL); + +static ssize_t +show_usblim(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pcf50633_mbc *mbc = dev_get_drvdata(dev); + u8 usblim = pcf50633_reg_read(mbc->pcf, PCF50633_REG_MBCC7) & + PCF50633_MBCC7_USB_MASK; + unsigned int ma; + + if (usblim == PCF50633_MBCC7_USB_1000mA) + ma = 1000; + else if (usblim == PCF50633_MBCC7_USB_500mA) + ma = 500; + else if (usblim == PCF50633_MBCC7_USB_100mA) + ma = 100; + else + ma = 0; + + return sprintf(buf, "%u\n", ma); +} + +static ssize_t set_usblim(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct pcf50633_mbc *mbc = dev_get_drvdata(dev); + unsigned long ma; + int ret; + + ret = strict_strtoul(buf, 10, &ma); + if (ret) + return -EINVAL; + + pcf50633_mbc_usb_curlim_set(mbc->pcf, ma); + + return count; +} + +static DEVICE_ATTR(usb_curlim, S_IRUGO | S_IWUSR, show_usblim, set_usblim); + +static struct attribute *pcf50633_mbc_sysfs_entries[] = { + &dev_attr_chgmode.attr, + &dev_attr_usb_curlim.attr, + NULL, +}; + +static struct attribute_group mbc_attr_group = { + .name = NULL, /* put in device directory */ + .attrs = pcf50633_mbc_sysfs_entries, +}; + +static void +pcf50633_mbc_irq_handler(int irq, void *data) +{ + struct pcf50633_mbc *mbc = data; + + /* USB */ + if (irq == PCF50633_IRQ_USBINS) { + mbc->usb_online = 1; + } else if (irq == PCF50633_IRQ_USBREM) { + mbc->usb_online = 0; + mbc->usb_active = 0; + pcf50633_mbc_usb_curlim_set(mbc->pcf, 0); + } + + /* Adapter */ + if (irq == PCF50633_IRQ_ADPINS) { + mbc->adapter_online = 1; + mbc->adapter_active = 1; + } else if (irq == PCF50633_IRQ_ADPREM) { + mbc->adapter_online = 0; + mbc->adapter_active = 0; + } + + if (irq == PCF50633_IRQ_BATFULL) { + mbc->usb_active = 0; + mbc->adapter_active = 0; + } + + power_supply_changed(&mbc->usb); + power_supply_changed(&mbc->adapter); + + if (mbc->pcf->pdata->mbc_event_callback) + mbc->pcf->pdata->mbc_event_callback(mbc->pcf, irq); +} + +static int adapter_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + struct pcf50633_mbc *mbc = container_of(psy, struct pcf50633_mbc, usb); + int ret = 0; + + switch (psp) { + case POWER_SUPPLY_PROP_ONLINE: + val->intval = mbc->adapter_online; + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int usb_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + struct pcf50633_mbc *mbc = container_of(psy, struct pcf50633_mbc, usb); + int ret = 0; + + switch (psp) { + case POWER_SUPPLY_PROP_ONLINE: + val->intval = mbc->usb_online; + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static enum power_supply_property power_props[] = { + POWER_SUPPLY_PROP_ONLINE, +}; + +static const u8 mbc_irq_handlers[] = { + PCF50633_IRQ_ADPINS, + PCF50633_IRQ_ADPREM, + PCF50633_IRQ_USBINS, + PCF50633_IRQ_USBREM, + PCF50633_IRQ_BATFULL, + PCF50633_IRQ_CHGHALT, + PCF50633_IRQ_THLIMON, + PCF50633_IRQ_THLIMOFF, + PCF50633_IRQ_USBLIMON, + PCF50633_IRQ_USBLIMOFF, + PCF50633_IRQ_LOWSYS, + PCF50633_IRQ_LOWBAT, +}; + +static int __devinit pcf50633_mbc_probe(struct platform_device *pdev) +{ + struct pcf50633_mbc *mbc; + struct pcf50633_subdev_pdata *pdata = pdev->dev.platform_data; + int ret; + int i; + u8 mbcs1; + + mbc = kzalloc(sizeof(*mbc), GFP_KERNEL); + if (!mbc) + return -ENOMEM; + + platform_set_drvdata(pdev, mbc); + mbc->pcf = pdata->pcf; + + /* Set up IRQ handlers */ + for (i = 0; i < ARRAY_SIZE(mbc_irq_handlers); i++) + pcf50633_register_irq(mbc->pcf, mbc_irq_handlers[i], + pcf50633_mbc_irq_handler, mbc); + + /* Create power supplies */ + mbc->adapter.name = "adapter"; + mbc->adapter.type = POWER_SUPPLY_TYPE_MAINS; + mbc->adapter.properties = power_props; + mbc->adapter.num_properties = ARRAY_SIZE(power_props); + mbc->adapter.get_property = &adapter_get_property; + mbc->adapter.supplied_to = mbc->pcf->pdata->batteries; + mbc->adapter.num_supplicants = mbc->pcf->pdata->num_batteries; + + mbc->usb.name = "usb"; + mbc->usb.type = POWER_SUPPLY_TYPE_USB; + mbc->usb.properties = power_props; + mbc->usb.num_properties = ARRAY_SIZE(power_props); + mbc->usb.get_property = usb_get_property; + mbc->usb.supplied_to = mbc->pcf->pdata->batteries; + mbc->usb.num_supplicants = mbc->pcf->pdata->num_batteries; + + ret = power_supply_register(&pdev->dev, &mbc->adapter); + if (ret) { + dev_err(mbc->pcf->dev, "failed to register adapter\n"); + kfree(mbc); + return ret; + } + + ret = power_supply_register(&pdev->dev, &mbc->usb); + if (ret) { + dev_err(mbc->pcf->dev, "failed to register usb\n"); + power_supply_unregister(&mbc->adapter); + kfree(mbc); + return ret; + } + + ret = sysfs_create_group(&pdev->dev.kobj, &mbc_attr_group); + if (ret) + dev_err(mbc->pcf->dev, "failed to create sysfs entries\n"); + + mbcs1 = pcf50633_reg_read(mbc->pcf, PCF50633_REG_MBCS1); + if (mbcs1 & PCF50633_MBCS1_USBPRES) + pcf50633_mbc_irq_handler(PCF50633_IRQ_USBINS, mbc); + if (mbcs1 & PCF50633_MBCS1_ADAPTPRES) + pcf50633_mbc_irq_handler(PCF50633_IRQ_ADPINS, mbc); + + return 0; +} + +static int __devexit pcf50633_mbc_remove(struct platform_device *pdev) +{ + struct pcf50633_mbc *mbc = platform_get_drvdata(pdev); + int i; + + /* Remove IRQ handlers */ + for (i = 0; i < ARRAY_SIZE(mbc_irq_handlers); i++) + pcf50633_free_irq(mbc->pcf, mbc_irq_handlers[i]); + + power_supply_unregister(&mbc->usb); + power_supply_unregister(&mbc->adapter); + + kfree(mbc); + + return 0; +} + +static struct platform_driver pcf50633_mbc_driver = { + .driver = { + .name = "pcf50633-mbc", + }, + .probe = pcf50633_mbc_probe, + .remove = __devexit_p(pcf50633_mbc_remove), +}; + +static int __init pcf50633_mbc_init(void) +{ + return platform_driver_register(&pcf50633_mbc_driver); +} +module_init(pcf50633_mbc_init); + +static void __exit pcf50633_mbc_exit(void) +{ + platform_driver_unregister(&pcf50633_mbc_driver); +} +module_exit(pcf50633_mbc_exit); + +MODULE_AUTHOR("Balaji Rao "); +MODULE_DESCRIPTION("PCF50633 mbc driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:pcf50633-mbc"); diff --git a/include/linux/mfd/pcf50633/mbc.h b/include/linux/mfd/pcf50633/mbc.h new file mode 100644 index 000000000000..6e17619b773a --- /dev/null +++ b/include/linux/mfd/pcf50633/mbc.h @@ -0,0 +1,134 @@ +/* + * mbc.h -- Driver for NXP PCF50633 Main Battery Charger + * + * (C) 2006-2008 by Openmoko, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __LINUX_MFD_PCF50633_MBC_H +#define __LINUX_MFD_PCF50633_MBC_H + +#include +#include + +#define PCF50633_REG_MBCC1 0x43 +#define PCF50633_REG_MBCC2 0x44 +#define PCF50633_REG_MBCC3 0x45 +#define PCF50633_REG_MBCC4 0x46 +#define PCF50633_REG_MBCC5 0x47 +#define PCF50633_REG_MBCC6 0x48 +#define PCF50633_REG_MBCC7 0x49 +#define PCF50633_REG_MBCC8 0x4a +#define PCF50633_REG_MBCS1 0x4b +#define PCF50633_REG_MBCS2 0x4c +#define PCF50633_REG_MBCS3 0x4d + +enum pcf50633_reg_mbcc1 { + PCF50633_MBCC1_CHGENA = 0x01, /* Charger enable */ + PCF50633_MBCC1_AUTOSTOP = 0x02, + PCF50633_MBCC1_AUTORES = 0x04, /* automatic resume */ + PCF50633_MBCC1_RESUME = 0x08, /* explicit resume cmd */ + PCF50633_MBCC1_RESTART = 0x10, /* restart charging */ + PCF50633_MBCC1_PREWDTIME_60M = 0x20, /* max. precharging time */ + PCF50633_MBCC1_WDTIME_1H = 0x00, + PCF50633_MBCC1_WDTIME_2H = 0x40, + PCF50633_MBCC1_WDTIME_4H = 0x80, + PCF50633_MBCC1_WDTIME_6H = 0xc0, +}; +#define PCF50633_MBCC1_WDTIME_MASK 0xc0 + +enum pcf50633_reg_mbcc2 { + PCF50633_MBCC2_VBATCOND_2V7 = 0x00, + PCF50633_MBCC2_VBATCOND_2V85 = 0x01, + PCF50633_MBCC2_VBATCOND_3V0 = 0x02, + PCF50633_MBCC2_VBATCOND_3V15 = 0x03, + PCF50633_MBCC2_VMAX_4V = 0x00, + PCF50633_MBCC2_VMAX_4V20 = 0x28, + PCF50633_MBCC2_VRESDEBTIME_64S = 0x80, /* debounce time (32/64sec) */ +}; + +enum pcf50633_reg_mbcc7 { + PCF50633_MBCC7_USB_100mA = 0x00, + PCF50633_MBCC7_USB_500mA = 0x01, + PCF50633_MBCC7_USB_1000mA = 0x02, + PCF50633_MBCC7_USB_SUSPEND = 0x03, + PCF50633_MBCC7_BATTEMP_EN = 0x04, + PCF50633_MBCC7_BATSYSIMAX_1A6 = 0x00, + PCF50633_MBCC7_BATSYSIMAX_1A8 = 0x40, + PCF50633_MBCC7_BATSYSIMAX_2A0 = 0x80, + PCF50633_MBCC7_BATSYSIMAX_2A2 = 0xc0, +}; +#define PCF50633_MBCC7_USB_MASK 0x03 + +enum pcf50633_reg_mbcc8 { + PCF50633_MBCC8_USBENASUS = 0x10, +}; + +enum pcf50633_reg_mbcs1 { + PCF50633_MBCS1_USBPRES = 0x01, + PCF50633_MBCS1_USBOK = 0x02, + PCF50633_MBCS1_ADAPTPRES = 0x04, + PCF50633_MBCS1_ADAPTOK = 0x08, + PCF50633_MBCS1_TBAT_OK = 0x00, + PCF50633_MBCS1_TBAT_ABOVE = 0x10, + PCF50633_MBCS1_TBAT_BELOW = 0x20, + PCF50633_MBCS1_TBAT_UNDEF = 0x30, + PCF50633_MBCS1_PREWDTEXP = 0x40, + PCF50633_MBCS1_WDTEXP = 0x80, +}; + +enum pcf50633_reg_mbcs2_mbcmod { + PCF50633_MBCS2_MBC_PLAY = 0x00, + PCF50633_MBCS2_MBC_USB_PRE = 0x01, + PCF50633_MBCS2_MBC_USB_PRE_WAIT = 0x02, + PCF50633_MBCS2_MBC_USB_FAST = 0x03, + PCF50633_MBCS2_MBC_USB_FAST_WAIT = 0x04, + PCF50633_MBCS2_MBC_USB_SUSPEND = 0x05, + PCF50633_MBCS2_MBC_ADP_PRE = 0x06, + PCF50633_MBCS2_MBC_ADP_PRE_WAIT = 0x07, + PCF50633_MBCS2_MBC_ADP_FAST = 0x08, + PCF50633_MBCS2_MBC_ADP_FAST_WAIT = 0x09, + PCF50633_MBCS2_MBC_BAT_FULL = 0x0a, + PCF50633_MBCS2_MBC_HALT = 0x0b, +}; +#define PCF50633_MBCS2_MBC_MASK 0x0f +enum pcf50633_reg_mbcs2_chgstat { + PCF50633_MBCS2_CHGS_NONE = 0x00, + PCF50633_MBCS2_CHGS_ADAPTER = 0x10, + PCF50633_MBCS2_CHGS_USB = 0x20, + PCF50633_MBCS2_CHGS_BOTH = 0x30, +}; +#define PCF50633_MBCS2_RESSTAT_AUTO 0x40 + +enum pcf50633_reg_mbcs3 { + PCF50633_MBCS3_USBLIM_PLAY = 0x01, + PCF50633_MBCS3_USBLIM_CGH = 0x02, + PCF50633_MBCS3_TLIM_PLAY = 0x04, + PCF50633_MBCS3_TLIM_CHG = 0x08, + PCF50633_MBCS3_ILIM = 0x10, /* 1: Ibat > Icutoff */ + PCF50633_MBCS3_VLIM = 0x20, /* 1: Vbat == Vmax */ + PCF50633_MBCS3_VBATSTAT = 0x40, /* 1: Vbat > Vbatcond */ + PCF50633_MBCS3_VRES = 0x80, /* 1: Vbat > Vth(RES) */ +}; + +#define PCF50633_MBCC2_VBATCOND_MASK 0x03 +#define PCF50633_MBCC2_VMAX_MASK 0x3c + +/* Charger status */ +#define PCF50633_MBC_USB_ONLINE 0x01 +#define PCF50633_MBC_USB_ACTIVE 0x02 +#define PCF50633_MBC_ADAPTER_ONLINE 0x04 +#define PCF50633_MBC_ADAPTER_ACTIVE 0x08 + +int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma); + +int pcf50633_mbc_get_status(struct pcf50633 *); +void pcf50633_mbc_set_status(struct pcf50633 *, int what, int status); + +#endif + -- cgit v1.2.3-71-gd317 From 5ec271e745350c7df6a6ebca24b43cb7a10bfa4a Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 9 Jan 2009 01:51:01 +0100 Subject: regulator: PCF50633 pmic driver Changes from V1: - Removed support for suspend_enable & suspend_disable functions. Signed-off-by: Balaji Rao Cc: Andy Green Cc: Liam Girdwood Acked-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/regulator/Kconfig | 7 + drivers/regulator/Makefile | 1 + drivers/regulator/pcf50633-regulator.c | 329 +++++++++++++++++++++++++++++++++ include/linux/mfd/pcf50633/pmic.h | 67 +++++++ 4 files changed, 404 insertions(+) create mode 100644 drivers/regulator/pcf50633-regulator.c create mode 100644 include/linux/mfd/pcf50633/pmic.h (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 39360e2a4540..e7e0cf102d6d 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -73,4 +73,11 @@ config REGULATOR_DA903X Say y here to support the BUCKs and LDOs regulators found on Dialog Semiconductor DA9030/DA9034 PMIC. +config REGULATOR_PCF50633 + tristate "PCF50633 regulator driver" + depends on MFD_PCF50633 + help + Say Y here to support the voltage regulators and convertors + on PCF50633 + endif diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 254d40c02ee8..61b30c6ddecc 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -11,5 +11,6 @@ obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o obj-$(CONFIG_REGULATOR_WM8350) += wm8350-regulator.o obj-$(CONFIG_REGULATOR_WM8400) += wm8400-regulator.o obj-$(CONFIG_REGULATOR_DA903X) += da903x.o +obj-$(CONFIG_REGULATOR_PCF50633) += pcf50633-regulator.o ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG diff --git a/drivers/regulator/pcf50633-regulator.c b/drivers/regulator/pcf50633-regulator.c new file mode 100644 index 000000000000..4cc85ec6e120 --- /dev/null +++ b/drivers/regulator/pcf50633-regulator.c @@ -0,0 +1,329 @@ +/* NXP PCF50633 PMIC Driver + * + * (C) 2006-2008 by Openmoko, Inc. + * Author: Balaji Rao + * All rights reserved. + * + * Broken down from monstrous PCF50633 driver mainly by + * Harald Welte and Andy Green and Werner Almesberger + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define PCF50633_REGULATOR(_name, _id) \ + { \ + .name = _name, \ + .id = _id, \ + .ops = &pcf50633_regulator_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + } + +static const u8 pcf50633_regulator_registers[PCF50633_NUM_REGULATORS] = { + [PCF50633_REGULATOR_AUTO] = PCF50633_REG_AUTOOUT, + [PCF50633_REGULATOR_DOWN1] = PCF50633_REG_DOWN1OUT, + [PCF50633_REGULATOR_DOWN2] = PCF50633_REG_DOWN2OUT, + [PCF50633_REGULATOR_MEMLDO] = PCF50633_REG_MEMLDOOUT, + [PCF50633_REGULATOR_LDO1] = PCF50633_REG_LDO1OUT, + [PCF50633_REGULATOR_LDO2] = PCF50633_REG_LDO2OUT, + [PCF50633_REGULATOR_LDO3] = PCF50633_REG_LDO3OUT, + [PCF50633_REGULATOR_LDO4] = PCF50633_REG_LDO4OUT, + [PCF50633_REGULATOR_LDO5] = PCF50633_REG_LDO5OUT, + [PCF50633_REGULATOR_LDO6] = PCF50633_REG_LDO6OUT, + [PCF50633_REGULATOR_HCLDO] = PCF50633_REG_HCLDOOUT, +}; + +/* Bits from voltage value */ +static u8 auto_voltage_bits(unsigned int millivolts) +{ + if (millivolts < 1800) + return 0; + if (millivolts > 3800) + return 0xff; + + millivolts -= 625; + + return millivolts / 25; +} + +static u8 down_voltage_bits(unsigned int millivolts) +{ + if (millivolts < 625) + return 0; + else if (millivolts > 3000) + return 0xff; + + millivolts -= 625; + + return millivolts / 25; +} + +static u8 ldo_voltage_bits(unsigned int millivolts) +{ + if (millivolts < 900) + return 0; + else if (millivolts > 3600) + return 0x1f; + + millivolts -= 900; + return millivolts / 100; +} + +/* Obtain voltage value from bits */ +static unsigned int auto_voltage_value(u8 bits) +{ + if (bits < 0x2f) + return 0; + + return 625 + (bits * 25); +} + + +static unsigned int down_voltage_value(u8 bits) +{ + return 625 + (bits * 25); +} + + +static unsigned int ldo_voltage_value(u8 bits) +{ + bits &= 0x1f; + + return 900 + (bits * 100); +} + +static int pcf50633_regulator_set_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV) +{ + struct pcf50633 *pcf; + int regulator_id, millivolts; + u8 volt_bits, regnr; + + pcf = rdev_get_drvdata(rdev); + + regulator_id = rdev_get_id(rdev); + if (regulator_id >= PCF50633_NUM_REGULATORS) + return -EINVAL; + + millivolts = min_uV / 1000; + + regnr = pcf50633_regulator_registers[regulator_id]; + + switch (regulator_id) { + case PCF50633_REGULATOR_AUTO: + volt_bits = auto_voltage_bits(millivolts); + break; + case PCF50633_REGULATOR_DOWN1: + volt_bits = down_voltage_bits(millivolts); + break; + case PCF50633_REGULATOR_DOWN2: + volt_bits = down_voltage_bits(millivolts); + break; + case PCF50633_REGULATOR_LDO1: + case PCF50633_REGULATOR_LDO2: + case PCF50633_REGULATOR_LDO3: + case PCF50633_REGULATOR_LDO4: + case PCF50633_REGULATOR_LDO5: + case PCF50633_REGULATOR_LDO6: + case PCF50633_REGULATOR_HCLDO: + volt_bits = ldo_voltage_bits(millivolts); + break; + default: + return -EINVAL; + } + + return pcf50633_reg_write(pcf, regnr, volt_bits); +} + +static int pcf50633_regulator_get_voltage(struct regulator_dev *rdev) +{ + struct pcf50633 *pcf; + int regulator_id, millivolts, volt_bits; + u8 regnr; + + pcf = rdev_get_drvdata(rdev);; + + regulator_id = rdev_get_id(rdev); + if (regulator_id >= PCF50633_NUM_REGULATORS) + return -EINVAL; + + regnr = pcf50633_regulator_registers[regulator_id]; + + volt_bits = pcf50633_reg_read(pcf, regnr); + if (volt_bits < 0) + return -1; + + switch (regulator_id) { + case PCF50633_REGULATOR_AUTO: + millivolts = auto_voltage_value(volt_bits); + break; + case PCF50633_REGULATOR_DOWN1: + millivolts = down_voltage_value(volt_bits); + break; + case PCF50633_REGULATOR_DOWN2: + millivolts = down_voltage_value(volt_bits); + break; + case PCF50633_REGULATOR_LDO1: + case PCF50633_REGULATOR_LDO2: + case PCF50633_REGULATOR_LDO3: + case PCF50633_REGULATOR_LDO4: + case PCF50633_REGULATOR_LDO5: + case PCF50633_REGULATOR_LDO6: + case PCF50633_REGULATOR_HCLDO: + millivolts = ldo_voltage_value(volt_bits); + break; + default: + return -EINVAL; + } + + return millivolts * 1000; +} + +static int pcf50633_regulator_enable(struct regulator_dev *rdev) +{ + struct pcf50633 *pcf = rdev_get_drvdata(rdev); + int regulator_id; + u8 regnr; + + regulator_id = rdev_get_id(rdev); + if (regulator_id >= PCF50633_NUM_REGULATORS) + return -EINVAL; + + /* The *ENA register is always one after the *OUT register */ + regnr = pcf50633_regulator_registers[regulator_id] + 1; + + return pcf50633_reg_set_bit_mask(pcf, regnr, PCF50633_REGULATOR_ON, + PCF50633_REGULATOR_ON); +} + +static int pcf50633_regulator_disable(struct regulator_dev *rdev) +{ + struct pcf50633 *pcf = rdev_get_drvdata(rdev); + int regulator_id; + u8 regnr; + + regulator_id = rdev_get_id(rdev); + if (regulator_id >= PCF50633_NUM_REGULATORS) + return -EINVAL; + + /* the *ENA register is always one after the *OUT register */ + regnr = pcf50633_regulator_registers[regulator_id] + 1; + + return pcf50633_reg_set_bit_mask(pcf, regnr, + PCF50633_REGULATOR_ON, 0); +} + +static int pcf50633_regulator_is_enabled(struct regulator_dev *rdev) +{ + struct pcf50633 *pcf = rdev_get_drvdata(rdev); + int regulator_id = rdev_get_id(rdev); + u8 regnr; + + regulator_id = rdev_get_id(rdev); + if (regulator_id >= PCF50633_NUM_REGULATORS) + return -EINVAL; + + /* the *ENA register is always one after the *OUT register */ + regnr = pcf50633_regulator_registers[regulator_id] + 1; + + return pcf50633_reg_read(pcf, regnr) & PCF50633_REGULATOR_ON; +} + +static struct regulator_ops pcf50633_regulator_ops = { + .set_voltage = pcf50633_regulator_set_voltage, + .get_voltage = pcf50633_regulator_get_voltage, + .enable = pcf50633_regulator_enable, + .disable = pcf50633_regulator_disable, + .is_enabled = pcf50633_regulator_is_enabled, +}; + +static struct regulator_desc regulators[] = { + [PCF50633_REGULATOR_AUTO] = + PCF50633_REGULATOR("auto", PCF50633_REGULATOR_AUTO), + [PCF50633_REGULATOR_DOWN1] = + PCF50633_REGULATOR("down1", PCF50633_REGULATOR_DOWN1), + [PCF50633_REGULATOR_DOWN2] = + PCF50633_REGULATOR("down2", PCF50633_REGULATOR_DOWN2), + [PCF50633_REGULATOR_LDO1] = + PCF50633_REGULATOR("ldo1", PCF50633_REGULATOR_LDO1), + [PCF50633_REGULATOR_LDO2] = + PCF50633_REGULATOR("ldo2", PCF50633_REGULATOR_LDO2), + [PCF50633_REGULATOR_LDO3] = + PCF50633_REGULATOR("ldo3", PCF50633_REGULATOR_LDO3), + [PCF50633_REGULATOR_LDO4] = + PCF50633_REGULATOR("ldo4", PCF50633_REGULATOR_LDO4), + [PCF50633_REGULATOR_LDO5] = + PCF50633_REGULATOR("ldo5", PCF50633_REGULATOR_LDO5), + [PCF50633_REGULATOR_LDO6] = + PCF50633_REGULATOR("ldo6", PCF50633_REGULATOR_LDO6), + [PCF50633_REGULATOR_HCLDO] = + PCF50633_REGULATOR("hcldo", PCF50633_REGULATOR_HCLDO), + [PCF50633_REGULATOR_MEMLDO] = + PCF50633_REGULATOR("memldo", PCF50633_REGULATOR_MEMLDO), +}; + +static int __devinit pcf50633_regulator_probe(struct platform_device *pdev) +{ + struct regulator_dev *rdev; + struct pcf50633 *pcf; + + /* Already set by core driver */ + pcf = platform_get_drvdata(pdev); + + rdev = regulator_register(®ulators[pdev->id], &pdev->dev, pcf); + if (IS_ERR(rdev)) + return PTR_ERR(rdev); + + if (pcf->pdata->regulator_registered) + pcf->pdata->regulator_registered(pcf, pdev->id); + + return 0; +} + +static int __devexit pcf50633_regulator_remove(struct platform_device *pdev) +{ + struct regulator_dev *rdev = platform_get_drvdata(pdev); + + regulator_unregister(rdev); + + return 0; +} + +static struct platform_driver pcf50633_regulator_driver = { + .driver = { + .name = "pcf50633-regltr", + }, + .probe = pcf50633_regulator_probe, + .remove = __devexit_p(pcf50633_regulator_remove), +}; + +static int __init pcf50633_regulator_init(void) +{ + return platform_driver_register(&pcf50633_regulator_driver); +} +module_init(pcf50633_regulator_init); + +static void __exit pcf50633_regulator_exit(void) +{ + platform_driver_unregister(&pcf50633_regulator_driver); +} +module_exit(pcf50633_regulator_exit); + +MODULE_AUTHOR("Balaji Rao "); +MODULE_DESCRIPTION("PCF50633 regulator driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:pcf50633-regulator"); diff --git a/include/linux/mfd/pcf50633/pmic.h b/include/linux/mfd/pcf50633/pmic.h new file mode 100644 index 000000000000..2d3dbe53b235 --- /dev/null +++ b/include/linux/mfd/pcf50633/pmic.h @@ -0,0 +1,67 @@ +#ifndef __LINUX_MFD_PCF50633_PMIC_H +#define __LINUX_MFD_PCF50633_PMIC_H + +#include +#include + +#define PCF50633_REG_AUTOOUT 0x1a +#define PCF50633_REG_AUTOENA 0x1b +#define PCF50633_REG_AUTOCTL 0x1c +#define PCF50633_REG_AUTOMXC 0x1d +#define PCF50633_REG_DOWN1OUT 0x1e +#define PCF50633_REG_DOWN1ENA 0x1f +#define PCF50633_REG_DOWN1CTL 0x20 +#define PCF50633_REG_DOWN1MXC 0x21 +#define PCF50633_REG_DOWN2OUT 0x22 +#define PCF50633_REG_DOWN2ENA 0x23 +#define PCF50633_REG_DOWN2CTL 0x24 +#define PCF50633_REG_DOWN2MXC 0x25 +#define PCF50633_REG_MEMLDOOUT 0x26 +#define PCF50633_REG_MEMLDOENA 0x27 +#define PCF50633_REG_LDO1OUT 0x2d +#define PCF50633_REG_LDO1ENA 0x2e +#define PCF50633_REG_LDO2OUT 0x2f +#define PCF50633_REG_LDO2ENA 0x30 +#define PCF50633_REG_LDO3OUT 0x31 +#define PCF50633_REG_LDO3ENA 0x32 +#define PCF50633_REG_LDO4OUT 0x33 +#define PCF50633_REG_LDO4ENA 0x34 +#define PCF50633_REG_LDO5OUT 0x35 +#define PCF50633_REG_LDO5ENA 0x36 +#define PCF50633_REG_LDO6OUT 0x37 +#define PCF50633_REG_LDO6ENA 0x38 +#define PCF50633_REG_HCLDOOUT 0x39 +#define PCF50633_REG_HCLDOENA 0x3a +#define PCF50633_REG_HCLDOOVL 0x40 + +enum pcf50633_regulator_enable { + PCF50633_REGULATOR_ON = 0x01, + PCF50633_REGULATOR_ON_GPIO1 = 0x02, + PCF50633_REGULATOR_ON_GPIO2 = 0x04, + PCF50633_REGULATOR_ON_GPIO3 = 0x08, +}; +#define PCF50633_REGULATOR_ON_MASK 0x0f + +enum pcf50633_regulator_phase { + PCF50633_REGULATOR_ACTPH1 = 0x00, + PCF50633_REGULATOR_ACTPH2 = 0x10, + PCF50633_REGULATOR_ACTPH3 = 0x20, + PCF50633_REGULATOR_ACTPH4 = 0x30, +}; +#define PCF50633_REGULATOR_ACTPH_MASK 0x30 + +enum pcf50633_regulator_id { + PCF50633_REGULATOR_AUTO, + PCF50633_REGULATOR_DOWN1, + PCF50633_REGULATOR_DOWN2, + PCF50633_REGULATOR_LDO1, + PCF50633_REGULATOR_LDO2, + PCF50633_REGULATOR_LDO3, + PCF50633_REGULATOR_LDO4, + PCF50633_REGULATOR_LDO5, + PCF50633_REGULATOR_LDO6, + PCF50633_REGULATOR_HCLDO, + PCF50633_REGULATOR_MEMLDO, +}; +#endif + -- cgit v1.2.3-71-gd317 From 53ce3d9564908794ae7dd32969089b57df5fc098 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 9 Jan 2009 12:27:08 -0800 Subject: smp_call_function_single(): be slightly less stupid If you do smp_call_function_single(expression-with-side-effects, ...) then expression-with-side-effects never gets evaluated on UP builds. As always, implementing it in C is the correct thing to do. While we're there, uninline it for size and possible header dependency reasons. And create a new kernel/up.c, as a place in which to put uniprocessor-specific code and storage. It should mirror kernel/smp.c. Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/smp.h | 13 +++---------- kernel/Makefile | 6 +++++- kernel/up.c | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 11 deletions(-) create mode 100644 kernel/up.c (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index b82466968101..715196b09d67 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -24,6 +24,9 @@ struct call_single_data { /* total number of cpus in this system (may exceed NR_CPUS) */ extern unsigned int total_cpus; +int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, + int wait); + #ifdef CONFIG_SMP #include @@ -79,8 +82,6 @@ smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, return 0; } -int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, - int wait); void __smp_call_function_single(int cpuid, struct call_single_data *data); /* @@ -140,14 +141,6 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) -#define smp_call_function_single(cpuid, func, info, wait) \ -({ \ - WARN_ON(cpuid != 0); \ - local_irq_disable(); \ - (func)(info); \ - local_irq_enable(); \ - 0; \ -}) #define smp_call_function_mask(mask, func, info, wait) \ (up_smp_call_function(func, info)) #define smp_call_function_many(mask, func, info, wait) \ diff --git a/kernel/Makefile b/kernel/Makefile index 2921d90ce32f..2aebc4cd7878 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -40,7 +40,11 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o -obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o +ifeq ($(CONFIG_USE_GENERIC_SMP_HELPERS),y) +obj-y += smp.o +else +obj-y += up.o +endif obj-$(CONFIG_SMP) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_PROVE_LOCKING) += spinlock.o diff --git a/kernel/up.c b/kernel/up.c new file mode 100644 index 000000000000..ce62cc9e9f71 --- /dev/null +++ b/kernel/up.c @@ -0,0 +1,18 @@ +/* + * Uniprocessor-only support functions. The counterpart to kernel/smp.c + */ + +#include +#include +#include + +int smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int wait) +{ + WARN_ON(cpuid != 0); + local_irq_disable(); + (func)(info); + local_irq_enable(); + return 0; +} +EXPORT_SYMBOL(smp_call_function_single); -- cgit v1.2.3-71-gd317 From 649274d993212e7c23c0cb734572c2311c200872 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 11 Jan 2009 00:20:39 -0800 Subject: net_dma: acquire/release dma channels on ifup/ifdown The recent dmaengine rework removed the capability to remove dma device driver modules while net_dma is active. Rather than notify dmaengine-clients that channels are trying to be removed, we now rely on clients to notify dmaengine when they no longer have a need for channels. Teach net_dma to release channels by taking dmaengine references at netdevice open and dropping references at netdevice close. Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams Signed-off-by: David S. Miller --- include/linux/dmaengine.h | 10 ++++++++++ net/core/dev.c | 13 ++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 64dea2ab326c..c73f1e2b59b7 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -270,8 +270,18 @@ struct dma_device { /* --- public DMA engine API --- */ +#ifdef CONFIG_DMA_ENGINE void dmaengine_get(void); void dmaengine_put(void); +#else +static inline void dmaengine_get(void) +{ +} +static inline void dmaengine_put(void) +{ +} +#endif + dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, diff --git a/net/core/dev.c b/net/core/dev.c index 5f736f1ceeae..b715a55cccc4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1087,6 +1087,11 @@ int dev_open(struct net_device *dev) */ dev->flags |= IFF_UP; + /* + * Enable NET_DMA + */ + dmaengine_get(); + /* * Initialize multicasting status */ @@ -1164,6 +1169,11 @@ int dev_close(struct net_device *dev) */ call_netdevice_notifiers(NETDEV_DOWN, dev); + /* + * Shutdown NET_DMA + */ + dmaengine_put(); + return 0; } @@ -5151,9 +5161,6 @@ static int __init net_dev_init(void) hotcpu_notifier(dev_cpu_callback, 0); dst_init(); dev_mcast_init(); - #ifdef CONFIG_NET_DMA - dmaengine_get(); - #endif rc = 0; out: return rc; -- cgit v1.2.3-71-gd317 From 57de16e612d63138bd2c618449af9d8312466e25 Mon Sep 17 00:00:00 2001 From: Martin Bachem Date: Sun, 26 Oct 2008 13:30:09 +0100 Subject: BUGFIX: used NULL pointer at ioctl(sk,IMGETDEVINFO,&devinfo) when devinfo.id not registered daxtar example # modprobe hfcsusb daxtar example # modprobe mISDN_l1loop daxtar example # ./misdnportinfo Found 3 devices id: 0 Dprotocols: 00000006 Bprotocols: 0000000e protocol: 0 nrbchan: 2 name: HFC-S_USB.1 id: 1 Dprotocols: 00000006 Bprotocols: 0000000e protocol: 0 nrbchan: 2 name: mISDN_l1loop.1 id: 2 Dprotocols: 00000006 Bprotocols: 0000000e protocol: 0 nrbchan: 2 name: mISDN_l1loop.2 daxtar example # rmmod hfcsusb daxtar example # ./misdnportinfo Found 2 devices *Segmentation* *fault* dmesg: [ 9914.939718] BUG: unable to handle kernel NULL pointer dereference at 000000d4 [ 9914.939721] IP: [] :mISDN_core:get_mdevice+0x19/0x22 [ 9914.939729] *pde = 00000000 [ 9914.939732] Oops: 0000 [#14] PREEMPT SMP [ 9914.939734] Modules linked in: mISDN_l1loop mISDN_core vmnet vmblock vmci vmmon coretemp w83627ehf hwmon_vid rfcomm l2cap blue tooth usbhid snd_usb_audio snd_usb_lib snd_rawmidi snd_hwdep fuse nvidia(P) uhci_hcd i2c_i801 ehci_hcd snd_hda_intel atl1 usbcore i2c_core parport_seria l [last unloaded: hfcsusb] [ 9914.939751] Pid: 29618, comm: misdnportinfo Tainted: P D (2.6.27.3 #5) [ 9914.939753] EIP: 0060:[] EFLAGS: 00210246 CPU: 0 [ 9914.939758] EIP is at get_mdevice+0x19/0x22 [mISDN_core] [ 9914.939760] EAX: 00000000 EBX: f8fa791c ECX: f6afaa58 EDX: f7960cf4 [ 9914.939762] ESI: 80044944 EDI: bfc2e62c EBP: bfc2e62c ESP: f5adbef4 [ 9914.939763] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 9914.939765] Process misdnportinfo (pid: 29618, ti=f5ada000 task=f6bec430 task.ti=f5ada000) [ 9914.939767] Stack: f8f9f4e0 00000000 f8f9f867 bfc2e62c 0000000a c02461e8 00200246 c042dde8 [ 9914.939771] 00000003 c042dde4 00000000 00000001 00200082 c0114775 00000000 00000000 [ 9914.939775] 00000003 f7088010 00200282 f8fa791c 80044944 bfc2e62c bfc2e62c c02f6615 [ 9914.939780] Call Trace: [ 9914.939782] [] _get_mdevice+0x0/0x18 [mISDN_core] [ 9914.939789] [] base_sock_ioctl+0x7a/0x129 [mISDN_core] [ 9914.939789] [] opost+0x171/0x182 [ 9914.939789] [] __wake_up+0x29/0x39 [ 9914.939789] [] sock_ioctl+0x1b5/0x1d9 [ 9914.939789] [] sock_ioctl+0x0/0x1d9 [ 9914.939789] [] vfs_ioctl+0x1c/0x5d [ 9914.939789] [] do_vfs_ioctl+0x23e/0x24e [ 9914.939789] [] sys_ioctl+0x2c/0x45 [ 9914.939789] [] sysenter_do_call+0x12/0x21 [ 9914.939789] [] pci_fixup_i450gx+0x4e/0x56 [ 9914.939789] ======================= [ 9914.939789] Code: 00 68 02 f0 f9 f8 e8 ae b4 2c c7 8b 44 24 04 5a 59 c3 83 ec 04 31 d2 89 04 24 89 e1 b8 ac df fa f8 68 e0 f4 f9 f8 e8 4a b5 2c c7 <8b> 80 d4 00 00 00 5a 59 c3 53 89 cb 8d 90 9c 00 00 00 89 c8 e8 [ 9914.939789] EIP: [] get_mdevice+0x19/0x22 [mISDN_core] SS:ESP 0068:f5adbef4 [ 9914.939858] ---[ end trace 50e18a715b019424 ]--- Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil --- include/linux/mISDNif.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 557477ac3d5b..5da3d95b27f1 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -559,7 +559,10 @@ extern void mISDN_unregister_clock(struct mISDNclock *); static inline struct mISDNdevice *dev_to_mISDN(struct device *dev) { - return dev_get_drvdata(dev); + if (dev) + return dev_get_drvdata(dev); + else + return NULL; } extern void set_channel_address(struct mISDNchannel *, u_int, u_int); -- cgit v1.2.3-71-gd317 From 6d612b0f943289856c6e8186c564cda922cd040e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 Jan 2009 12:52:23 +0100 Subject: locking, hpet: annotate false positive warning Alexander Beregalov reported that this warning is caused by the HPET code: > hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0 > hpet0: 3 comparators, 64-bit 14.318180 MHz counter > ODEBUG: object is on stack, but not annotated > ------------[ cut here ]------------ > WARNING: at lib/debugobjects.c:251 __debug_object_init+0x2a4/0x352() > Bisected down to 26afe5f2fbf06ea0765aaa316640c4dd472310c0 > (x86: HPET_MSI Initialise per-cpu HPET timers) The commit is fine - but the on-stack workqueue entry needs annotation. Reported-and-bisected-by: Alexander Beregalov Signed-off-by: Peter Zijlstra Tested-by: Alexander Beregalov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 +- include/linux/workqueue.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cd759ad90690..bb2e0f0975ae 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -628,7 +628,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, switch (action & 0xf) { case CPU_ONLINE: - INIT_DELAYED_WORK(&work.work, hpet_work); + INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); init_completion(&work.complete); /* FIXME: add schedule_work_on() */ schedule_delayed_work_on(cpu, &work.work, 0); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b36291130f22..47151c8495aa 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -118,6 +118,12 @@ struct execute_work { init_timer(&(_work)->timer); \ } while (0) +#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ + do { \ + INIT_WORK(&(_work)->work, (_func)); \ + init_timer_on_stack(&(_work)->timer); \ + } while (0) + #define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ -- cgit v1.2.3-71-gd317 From 2e4c77bea3d8b17d94f8ee382411f359b708560f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 30 Dec 2008 14:16:41 +0100 Subject: m68k: dio - Kill warn_unused_result warnings warning: ignoring return value of 'device_register', declared with attribute warn_unused_result warning: ignoring return value of 'device_create_file', declared with attribute warn_unused_result Signed-off-by: Geert Uytterhoeven --- drivers/dio/dio-sysfs.c | 16 ++++++++++------ drivers/dio/dio.c | 18 +++++++++++++++--- include/linux/dio.h | 2 +- 3 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/dio/dio-sysfs.c b/drivers/dio/dio-sysfs.c index f46463038847..91d5f4da8769 100644 --- a/drivers/dio/dio-sysfs.c +++ b/drivers/dio/dio-sysfs.c @@ -63,15 +63,19 @@ static ssize_t dio_show_resource(struct device *dev, struct device_attribute *at } static DEVICE_ATTR(resource, S_IRUGO, dio_show_resource, NULL); -void dio_create_sysfs_dev_files(struct dio_dev *d) +int dio_create_sysfs_dev_files(struct dio_dev *d) { struct device *dev = &d->dev; + int error; /* current configuration's attributes */ - device_create_file(dev, &dev_attr_id); - device_create_file(dev, &dev_attr_ipl); - device_create_file(dev, &dev_attr_secid); - device_create_file(dev, &dev_attr_name); - device_create_file(dev, &dev_attr_resource); + if ((error = device_create_file(dev, &dev_attr_id)) || + (error = device_create_file(dev, &dev_attr_ipl)) || + (error = device_create_file(dev, &dev_attr_secid)) || + (error = device_create_file(dev, &dev_attr_name)) || + (error = device_create_file(dev, &dev_attr_resource))) + return error; + + return 0; } diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c index 07f274f853d9..10c3c498358c 100644 --- a/drivers/dio/dio.c +++ b/drivers/dio/dio.c @@ -173,6 +173,7 @@ static int __init dio_init(void) mm_segment_t fs; int i; struct dio_dev *dev; + int error; if (!MACH_IS_HP300) return 0; @@ -182,7 +183,11 @@ static int __init dio_init(void) /* Initialize the DIO bus */ INIT_LIST_HEAD(&dio_bus.devices); strcpy(dio_bus.dev.bus_id, "dio"); - device_register(&dio_bus.dev); + error = device_register(&dio_bus.dev); + if (error) { + pr_err("DIO: Error registering dio_bus\n"); + return error; + } /* Request all resources */ dio_bus.num_resources = (hp300_model == HP_320 ? 1 : 2); @@ -252,8 +257,15 @@ static int __init dio_init(void) if (scode >= DIOII_SCBASE) iounmap(va); - device_register(&dev->dev); - dio_create_sysfs_dev_files(dev); + error = device_register(&dev->dev); + if (error) { + pr_err("DIO: Error registering device %s\n", + dev->name); + continue; + } + error = dio_create_sysfs_dev_files(dev); + if (error) + dev_err(&dev->dev, "Error creating sysfs files\n"); } return 0; } diff --git a/include/linux/dio.h b/include/linux/dio.h index 1e65ebc2a3db..b2dd31ca1710 100644 --- a/include/linux/dio.h +++ b/include/linux/dio.h @@ -241,7 +241,7 @@ struct dio_driver { extern int dio_find(int deviceid); extern unsigned long dio_scodetophysaddr(int scode); -extern void dio_create_sysfs_dev_files(struct dio_dev *); +extern int dio_create_sysfs_dev_files(struct dio_dev *); /* New-style probing */ extern int dio_register_driver(struct dio_driver *); -- cgit v1.2.3-71-gd317 From 985ebdb5ed54151eba734aa1b307460e8e4267ba Mon Sep 17 00:00:00 2001 From: Krzysztof HaÅ‚asa Date: Mon, 12 Jan 2009 16:32:13 -0800 Subject: net: Fix a comment in include/linux/netdevice.h. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a comment in include/linux/netdevice.h. Signed-off-by: Krzysztof HaÅ‚asa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f24556813375..4647604c7ca9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -467,7 +467,7 @@ struct netdev_queue { * This function is called when network device transistions to the down * state. * - * int (*ndo_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev); + * int (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev); * Called when a packet needs to be transmitted. * Must return NETDEV_TX_OK , NETDEV_TX_BUSY, or NETDEV_TX_LOCKED, * Required can not be NULL. -- cgit v1.2.3-71-gd317 From daaf83d2b9277928739f3eb7ea64f49c1254fd62 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 12 Jan 2009 00:06:11 +0000 Subject: netfilter 09/09: remove padding from struct xt_match on 64bit builds reorder struct xt_match to remove 8 bytes of padding and make its size 128 bytes. This saves a small amount of data space in each of the xt netfilter modules and fits xt_match in one 128 byte cache line. Signed-off-by: Richard Kennedy Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e52ce475d19f..c7ee8744d26b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -270,6 +270,7 @@ struct xt_match struct list_head list; const char name[XT_FUNCTION_MAXNAMELEN-1]; + u_int8_t revision; /* Return true or false: return FALSE and set *hotdrop = 1 to force immediate packet drop. */ @@ -302,7 +303,6 @@ struct xt_match unsigned short proto; unsigned short family; - u_int8_t revision; }; /* Registration hooks for targets. */ -- cgit v1.2.3-71-gd317 From baf48f6577e581a9adb8fe849dc80e24b21d171d Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Mon, 12 Jan 2009 21:15:17 -0800 Subject: softlock: fix false panic which can occur if softlockup_thresh is reduced At run-time, if softlockup_thresh is changed to a much lower value, touch_timestamp is likely to be much older than the new softlock_thresh. This will cause a false softlockup to be detected. If softlockup_panic is enabled, the system will panic. The fix is to touch all watchdogs before changing softlockup_thresh. Signed-off-by: Mandeep Singh Baines Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ kernel/softlockup.c | 9 +++++++++ kernel/sysctl.c | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4cae9b81a1f8..54cbabf3b871 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -293,6 +293,9 @@ extern void sched_show_task(struct task_struct *p); extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); +extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index d9188c66278a..85d5a2455103 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -88,6 +89,14 @@ void touch_all_softlockup_watchdogs(void) } EXPORT_SYMBOL(touch_all_softlockup_watchdogs); +int proc_dosoftlockup_thresh(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + touch_all_softlockup_watchdogs(); + return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); +} + /* * This callback runs from the timer interrupt, and checks * whether the watchdog thread has hung or not: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 89d74436318c..596dc31a7116 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -800,7 +800,7 @@ static struct ctl_table kern_table[] = { .data = &softlockup_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = &proc_dosoftlockup_thresh, .strategy = &sysctl_intvec, .extra1 = &neg_one, .extra2 = &sixty, -- cgit v1.2.3-71-gd317 From 4c696ba7982501d43dea11dbbaabd2aa8a19cc42 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:53 +0100 Subject: [CVE-2009-0029] Move compat system call declarations to compat header file Move declarations to correct header file. Signed-off-by: Heiko Carstens --- include/linux/compat.h | 13 +++++++++++++ include/linux/syscalls.h | 12 ------------ 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index e88f3ecf38b4..3fd2194ff573 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -280,5 +280,18 @@ asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, asmlinkage long compat_sys_timerfd_gettime(int ufd, struct compat_itimerspec __user *otmr); +asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, + __u32 __user *pages, + const int __user *nodes, + int __user *status, + int flags); +asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, + struct compat_timeval __user *t); +asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, + struct compat_stat __user *statbuf, + int flag); +asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, + int flags, int mode); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 18d0a243a7b3..a7593f670ca6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -530,11 +530,6 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags); -asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, - __u32 __user *pages, - const int __user *nodes, - int __user *status, - int flags); asmlinkage long sys_mbind(unsigned long start, unsigned long len, unsigned long mode, unsigned long __user *nmask, @@ -583,13 +578,6 @@ asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *bu int bufsiz); asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __user *utimes, int flags); -asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, - struct compat_timeval __user *t); -asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, - struct compat_stat __user *statbuf, - int flag); -asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, - int flags, int mode); asmlinkage long sys_unshare(unsigned long unshare_flags); asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, -- cgit v1.2.3-71-gd317 From 2ed7c03ec17779afb4fcfa3b8c61df61bd4879ba Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:54 +0100 Subject: [CVE-2009-0029] Convert all system calls to return a long Convert all system calls to return a long. This should be a NOP since all converted types should have the same size anyway. With the exception of sys_exit_group which returned void. But that doesn't matter since the system call doesn't return. Signed-off-by: Heiko Carstens --- fs/read_write.c | 18 +++++------ fs/xattr.c | 12 ++++---- include/linux/syscalls.h | 79 ++++++++++++++++++++++++------------------------ ipc/mqueue.c | 2 +- kernel/exit.c | 4 ++- kernel/signal.c | 2 +- kernel/timer.c | 2 +- mm/filemap.c | 2 +- mm/mmap.c | 2 +- mm/mremap.c | 2 +- mm/nommu.c | 2 +- 11 files changed, 64 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/fs/read_write.c b/fs/read_write.c index 5cc6924eb158..940367f51f2a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -147,7 +147,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int origin) } EXPORT_SYMBOL(vfs_llseek); -asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) +asmlinkage long sys_lseek(unsigned int fd, off_t offset, unsigned int origin) { off_t retval; struct file * file; @@ -369,7 +369,7 @@ static inline void file_pos_write(struct file *file, loff_t pos) file->f_pos = pos; } -asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) +asmlinkage long sys_read(unsigned int fd, char __user * buf, size_t count) { struct file *file; ssize_t ret = -EBADF; @@ -386,7 +386,7 @@ asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) return ret; } -asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) +asmlinkage long sys_write(unsigned int fd, const char __user * buf, size_t count) { struct file *file; ssize_t ret = -EBADF; @@ -403,7 +403,7 @@ asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t co return ret; } -asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, +asmlinkage long sys_pread64(unsigned int fd, char __user *buf, size_t count, loff_t pos) { struct file *file; @@ -424,7 +424,7 @@ asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, return ret; } -asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, +asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos) { struct file *file; @@ -672,7 +672,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, EXPORT_SYMBOL(vfs_writev); -asmlinkage ssize_t +asmlinkage long sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) { struct file *file; @@ -693,7 +693,7 @@ sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) return ret; } -asmlinkage ssize_t +asmlinkage long sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) { struct file *file; @@ -812,7 +812,7 @@ out: return retval; } -asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) +asmlinkage long sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) { loff_t pos; off_t off; @@ -831,7 +831,7 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, siz return do_sendfile(out_fd, in_fd, NULL, count, 0); } -asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) +asmlinkage long sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) { loff_t pos; ssize_t ret; diff --git a/fs/xattr.c b/fs/xattr.c index 237804cd6b56..d049ae27aae7 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -349,7 +349,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, return error; } -asmlinkage ssize_t +asmlinkage long sys_getxattr(const char __user *pathname, const char __user *name, void __user *value, size_t size) { @@ -364,7 +364,7 @@ sys_getxattr(const char __user *pathname, const char __user *name, return error; } -asmlinkage ssize_t +asmlinkage long sys_lgetxattr(const char __user *pathname, const char __user *name, void __user *value, size_t size) { @@ -379,7 +379,7 @@ sys_lgetxattr(const char __user *pathname, const char __user *name, void __user return error; } -asmlinkage ssize_t +asmlinkage long sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size) { struct file *f; @@ -424,7 +424,7 @@ listxattr(struct dentry *d, char __user *list, size_t size) return error; } -asmlinkage ssize_t +asmlinkage long sys_listxattr(const char __user *pathname, char __user *list, size_t size) { struct path path; @@ -438,7 +438,7 @@ sys_listxattr(const char __user *pathname, char __user *list, size_t size) return error; } -asmlinkage ssize_t +asmlinkage long sys_llistxattr(const char __user *pathname, char __user *list, size_t size) { struct path path; @@ -452,7 +452,7 @@ sys_llistxattr(const char __user *pathname, char __user *list, size_t size) return error; } -asmlinkage ssize_t +asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size) { struct file *f; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a7593f670ca6..22290eeaf553 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -77,7 +77,7 @@ asmlinkage long sys_times(struct tms __user *tbuf); asmlinkage long sys_gettid(void); asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp); -asmlinkage unsigned long sys_alarm(unsigned int seconds); +asmlinkage long sys_alarm(unsigned int seconds); asmlinkage long sys_getpid(void); asmlinkage long sys_getppid(void); asmlinkage long sys_getuid(void); @@ -166,7 +166,7 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, unsigned long flags); asmlinkage long sys_exit(int error_code); -asmlinkage void sys_exit_group(int error_code); +asmlinkage long sys_exit_group(int error_code); asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, int options, struct rusage __user *ru); asmlinkage long sys_waitid(int which, pid_t pid, @@ -196,7 +196,7 @@ asmlinkage long sys_tkill(int pid, int sig); asmlinkage long sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo); asmlinkage long sys_sgetmask(void); asmlinkage long sys_ssetmask(int newmask); -asmlinkage unsigned long sys_signal(int sig, __sighandler_t handler); +asmlinkage long sys_signal(int sig, __sighandler_t handler); asmlinkage long sys_pause(void); asmlinkage long sys_sync(void); @@ -246,29 +246,29 @@ asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_fsetxattr(int fd, const char __user *name, const void __user *value, size_t size, int flags); -asmlinkage ssize_t sys_getxattr(const char __user *path, const char __user *name, - void __user *value, size_t size); -asmlinkage ssize_t sys_lgetxattr(const char __user *path, const char __user *name, - void __user *value, size_t size); -asmlinkage ssize_t sys_fgetxattr(int fd, const char __user *name, - void __user *value, size_t size); -asmlinkage ssize_t sys_listxattr(const char __user *path, char __user *list, - size_t size); -asmlinkage ssize_t sys_llistxattr(const char __user *path, char __user *list, - size_t size); -asmlinkage ssize_t sys_flistxattr(int fd, char __user *list, size_t size); +asmlinkage long sys_getxattr(const char __user *path, const char __user *name, + void __user *value, size_t size); +asmlinkage long sys_lgetxattr(const char __user *path, const char __user *name, + void __user *value, size_t size); +asmlinkage long sys_fgetxattr(int fd, const char __user *name, + void __user *value, size_t size); +asmlinkage long sys_listxattr(const char __user *path, char __user *list, + size_t size); +asmlinkage long sys_llistxattr(const char __user *path, char __user *list, + size_t size); +asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size); asmlinkage long sys_removexattr(const char __user *path, const char __user *name); asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); -asmlinkage unsigned long sys_brk(unsigned long brk); +asmlinkage long sys_brk(unsigned long brk); asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot); -asmlinkage unsigned long sys_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr); +asmlinkage long sys_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); @@ -321,10 +321,10 @@ asmlinkage long sys_io_submit(aio_context_t, long, struct iocb __user * __user *); asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, struct io_event __user *result); -asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, - off_t __user *offset, size_t count); -asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, - loff_t __user *offset, size_t count); +asmlinkage long sys_sendfile(int out_fd, int in_fd, + off_t __user *offset, size_t count); +asmlinkage long sys_sendfile64(int out_fd, int in_fd, + loff_t __user *offset, size_t count); asmlinkage long sys_readlink(const char __user *path, char __user *buf, int bufsiz); asmlinkage long sys_creat(const char __user *pathname, int mode); @@ -368,26 +368,25 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times); asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes); -asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, - unsigned int origin); +asmlinkage long sys_lseek(unsigned int fd, off_t offset, + unsigned int origin); asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, unsigned long offset_low, loff_t __user *result, unsigned int origin); -asmlinkage ssize_t sys_read(unsigned int fd, char __user *buf, - size_t count); -asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); -asmlinkage ssize_t sys_readv(unsigned long fd, - const struct iovec __user *vec, - unsigned long vlen); -asmlinkage ssize_t sys_write(unsigned int fd, const char __user *buf, - size_t count); -asmlinkage ssize_t sys_writev(unsigned long fd, - const struct iovec __user *vec, - unsigned long vlen); -asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, - size_t count, loff_t pos); -asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, - size_t count, loff_t pos); +asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count); +asmlinkage long sys_readahead(int fd, loff_t offset, size_t count); +asmlinkage long sys_readv(unsigned long fd, + const struct iovec __user *vec, + unsigned long vlen); +asmlinkage long sys_write(unsigned int fd, const char __user *buf, + size_t count); +asmlinkage long sys_writev(unsigned long fd, + const struct iovec __user *vec, + unsigned long vlen); +asmlinkage long sys_pread64(unsigned int fd, char __user *buf, + size_t count, loff_t pos); +asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, + size_t count, loff_t pos); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, int mode); asmlinkage long sys_chdir(const char __user *filename); @@ -476,7 +475,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); asmlinkage long sys_mq_open(const char __user *name, int oflag, mode_t mode, struct mq_attr __user *attr); asmlinkage long sys_mq_unlink(const char __user *name); asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout); -asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); +asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification); asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 23fdb8492b8e..6df028b70543 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -907,7 +907,7 @@ out: return ret; } -asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, +asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout) { diff --git a/kernel/exit.c b/kernel/exit.c index c7740fa3252c..fac9b040af2c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1182,9 +1182,11 @@ do_group_exit(int exit_code) * wait4()-ing process will get the correct exit code - even if this * thread is not the thread group leader. */ -asmlinkage void sys_exit_group(int error_code) +asmlinkage long sys_exit_group(int error_code) { do_group_exit((error_code & 0xff) << 8); + /* NOTREACHED */ + return 0; } static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) diff --git a/kernel/signal.c b/kernel/signal.c index 3152ac3b62e2..856a5479d49d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2559,7 +2559,7 @@ sys_ssetmask(int newmask) /* * For backwards compatibility. Functionality superseded by sigaction. */ -asmlinkage unsigned long +asmlinkage long sys_signal(int sig, __sighandler_t handler) { struct k_sigaction new_sa, old_sa; diff --git a/kernel/timer.c b/kernel/timer.c index dee3f641a7a7..7b8697d7f04d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1129,7 +1129,7 @@ void do_timer(unsigned long ticks) * For backwards compatibility? This can be done in libc so Alpha * and all newer ports shouldn't need it. */ -asmlinkage unsigned long sys_alarm(unsigned int seconds) +asmlinkage long sys_alarm(unsigned int seconds) { return alarm_setitimer(seconds); } diff --git a/mm/filemap.c b/mm/filemap.c index ceba0bd03662..538b75ed6236 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1374,7 +1374,7 @@ do_readahead(struct address_space *mapping, struct file *filp, return 0; } -asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count) +asmlinkage long sys_readahead(int fd, loff_t offset, size_t count) { ssize_t ret; struct file *file; diff --git a/mm/mmap.c b/mm/mmap.c index 749623196cb9..a970d890cb21 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -245,7 +245,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) return next; } -asmlinkage unsigned long sys_brk(unsigned long brk) +asmlinkage long sys_brk(unsigned long brk) { unsigned long rlim, retval; unsigned long newbrk, oldbrk; diff --git a/mm/mremap.c b/mm/mremap.c index 646de959aa58..5572e0825d80 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -420,7 +420,7 @@ out_nc: return ret; } -asmlinkage unsigned long sys_mremap(unsigned long addr, +asmlinkage long sys_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) { diff --git a/mm/nommu.c b/mm/nommu.c index 60ed8375c986..ee3e78927739 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -416,7 +416,7 @@ EXPORT_SYMBOL(vm_insert_page); * to a regular file. in this case, the unmapping will need * to invoke file system routines that need the global lock. */ -asmlinkage unsigned long sys_brk(unsigned long brk) +asmlinkage long sys_brk(unsigned long brk) { struct mm_struct *mm = current->mm; -- cgit v1.2.3-71-gd317 From e55380edf68796d75bf41391a781c68ee678587d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:55 +0100 Subject: [CVE-2009-0029] Rename old_readdir to sys_old_readdir This way it matches the generic system call name convention. Signed-off-by: Heiko Carstens --- arch/arm/kernel/calls.S | 2 +- arch/cris/arch-v10/kernel/entry.S | 2 +- arch/cris/arch-v32/kernel/entry.S | 2 +- arch/h8300/kernel/syscalls.S | 2 +- arch/m68k/kernel/entry.S | 2 +- arch/m68knommu/kernel/syscalltable.S | 2 +- arch/mips/kernel/scall32-o32.S | 2 +- arch/mn10300/kernel/entry.S | 2 +- arch/powerpc/include/asm/systbl.h | 2 +- arch/sh/kernel/syscalls_32.S | 2 +- arch/sh/kernel/syscalls_64.S | 2 +- arch/sparc/kernel/systbls_32.S | 2 +- arch/x86/kernel/syscall_table_32.S | 2 +- fs/readdir.c | 2 +- include/linux/syscalls.h | 2 ++ 15 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 09a061cb7838..9ca8d13f05f7 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -98,7 +98,7 @@ CALL(sys_uselib) CALL(sys_swapon) CALL(sys_reboot) - CALL(OBSOLETE(old_readdir)) /* used by libc4 */ + CALL(OBSOLETE(sys_old_readdir)) /* used by libc4 */ /* 90 */ CALL(OBSOLETE(old_mmap)) /* used by libc4 */ CALL(sys_munmap) CALL(sys_truncate) diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index ed171d389e65..72f5cd319b97 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -691,7 +691,7 @@ sys_call_table: .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S index 7f6f93e6b70e..5e674c8f7c51 100644 --- a/arch/cris/arch-v32/kernel/entry.S +++ b/arch/cris/arch-v32/kernel/entry.S @@ -614,7 +614,7 @@ sys_call_table: .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S index 54e21c3f2057..4eb67faac633 100644 --- a/arch/h8300/kernel/syscalls.S +++ b/arch/h8300/kernel/syscalls.S @@ -103,7 +103,7 @@ SYMBOL_NAME_LABEL(sys_call_table) .long SYMBOL_NAME(sys_uselib) .long SYMBOL_NAME(sys_swapon) .long SYMBOL_NAME(sys_reboot) - .long SYMBOL_NAME(old_readdir) + .long SYMBOL_NAME(sys_old_readdir) .long SYMBOL_NAME(old_mmap) /* 90 */ .long SYMBOL_NAME(sys_munmap) .long SYMBOL_NAME(sys_truncate) diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 5b780826647c..5c332f2b9b83 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -513,7 +513,7 @@ sys_call_table: .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 812f8d8b7a85..5c3e3f62194a 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -107,7 +107,7 @@ ENTRY(sys_call_table) .long sys_uselib .long sys_ni_syscall /* sys_swapon */ .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index d0916a55cd77..51d1ba415b90 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -398,7 +398,7 @@ einval: li v0, -ENOSYS sys sys_uselib 1 sys sys_swapon 2 sys sys_reboot 3 - sys old_readdir 3 + sys sys_old_readdir 3 sys old_mmap 6 /* 4090 */ sys sys_munmap 2 sys sys_truncate 2 diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index 62fba8aa9b6e..ceeaaaa359e2 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -478,7 +478,7 @@ ENTRY(sys_call_table) .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 803def236654..72353f6070a4 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -92,7 +92,7 @@ COMPAT_SYS_SPU(readlink) SYSCALL(uselib) SYSCALL(swapon) SYSCALL(reboot) -SYSX(sys_ni_syscall,compat_sys_old_readdir,old_readdir) +SYSX(sys_ni_syscall,compat_sys_old_readdir,sys_old_readdir) SYSCALL_SPU(mmap) SYSCALL_SPU(munmap) SYSCALL_SPU(truncate) diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S index 0af693e65764..a87ce076cfa2 100644 --- a/arch/sh/kernel/syscalls_32.S +++ b/arch/sh/kernel/syscalls_32.S @@ -105,7 +105,7 @@ ENTRY(sys_call_table) .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index 0b436aa3cad7..557cb91f5caf 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -109,7 +109,7 @@ sys_call_table: .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 7d0807586442..8a434f51ba0f 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -56,7 +56,7 @@ sys_call_table: /*185*/ .long sys_setpgid, sys_fremovexattr, sys_tkill, sys_exit_group, sys_newuname /*190*/ .long sys_init_module, sys_personality, sparc_remap_file_pages, sys_epoll_create, sys_epoll_ctl /*195*/ .long sys_epoll_wait, sys_ioprio_set, sys_getppid, sparc_sigaction, sys_sgetmask -/*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir +/*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, sys_old_readdir /*205*/ .long sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64 /*210*/ .long sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo /*215*/ .long sys_ipc, sys_sigreturn, sys_clone, sys_ioprio_get, sys_adjtimex diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d44395ff34c3..e2e86a08f31d 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -88,7 +88,7 @@ ENTRY(sys_call_table) .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/fs/readdir.c b/fs/readdir.c index b318d9b5af2e..8b4c2a0051a6 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -102,7 +102,7 @@ efault: return -EFAULT; } -asmlinkage long old_readdir(unsigned int fd, struct old_linux_dirent __user * dirent, unsigned int count) +asmlinkage long sys_old_readdir(unsigned int fd, struct old_linux_dirent __user * dirent, unsigned int count) { int error; struct file * file; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 22290eeaf553..ca079c3d09e3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,6 +54,7 @@ struct compat_stat; struct compat_timeval; struct robust_list_head; struct getcpu_cache; +struct old_linux_dirent; #include #include @@ -608,6 +609,7 @@ asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); +asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -- cgit v1.2.3-71-gd317 From 1a94bc34768e463a93cb3751819709ab0ea80a01 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:59 +0100 Subject: [CVE-2009-0029] System call wrapper infrastructure From: Martin Schwidefsky By selecting HAVE_SYSCALL_WRAPPERS architectures can activate system call wrappers in order to sign extend system call arguments. All architectures where the ABI defines that the caller of a function has to perform sign extension probably need this. Reported-by: Christian Borntraeger Acked-by: Ralf Baechle Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- arch/Kconfig | 3 +++ include/linux/syscalls.h | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 2e13aa261929..550dab22daa1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -62,6 +62,9 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS See Documentation/unaligned-memory-access.txt for more information on the topic of unaligned memory accesses. +config HAVE_SYSCALL_WRAPPERS + bool + config KRETPROBES def_bool y depends on KPROBES && HAVE_KRETPROBES diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ca079c3d09e3..0bb537d7ba2e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -66,6 +66,68 @@ struct old_linux_dirent; #include #include +#define __SC_DECL1(t1, a1) t1 a1 +#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__) +#define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__) +#define __SC_DECL4(t4, a4, ...) t4 a4, __SC_DECL3(__VA_ARGS__) +#define __SC_DECL5(t5, a5, ...) t5 a5, __SC_DECL4(__VA_ARGS__) +#define __SC_DECL6(t6, a6, ...) t6 a6, __SC_DECL5(__VA_ARGS__) + +#define __SC_LONG1(t1, a1) long a1 +#define __SC_LONG2(t2, a2, ...) long a2, __SC_LONG1(__VA_ARGS__) +#define __SC_LONG3(t3, a3, ...) long a3, __SC_LONG2(__VA_ARGS__) +#define __SC_LONG4(t4, a4, ...) long a4, __SC_LONG3(__VA_ARGS__) +#define __SC_LONG5(t5, a5, ...) long a5, __SC_LONG4(__VA_ARGS__) +#define __SC_LONG6(t6, a6, ...) long a6, __SC_LONG5(__VA_ARGS__) + +#define __SC_CAST1(t1, a1) (t1) a1 +#define __SC_CAST2(t2, a2, ...) (t2) a2, __SC_CAST1(__VA_ARGS__) +#define __SC_CAST3(t3, a3, ...) (t3) a3, __SC_CAST2(__VA_ARGS__) +#define __SC_CAST4(t4, a4, ...) (t4) a4, __SC_CAST3(__VA_ARGS__) +#define __SC_CAST5(t5, a5, ...) (t5) a5, __SC_CAST4(__VA_ARGS__) +#define __SC_CAST6(t6, a6, ...) (t6) a6, __SC_CAST5(__VA_ARGS__) + +#define __SC_TEST(type) BUILD_BUG_ON(sizeof(type) > sizeof(long)) +#define __SC_TEST1(t1, a1) __SC_TEST(t1) +#define __SC_TEST2(t2, a2, ...) __SC_TEST(t2); __SC_TEST1(__VA_ARGS__) +#define __SC_TEST3(t3, a3, ...) __SC_TEST(t3); __SC_TEST2(__VA_ARGS__) +#define __SC_TEST4(t4, a4, ...) __SC_TEST(t4); __SC_TEST3(__VA_ARGS__) +#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) +#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) + +#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) +#define SYSCALL_DEFINE1(...) SYSCALL_DEFINEx(1, __VA_ARGS__) +#define SYSCALL_DEFINE2(...) SYSCALL_DEFINEx(2, __VA_ARGS__) +#define SYSCALL_DEFINE3(...) SYSCALL_DEFINEx(3, __VA_ARGS__) +#define SYSCALL_DEFINE4(...) SYSCALL_DEFINEx(4, __VA_ARGS__) +#define SYSCALL_DEFINE5(...) SYSCALL_DEFINEx(5, __VA_ARGS__) +#define SYSCALL_DEFINE6(...) SYSCALL_DEFINEx(6, __VA_ARGS__) + +#define SYSCALL_ALIAS(alias, name) \ + asm ("\t.globl " #alias "\n\t.set " #alias ", " #name) + +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS + +#define SYSCALL_DEFINE(name) static inline long SYSC_##name +#define SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long sys_##name(__SC_DECL##x(__VA_ARGS__)); \ + static inline long SYSC_##name(__SC_DECL##x(__VA_ARGS__)); \ + asmlinkage long SyS_##name(__SC_LONG##x(__VA_ARGS__)) \ + { \ + __SC_TEST##x(__VA_ARGS__); \ + return (long) SYSC_##name(__SC_CAST##x(__VA_ARGS__)); \ + } \ + SYSCALL_ALIAS(sys_##name, SyS_##name); \ + static inline long SYSC_##name(__SC_DECL##x(__VA_ARGS__)) + +#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + +#define SYSCALL_DEFINE(name) asmlinkage long sys_##name +#define SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long sys_##name(__SC_DECL##x(__VA_ARGS__)) + +#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + asmlinkage long sys_time(time_t __user *tloc); asmlinkage long sys_stime(time_t __user *tptr); asmlinkage long sys_gettimeofday(struct timeval __user *tv, -- cgit v1.2.3-71-gd317 From ee6a093222549ac0c72cfd296c69fa5e7d6daa34 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Jan 2009 14:14:00 +0100 Subject: [CVE-2009-0029] powerpc: Enable syscall wrappers for 64-bit This enables the use of syscall wrappers to do proper sign extension for 64-bit programs. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Heiko Carstens --- arch/powerpc/Kconfig | 1 + include/linux/syscalls.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 84b861316ce7..e39b73bc0ff8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -123,6 +123,7 @@ config PPC select HAVE_DMA_ATTRS if PPC64 select USE_GENERIC_SMP_HELPERS if SMP select HAVE_OPROFILE + select HAVE_SYSCALL_WRAPPERS if PPC64 config EARLY_PRINTK bool diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0bb537d7ba2e..90aa5eba87a2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -103,8 +103,14 @@ struct old_linux_dirent; #define SYSCALL_DEFINE5(...) SYSCALL_DEFINEx(5, __VA_ARGS__) #define SYSCALL_DEFINE6(...) SYSCALL_DEFINEx(6, __VA_ARGS__) +#ifdef CONFIG_PPC64 +#define SYSCALL_ALIAS(alias, name) \ + asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ + "\t.globl ." #alias "\n\t.set ." #alias ", ." #name) +#else #define SYSCALL_ALIAS(alias, name) \ asm ("\t.globl " #alias "\n\t.set " #alias ", " #name) +#endif #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -- cgit v1.2.3-71-gd317 From d4e82042c4cfa87a7d51710b71f568fe80132551 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:34 +0100 Subject: [CVE-2009-0029] System call wrappers part 32 Signed-off-by: Heiko Carstens --- fs/eventfd.c | 5 ++--- fs/pipe.c | 2 +- fs/readdir.c | 3 ++- fs/select.c | 11 ++++++----- fs/timerfd.c | 2 +- include/linux/syscalls.h | 7 +++++++ kernel/signal.c | 11 +++++------ 7 files changed, 24 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/eventfd.c b/fs/eventfd.c index 08bf558d0408..5de2c2db3aa2 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -198,7 +198,7 @@ struct file *eventfd_fget(int fd) return file; } -asmlinkage long sys_eventfd2(unsigned int count, int flags) +SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { int fd; struct eventfd_ctx *ctx; @@ -228,8 +228,7 @@ asmlinkage long sys_eventfd2(unsigned int count, int flags) return fd; } -asmlinkage long sys_eventfd(unsigned int count) +SYSCALL_DEFINE1(eventfd, unsigned int, count) { return sys_eventfd2(count, 0); } - diff --git a/fs/pipe.c b/fs/pipe.c index 0c64db86c919..b89c878588a9 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1043,7 +1043,7 @@ int do_pipe(int *fd) * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way Unix traditionally does this, though. */ -asmlinkage long sys_pipe2(int __user *fildes, int flags) +SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) { int fd[2]; int error; diff --git a/fs/readdir.c b/fs/readdir.c index cf6a0e39819a..7723401f8d8b 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -102,7 +102,8 @@ efault: return -EFAULT; } -asmlinkage long sys_old_readdir(unsigned int fd, struct old_linux_dirent __user * dirent, unsigned int count) +SYSCALL_DEFINE3(old_readdir, unsigned int, fd, + struct old_linux_dirent __user *, dirent, unsigned int, count) { int error; struct file * file; diff --git a/fs/select.c b/fs/select.c index 338f703403af..0fe0e1469df3 100644 --- a/fs/select.c +++ b/fs/select.c @@ -636,8 +636,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, * which has a pointer to the sigset_t itself followed by a size_t containing * the sigset size. */ -asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timespec __user *tsp, void __user *sig) +SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, + fd_set __user *, exp, struct timespec __user *, tsp, + void __user *, sig) { size_t sigsetsize = 0; sigset_t __user *up = NULL; @@ -889,9 +890,9 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, } #ifdef HAVE_SET_RESTORE_SIGMASK -asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, - struct timespec __user *tsp, const sigset_t __user *sigmask, - size_t sigsetsize) +SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, + struct timespec __user *, tsp, const sigset_t __user *, sigmask, + size_t, sigsetsize) { sigset_t ksigmask, sigsaved; struct timespec ts, end_time, *to = NULL; diff --git a/fs/timerfd.c b/fs/timerfd.c index c8c14f58b96f..6a123b8ff3f5 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -265,7 +265,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return 0; } -asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr) +SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) { struct file *file; struct timerfd_ctx *ctx; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 90aa5eba87a2..56c400138b05 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -678,6 +678,13 @@ asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); +asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, + fd_set __user *, struct timespec __user *, + void __user *); +asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, + struct timespec __user *, const sigset_t __user *, + size_t); +asmlinkage long sys_pipe2(int __user *, int); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); diff --git a/kernel/signal.c b/kernel/signal.c index e2333929611a..e73759783dc8 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2491,11 +2491,10 @@ out: #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ #ifdef __ARCH_WANT_SYS_RT_SIGACTION -asmlinkage long -sys_rt_sigaction(int sig, - const struct sigaction __user *act, - struct sigaction __user *oact, - size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigaction, int, sig, + const struct sigaction __user *, act, + struct sigaction __user *, oact, + size_t, sigsetsize) { struct k_sigaction new_sa, old_sa; int ret = -EINVAL; @@ -2578,7 +2577,7 @@ SYSCALL_DEFINE0(pause) #endif #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND -asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) +SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) { sigset_t newset; -- cgit v1.2.3-71-gd317 From 2b66421995d2e93c9d1a0111acf2581f8529c6e5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:35 +0100 Subject: [CVE-2009-0029] System call wrappers part 33 Signed-off-by: Heiko Carstens --- fs/pipe.c | 2 +- include/linux/syscalls.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/pipe.c b/fs/pipe.c index b89c878588a9..3a48ba5179d5 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1059,7 +1059,7 @@ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) return error; } -asmlinkage long sys_pipe(int __user *fildes) +SYSCALL_DEFINE1(pipe, int __user *, fildes) { return sys_pipe2(fildes, 0); } diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 56c400138b05..16875f89e6a7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -685,6 +685,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, struct timespec __user *, const sigset_t __user *, size_t); asmlinkage long sys_pipe2(int __user *, int); +asmlinkage long sys_pipe(int __user *); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -- cgit v1.2.3-71-gd317 From 18e6959c385f3edf3991fa6662a53dac4eb10d5b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 14 Jan 2009 07:28:16 +0100 Subject: mm: fix assertion This assertion is incorrect for lockless pagecache. By definition if we have an unpinned page that we are trying to take a speculative reference to, it may become the tail of a compound page at any time (if it is freed, then reallocated as a compound page). It was still a valid assertion for the vmscan.c LRU isolation case, but it doesn't seem incredibly helpful... if somebody wants it, they can put it back directly where it applies in the vmscan code. Signed-off-by: Nick Piggin Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b91a73fd1bcc..e8ddc98b8405 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -260,7 +260,6 @@ static inline int put_page_testzero(struct page *page) */ static inline int get_page_unless_zero(struct page *page) { - VM_BUG_ON(PageTail(page)); return atomic_inc_not_zero(&page->_count); } -- cgit v1.2.3-71-gd317 From b94b898f3107046b5c97c556e23529283ea5eadd Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Wed, 14 Jan 2009 19:19:02 +0100 Subject: it821x: Add ultra_mask quirk for Vortex86SX On Vortex86SX with IDE controller revision 0x11 ultra DMA must be disabled. This patch was tested by DMP and seems to work. It is a cleaned up version of their older Kernel patch: http://www.dmp.com.tw/tech/vortex86sx/patch-2.6.24-DMP.gz Tested-by: Shawn Lin Signed-off-by: Brandon Philips Cc: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/it821x.c | 12 ++++++++++++ include/linux/pci_ids.h | 1 + 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index 0be27ac1f077..983440a9a5f7 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -68,6 +68,8 @@ #define DRV_NAME "it821x" +#define QUIRK_VORTEX86 1 + struct it821x_dev { unsigned int smart:1, /* Are we in smart raid mode */ @@ -79,6 +81,7 @@ struct it821x_dev u16 pio[2]; /* Cached PIO values */ u16 mwdma[2]; /* Cached MWDMA values */ u16 udma[2]; /* Cached UDMA values (per drive) */ + u16 quirks; }; #define ATA_66 0 @@ -577,6 +580,12 @@ static void __devinit init_hwif_it821x(ide_hwif_t *hwif) hwif->ultra_mask = ATA_UDMA6; hwif->mwdma_mask = ATA_MWDMA2; + + /* Vortex86SX quirk: prevent Ultra-DMA mode to fix BadCRC issue */ + if (idev->quirks & QUIRK_VORTEX86) { + if (dev->revision == 0x11) + hwif->ultra_mask = 0; + } } static void it8212_disable_raid(struct pci_dev *dev) @@ -649,6 +658,8 @@ static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_devic return -ENOMEM; } + itdevs->quirks = id->driver_data; + rc = ide_pci_init_one(dev, &it821x_chipset, itdevs); if (rc) kfree(itdevs); @@ -668,6 +679,7 @@ static void __devexit it821x_remove(struct pci_dev *dev) static const struct pci_device_id it821x_pci_tbl[] = { { PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8211), 0 }, { PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8212), 0 }, + { PCI_VDEVICE(RDC, PCI_DEVICE_ID_RDC_D1010), QUIRK_VORTEX86 }, { 0, }, }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d543365518ab..d56ad9c21c09 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2174,6 +2174,7 @@ #define PCI_DEVICE_ID_RDC_R6040 0x6040 #define PCI_DEVICE_ID_RDC_R6060 0x6060 #define PCI_DEVICE_ID_RDC_R6061 0x6061 +#define PCI_DEVICE_ID_RDC_D1010 0x1010 #define PCI_VENDOR_ID_LENOVO 0x17aa -- cgit v1.2.3-71-gd317 From e720b9e498b6bbb1b4f3b3d2f8e9a78578aafef7 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 14 Jan 2009 19:19:04 +0100 Subject: IDE: fix sparse signed-ness errors with host->host_busy The host_busy field in struct ide_host defaults to a signed-long, where most arch's test_and_set_bit_* macros use an unsigned long. Change to using an unsigned long, which on ARM removes the following sparse errors: drivers/ide/ide-io.c:681:8: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:681:8: expected unsigned long volatile *p drivers/ide/ide-io.c:681:8: got long volatile * drivers/ide/ide-io.c:681:8: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:681:8: expected unsigned long volatile *p drivers/ide/ide-io.c:681:8: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * Signed-off-by: Ben Dooks Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 3644f6323384..194da5a4b0d6 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -871,7 +871,7 @@ struct ide_host { ide_hwif_t *cur_port; /* for hosts requiring serialization */ /* used for hosts requiring serialization */ - volatile long host_busy; + volatile unsigned long host_busy; }; #define IDE_HOST_BUSY 0 -- cgit v1.2.3-71-gd317 From 74d96f018673759d04d032c137d132f6447bfb1e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 13 Jan 2009 19:27:09 -0800 Subject: byteorder: make swab.h include asm/swab.h like a regular header Add swab.h to kbuild.asm and remove the individual entries from each arch, mark as unifdef as some arches have some kernel-only bits inside. Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/Kbuild | 1 - arch/alpha/include/asm/byteorder.h | 1 - arch/arm/include/asm/Kbuild | 1 - arch/arm/include/asm/byteorder.h | 2 -- arch/avr32/include/asm/Kbuild | 1 - arch/avr32/include/asm/byteorder.h | 1 - arch/blackfin/include/asm/Kbuild | 1 - arch/blackfin/include/asm/byteorder.h | 1 - arch/cris/include/asm/Kbuild | 1 - arch/cris/include/asm/byteorder.h | 1 - arch/h8300/include/asm/Kbuild | 1 - arch/h8300/include/asm/byteorder.h | 1 - arch/ia64/include/asm/Kbuild | 1 - arch/ia64/include/asm/byteorder.h | 1 - arch/m68knommu/include/asm/Kbuild | 2 -- arch/m68knommu/include/asm/byteorder.h | 1 - arch/mips/include/asm/Kbuild | 1 - arch/mips/include/asm/byteorder.h | 2 -- arch/parisc/include/asm/Kbuild | 1 - arch/parisc/include/asm/byteorder.h | 1 - arch/powerpc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/byteorder.h | 2 -- arch/s390/include/asm/Kbuild | 1 - arch/s390/include/asm/byteorder.h | 1 - arch/sh/include/asm/Kbuild | 1 - arch/sh/include/asm/byteorder.h | 2 -- arch/sparc/include/asm/Kbuild | 1 - arch/sparc/include/asm/byteorder.h | 1 - arch/x86/include/asm/Kbuild | 1 - arch/x86/include/asm/byteorder.h | 1 - arch/xtensa/include/asm/Kbuild | 2 -- arch/xtensa/include/asm/byteorder.h | 2 -- include/asm-frv/Kbuild | 1 - include/asm-frv/byteorder.h | 1 - include/asm-generic/Kbuild.asm | 1 + include/asm-m32r/Kbuild | 1 - include/asm-m32r/byteorder.h | 2 -- include/asm-m68k/Kbuild | 1 - include/asm-m68k/byteorder.h | 1 - include/asm-mn10300/Kbuild | 1 - include/asm-mn10300/byteorder.h | 1 - include/linux/swab.h | 2 +- 42 files changed, 2 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 4dad27360576..b7c8f188b313 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -9,4 +9,3 @@ unifdef-y += console.h unifdef-y += fpu.h unifdef-y += sysinfo.h unifdef-y += compiler.h -unifdef-y += swab.h diff --git a/arch/alpha/include/asm/byteorder.h b/arch/alpha/include/asm/byteorder.h index 6772f3168701..73683093202d 100644 --- a/arch/alpha/include/asm/byteorder.h +++ b/arch/alpha/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ALPHA_BYTEORDER_H #define _ALPHA_BYTEORDER_H -#include #include #endif /* _ALPHA_BYTEORDER_H */ diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 43b0b2ba392f..73237bd130a2 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -1,4 +1,3 @@ include include/asm-generic/Kbuild.asm unifdef-y += hwcap.h -unifdef-y += swab.h diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h index c02b6fc28e1a..77379748b171 100644 --- a/arch/arm/include/asm/byteorder.h +++ b/arch/arm/include/asm/byteorder.h @@ -15,8 +15,6 @@ #ifndef __ASM_ARM_BYTEORDER_H #define __ASM_ARM_BYTEORDER_H -#include - #ifdef __ARMEB__ #include #else diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 219822c8ad18..3136628ba8d2 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -1,4 +1,3 @@ include include/asm-generic/Kbuild.asm -header-y += swab.h header-y += cachectl.h diff --git a/arch/avr32/include/asm/byteorder.h b/arch/avr32/include/asm/byteorder.h index 2aba64b4e122..50abc21619a8 100644 --- a/arch/avr32/include/asm/byteorder.h +++ b/arch/avr32/include/asm/byteorder.h @@ -4,7 +4,6 @@ #ifndef __ASM_AVR32_BYTEORDER_H #define __ASM_AVR32_BYTEORDER_H -#include #include #endif /* __ASM_AVR32_BYTEORDER_H */ diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index d0d1ac435544..606ecfdcc962 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -1,4 +1,3 @@ include include/asm-generic/Kbuild.asm unifdef-y += fixed_code.h -unifdef-y += swab.h diff --git a/arch/blackfin/include/asm/byteorder.h b/arch/blackfin/include/asm/byteorder.h index b9e797a497b4..3e69106a4d37 100644 --- a/arch/blackfin/include/asm/byteorder.h +++ b/arch/blackfin/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _BLACKFIN_BYTEORDER_H #define _BLACKFIN_BYTEORDER_H -#include #include #endif /* _BLACKFIN_BYTEORDER_H */ diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index b79b7c6543a6..d5b631935ec8 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -9,4 +9,3 @@ header-y += sync_serial.h unifdef-y += etraxgpio.h unifdef-y += rs485.h -unifdef-y += swab.h diff --git a/arch/cris/include/asm/byteorder.h b/arch/cris/include/asm/byteorder.h index 7678d86317ae..bcd189798e26 100644 --- a/arch/cris/include/asm/byteorder.h +++ b/arch/cris/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _CRIS_BYTEORDER_H #define _CRIS_BYTEORDER_H -#include #include #endif diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 27b108a86b39..c68e1680da01 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -1,2 +1 @@ include include/asm-generic/Kbuild.asm -unifdef-y += swab.h diff --git a/arch/h8300/include/asm/byteorder.h b/arch/h8300/include/asm/byteorder.h index c36b80a3dd84..13539da99efd 100644 --- a/arch/h8300/include/asm/byteorder.h +++ b/arch/h8300/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _H8300_BYTEORDER_H #define _H8300_BYTEORDER_H -#include #include #endif /* _H8300_BYTEORDER_H */ diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 3b25bd9dca91..ccbe8ae47a61 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -14,4 +14,3 @@ unifdef-y += gcc_intrin.h unifdef-y += intrinsics.h unifdef-y += perfmon.h unifdef-y += ustack.h -unifdef-y += swab.h diff --git a/arch/ia64/include/asm/byteorder.h b/arch/ia64/include/asm/byteorder.h index 0f84c5cb703d..a8dd73558150 100644 --- a/arch/ia64/include/asm/byteorder.h +++ b/arch/ia64/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ASM_IA64_BYTEORDER_H #define _ASM_IA64_BYTEORDER_H -#include #include #endif /* _ASM_IA64_BYTEORDER_H */ diff --git a/arch/m68knommu/include/asm/Kbuild b/arch/m68knommu/include/asm/Kbuild index 58c02a454130..c68e1680da01 100644 --- a/arch/m68knommu/include/asm/Kbuild +++ b/arch/m68knommu/include/asm/Kbuild @@ -1,3 +1 @@ include include/asm-generic/Kbuild.asm - -unifdef-y += swab.h diff --git a/arch/m68knommu/include/asm/byteorder.h b/arch/m68knommu/include/asm/byteorder.h index a6f0b8f7f622..9c6c76a15041 100644 --- a/arch/m68knommu/include/asm/byteorder.h +++ b/arch/m68knommu/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _M68KNOMMU_BYTEORDER_H #define _M68KNOMMU_BYTEORDER_H -#include #include #endif /* _M68KNOMMU_BYTEORDER_H */ diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 023866c0c102..7897f05e3165 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -1,4 +1,3 @@ include include/asm-generic/Kbuild.asm header-y += cachectl.h sgidefs.h sysmips.h -header-y += swab.h diff --git a/arch/mips/include/asm/byteorder.h b/arch/mips/include/asm/byteorder.h index 607b71830707..9579051ff1c7 100644 --- a/arch/mips/include/asm/byteorder.h +++ b/arch/mips/include/asm/byteorder.h @@ -8,8 +8,6 @@ #ifndef _ASM_BYTEORDER_H #define _ASM_BYTEORDER_H -#include - #if defined(__MIPSEB__) #include #elif defined(__MIPSEL__) diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 2121d99f8364..f88b252e419c 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -1,4 +1,3 @@ include include/asm-generic/Kbuild.asm unifdef-y += pdc.h -unifdef-y += swab.h diff --git a/arch/parisc/include/asm/byteorder.h b/arch/parisc/include/asm/byteorder.h index da66029c4cb2..58af2c5f5d61 100644 --- a/arch/parisc/include/asm/byteorder.h +++ b/arch/parisc/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _PARISC_BYTEORDER_H #define _PARISC_BYTEORDER_H -#include #include #endif /* _PARISC_BYTEORDER_H */ diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 9268602de5d0..5ab7d7fe198c 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -35,4 +35,3 @@ unifdef-y += spu_info.h unifdef-y += termios.h unifdef-y += types.h unifdef-y += unistd.h -unifdef-y += swab.h diff --git a/arch/powerpc/include/asm/byteorder.h b/arch/powerpc/include/asm/byteorder.h index 5cca27a41532..aa6cc4fac965 100644 --- a/arch/powerpc/include/asm/byteorder.h +++ b/arch/powerpc/include/asm/byteorder.h @@ -7,8 +7,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - -#include #include #endif /* _ASM_POWERPC_BYTEORDER_H */ diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index f2af4167bd5f..63a23415fba6 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -13,4 +13,3 @@ unifdef-y += cmb.h unifdef-y += debug.h unifdef-y += chpid.h unifdef-y += schid.h -unifdef-y += swab.h diff --git a/arch/s390/include/asm/byteorder.h b/arch/s390/include/asm/byteorder.h index b95a2b2933fb..a332e59e26fc 100644 --- a/arch/s390/include/asm/byteorder.h +++ b/arch/s390/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _S390_BYTEORDER_H #define _S390_BYTEORDER_H -#include #include #endif /* _S390_BYTEORDER_H */ diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index f1a2a0d1c79c..43910cdf78a5 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -6,4 +6,3 @@ unifdef-y += unistd_32.h unifdef-y += unistd_64.h unifdef-y += posix_types_32.h unifdef-y += posix_types_64.h -unifdef-y += swab.h diff --git a/arch/sh/include/asm/byteorder.h b/arch/sh/include/asm/byteorder.h index e95c41a5c8cc..db2f5d7cb17d 100644 --- a/arch/sh/include/asm/byteorder.h +++ b/arch/sh/include/asm/byteorder.h @@ -1,8 +1,6 @@ #ifndef __ASM_SH_BYTEORDER_H #define __ASM_SH_BYTEORDER_H -#include - #ifdef __LITTLE_ENDIAN__ #include #else diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 95e38a43dff0..deeb0fba8029 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -17,4 +17,3 @@ header-y += traps.h header-y += uctx.h header-y += utrap.h header-y += watchdog.h -header-y += swab.h diff --git a/arch/sparc/include/asm/byteorder.h b/arch/sparc/include/asm/byteorder.h index 48a047cd6fa9..ccc1b6b7de6c 100644 --- a/arch/sparc/include/asm/byteorder.h +++ b/arch/sparc/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _SPARC_BYTEORDER_H #define _SPARC_BYTEORDER_H -#include #include #endif /* _SPARC_BYTEORDER_H */ diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index a9f8a814a1f7..4a8e80cdcfa5 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -22,4 +22,3 @@ unifdef-y += unistd_32.h unifdef-y += unistd_64.h unifdef-y += vm86.h unifdef-y += vsyscall.h -unifdef-y += swab.h diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h index 7c49917e3d9d..b13a7a88f3eb 100644 --- a/arch/x86/include/asm/byteorder.h +++ b/arch/x86/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_BYTEORDER_H #define _ASM_X86_BYTEORDER_H -#include #include #endif /* _ASM_X86_BYTEORDER_H */ diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 58c02a454130..c68e1680da01 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -1,3 +1 @@ include include/asm-generic/Kbuild.asm - -unifdef-y += swab.h diff --git a/arch/xtensa/include/asm/byteorder.h b/arch/xtensa/include/asm/byteorder.h index 329b94591ca4..54eb6315349c 100644 --- a/arch/xtensa/include/asm/byteorder.h +++ b/arch/xtensa/include/asm/byteorder.h @@ -1,8 +1,6 @@ #ifndef _XTENSA_BYTEORDER_H #define _XTENSA_BYTEORDER_H -#include - #ifdef __XTENSA_EL__ #include #elif defined(__XTENSA_EB__) diff --git a/include/asm-frv/Kbuild b/include/asm-frv/Kbuild index 1f44e7c76995..0f8956def738 100644 --- a/include/asm-frv/Kbuild +++ b/include/asm-frv/Kbuild @@ -3,4 +3,3 @@ include include/asm-generic/Kbuild.asm header-y += registers.h unifdef-y += termios.h -unifdef-y += swab.h diff --git a/include/asm-frv/byteorder.h b/include/asm-frv/byteorder.h index 1187e51ecd13..f29b7593e088 100644 --- a/include/asm-frv/byteorder.h +++ b/include/asm-frv/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ASM_BYTEORDER_H #define _ASM_BYTEORDER_H -#include #include #endif /* _ASM_BYTEORDER_H */ diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index 1870d5e05f1c..70d185534b9d 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -31,6 +31,7 @@ unifdef-y += socket.h unifdef-y += sockios.h unifdef-y += stat.h unifdef-y += statfs.h +unifdef-y += swab.h unifdef-y += termbits.h unifdef-y += termios.h unifdef-y += types.h diff --git a/include/asm-m32r/Kbuild b/include/asm-m32r/Kbuild index 27b108a86b39..c68e1680da01 100644 --- a/include/asm-m32r/Kbuild +++ b/include/asm-m32r/Kbuild @@ -1,2 +1 @@ include include/asm-generic/Kbuild.asm -unifdef-y += swab.h diff --git a/include/asm-m32r/byteorder.h b/include/asm-m32r/byteorder.h index 61ff9cfd8451..21855d8b028b 100644 --- a/include/asm-m32r/byteorder.h +++ b/include/asm-m32r/byteorder.h @@ -1,8 +1,6 @@ #ifndef _ASM_M32R_BYTEORDER_H #define _ASM_M32R_BYTEORDER_H -#include - #if defined(__LITTLE_ENDIAN__) # include #else diff --git a/include/asm-m68k/Kbuild b/include/asm-m68k/Kbuild index 52fd96b4142a..1a922fad76f7 100644 --- a/include/asm-m68k/Kbuild +++ b/include/asm-m68k/Kbuild @@ -1,3 +1,2 @@ include include/asm-generic/Kbuild.asm header-y += cachectl.h -unifdef-y += swab.h diff --git a/include/asm-m68k/byteorder.h b/include/asm-m68k/byteorder.h index 300866523b86..31b260a88803 100644 --- a/include/asm-m68k/byteorder.h +++ b/include/asm-m68k/byteorder.h @@ -1,7 +1,6 @@ #ifndef _M68K_BYTEORDER_H #define _M68K_BYTEORDER_H -#include #include #endif /* _M68K_BYTEORDER_H */ diff --git a/include/asm-mn10300/Kbuild b/include/asm-mn10300/Kbuild index 27b108a86b39..c68e1680da01 100644 --- a/include/asm-mn10300/Kbuild +++ b/include/asm-mn10300/Kbuild @@ -1,2 +1 @@ include include/asm-generic/Kbuild.asm -unifdef-y += swab.h diff --git a/include/asm-mn10300/byteorder.h b/include/asm-mn10300/byteorder.h index 45b18ded19e6..5dd0bdd9feee 100644 --- a/include/asm-mn10300/byteorder.h +++ b/include/asm-mn10300/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ASM_BYTEORDER_H #define _ASM_BYTEORDER_H -#include #include #endif /* _ASM_BYTEORDER_H */ diff --git a/include/linux/swab.h b/include/linux/swab.h index be5284d4a053..ea0c02fd5163 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -3,7 +3,7 @@ #include #include -#include +#include /* * casts are necessary for constants, because we never know how for sure -- cgit v1.2.3-71-gd317 From 937f1ba56b4be37d9e2ad77412f95048662058d2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Jan 2009 21:05:05 -0800 Subject: net: Add init_dummy_netdev() and fix EMAC driver using it This adds an init_dummy_netdev() function that gets a network device structure (allocation and lifetime entirely under caller's control) and initialize the minimum amount of fields so it can be used to schedule NAPI polls without registering a full blown interface. This is to be used by drivers that need to tie several hardware interfaces to a single NAPI poll scheduler due to HW limitations. It also updates the ibm_newemac driver to use that, this fixing the oops on 2.6.29 due to passing NULL as "dev" to netif_napi_add() Symbol is exported GPL only a I don't think we want binary drivers doing that sort of acrobatics (if we want them at all). Signed-off-by: Benjamin Herrenschmidt Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ibm_newemac/mal.c | 4 +++- drivers/net/ibm_newemac/mal.h | 2 ++ include/linux/netdevice.h | 3 +++ net/core/dev.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ibm_newemac/mal.c b/drivers/net/ibm_newemac/mal.c index ecf9798987fa..2a2fc17b2878 100644 --- a/drivers/net/ibm_newemac/mal.c +++ b/drivers/net/ibm_newemac/mal.c @@ -613,7 +613,9 @@ static int __devinit mal_probe(struct of_device *ofdev, INIT_LIST_HEAD(&mal->list); spin_lock_init(&mal->lock); - netif_napi_add(NULL, &mal->napi, mal_poll, + init_dummy_netdev(&mal->dummy_dev); + + netif_napi_add(&mal->dummy_dev, &mal->napi, mal_poll, CONFIG_IBM_NEW_EMAC_POLL_WEIGHT); /* Load power-on reset defaults */ diff --git a/drivers/net/ibm_newemac/mal.h b/drivers/net/ibm_newemac/mal.h index 2f0a87360844..9ededfbf0726 100644 --- a/drivers/net/ibm_newemac/mal.h +++ b/drivers/net/ibm_newemac/mal.h @@ -214,6 +214,8 @@ struct mal_instance { int index; spinlock_t lock; + struct net_device dummy_dev; + unsigned int features; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4647604c7ca9..ec54785d34f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -795,6 +795,7 @@ struct net_device NETREG_UNREGISTERING, /* called unregister_netdevice */ NETREG_UNREGISTERED, /* completed unregister todo */ NETREG_RELEASED, /* called free_netdev */ + NETREG_DUMMY, /* dummy device for NAPI poll */ } reg_state; /* Called from unregister, can be used to call free_netdev */ @@ -1077,6 +1078,8 @@ extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); extern int unregister_netdevice_notifier(struct notifier_block *nb); +extern int init_dummy_netdev(struct net_device *dev); + extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); diff --git a/net/core/dev.c b/net/core/dev.c index 60377b6c0a80..8d675975d85b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4430,6 +4430,45 @@ err_uninit: goto out; } +/** + * init_dummy_netdev - init a dummy network device for NAPI + * @dev: device to init + * + * This takes a network device structure and initialize the minimum + * amount of fields so it can be used to schedule NAPI polls without + * registering a full blown interface. This is to be used by drivers + * that need to tie several hardware interfaces to a single NAPI + * poll scheduler due to HW limitations. + */ +int init_dummy_netdev(struct net_device *dev) +{ + /* Clear everything. Note we don't initialize spinlocks + * are they aren't supposed to be taken by any of the + * NAPI code and this dummy netdev is supposed to be + * only ever used for NAPI polls + */ + memset(dev, 0, sizeof(struct net_device)); + + /* make sure we BUG if trying to hit standard + * register/unregister code path + */ + dev->reg_state = NETREG_DUMMY; + + /* initialize the ref count */ + atomic_set(&dev->refcnt, 1); + + /* NAPI wants this */ + INIT_LIST_HEAD(&dev->napi_list); + + /* a dummy interface is started by default */ + set_bit(__LINK_STATE_PRESENT, &dev->state); + set_bit(__LINK_STATE_START, &dev->state); + + return 0; +} +EXPORT_SYMBOL_GPL(init_dummy_netdev); + + /** * register_netdev - register a network device * @dev: device to register -- cgit v1.2.3-71-gd317 From 45ce80fb6b6f9594d1396d44dd7e7c02d596fef8 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Jan 2009 13:50:59 -0800 Subject: cgroups: consolidate cgroup documents Move Documentation/cpusets.txt and Documentation/controllers/* to Documentation/cgroups/ Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cgroups.txt | 5 +- Documentation/cgroups/cpuacct.txt | 32 + Documentation/cgroups/cpusets.txt | 808 +++++++++++++++++++++++++ Documentation/cgroups/devices.txt | 52 ++ Documentation/cgroups/memcg_test.txt | 342 +++++++++++ Documentation/cgroups/memory.txt | 399 ++++++++++++ Documentation/cgroups/resource_counter.txt | 181 ++++++ Documentation/controllers/cpuacct.txt | 32 - Documentation/controllers/devices.txt | 52 -- Documentation/controllers/memcg_test.txt | 342 ----------- Documentation/controllers/memory.txt | 399 ------------ Documentation/controllers/resource_counter.txt | 181 ------ Documentation/cpusets.txt | 808 ------------------------- Documentation/scheduler/sched-design-CFS.txt | 2 +- include/linux/res_counter.h | 2 +- init/Kconfig | 9 +- kernel/cpuset.c | 2 +- 17 files changed, 1824 insertions(+), 1824 deletions(-) create mode 100644 Documentation/cgroups/cpuacct.txt create mode 100644 Documentation/cgroups/cpusets.txt create mode 100644 Documentation/cgroups/devices.txt create mode 100644 Documentation/cgroups/memcg_test.txt create mode 100644 Documentation/cgroups/memory.txt create mode 100644 Documentation/cgroups/resource_counter.txt delete mode 100644 Documentation/controllers/cpuacct.txt delete mode 100644 Documentation/controllers/devices.txt delete mode 100644 Documentation/controllers/memcg_test.txt delete mode 100644 Documentation/controllers/memory.txt delete mode 100644 Documentation/controllers/resource_counter.txt delete mode 100644 Documentation/cpusets.txt (limited to 'include/linux') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index e33ee74eee77..d9e5d6f41b92 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -1,7 +1,8 @@ CGROUPS ------- -Written by Paul Menage based on Documentation/cpusets.txt +Written by Paul Menage based on +Documentation/cgroups/cpusets.txt Original copyright statements from cpusets.txt: Portions Copyright (C) 2004 BULL SA. @@ -68,7 +69,7 @@ On their own, the only use for cgroups is for simple job tracking. The intention is that other subsystems hook into the generic cgroup support to provide new attributes for cgroups, such as accounting/limiting the resources which processes in a cgroup can -access. For example, cpusets (see Documentation/cpusets.txt) allows +access. For example, cpusets (see Documentation/cgroups/cpusets.txt) allows you to associate a set of CPUs and a set of memory nodes with the tasks in each cgroup. diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt new file mode 100644 index 000000000000..bb775fbe43d7 --- /dev/null +++ b/Documentation/cgroups/cpuacct.txt @@ -0,0 +1,32 @@ +CPU Accounting Controller +------------------------- + +The CPU accounting controller is used to group tasks using cgroups and +account the CPU usage of these groups of tasks. + +The CPU accounting controller supports multi-hierarchy groups. An accounting +group accumulates the CPU usage of all of its child groups and the tasks +directly present in its group. + +Accounting groups can be created by first mounting the cgroup filesystem. + +# mkdir /cgroups +# mount -t cgroup -ocpuacct none /cgroups + +With the above step, the initial or the parent accounting group +becomes visible at /cgroups. At bootup, this group includes all the +tasks in the system. /cgroups/tasks lists the tasks in this cgroup. +/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by +this group which is essentially the CPU time obtained by all the tasks +in the system. + +New accounting groups can be created under the parent group /cgroups. + +# cd /cgroups +# mkdir g1 +# echo $$ > g1 + +The above steps create a new group g1 and move the current shell +process (bash) into it. CPU time consumed by this bash and its children +can be obtained from g1/cpuacct.usage and the same is accumulated in +/cgroups/cpuacct.usage also. diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt new file mode 100644 index 000000000000..5c86c258c791 --- /dev/null +++ b/Documentation/cgroups/cpusets.txt @@ -0,0 +1,808 @@ + CPUSETS + ------- + +Copyright (C) 2004 BULL SA. +Written by Simon.Derr@bull.net + +Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. +Modified by Paul Jackson +Modified by Christoph Lameter +Modified by Paul Menage +Modified by Hidetoshi Seto + +CONTENTS: +========= + +1. Cpusets + 1.1 What are cpusets ? + 1.2 Why are cpusets needed ? + 1.3 How are cpusets implemented ? + 1.4 What are exclusive cpusets ? + 1.5 What is memory_pressure ? + 1.6 What is memory spread ? + 1.7 What is sched_load_balance ? + 1.8 What is sched_relax_domain_level ? + 1.9 How do I use cpusets ? +2. Usage Examples and Syntax + 2.1 Basic Usage + 2.2 Adding/removing cpus + 2.3 Setting flags + 2.4 Attaching processes +3. Questions +4. Contact + +1. Cpusets +========== + +1.1 What are cpusets ? +---------------------- + +Cpusets provide a mechanism for assigning a set of CPUs and Memory +Nodes to a set of tasks. In this document "Memory Node" refers to +an on-line node that contains memory. + +Cpusets constrain the CPU and Memory placement of tasks to only +the resources within a tasks current cpuset. They form a nested +hierarchy visible in a virtual file system. These are the essential +hooks, beyond what is already present, required to manage dynamic +job placement on large systems. + +Cpusets use the generic cgroup subsystem described in +Documentation/cgroups/cgroups.txt. + +Requests by a task, using the sched_setaffinity(2) system call to +include CPUs in its CPU affinity mask, and using the mbind(2) and +set_mempolicy(2) system calls to include Memory Nodes in its memory +policy, are both filtered through that tasks cpuset, filtering out any +CPUs or Memory Nodes not in that cpuset. The scheduler will not +schedule a task on a CPU that is not allowed in its cpus_allowed +vector, and the kernel page allocator will not allocate a page on a +node that is not allowed in the requesting tasks mems_allowed vector. + +User level code may create and destroy cpusets by name in the cgroup +virtual file system, manage the attributes and permissions of these +cpusets and which CPUs and Memory Nodes are assigned to each cpuset, +specify and query to which cpuset a task is assigned, and list the +task pids assigned to a cpuset. + + +1.2 Why are cpusets needed ? +---------------------------- + +The management of large computer systems, with many processors (CPUs), +complex memory cache hierarchies and multiple Memory Nodes having +non-uniform access times (NUMA) presents additional challenges for +the efficient scheduling and memory placement of processes. + +Frequently more modest sized systems can be operated with adequate +efficiency just by letting the operating system automatically share +the available CPU and Memory resources amongst the requesting tasks. + +But larger systems, which benefit more from careful processor and +memory placement to reduce memory access times and contention, +and which typically represent a larger investment for the customer, +can benefit from explicitly placing jobs on properly sized subsets of +the system. + +This can be especially valuable on: + + * Web Servers running multiple instances of the same web application, + * Servers running different applications (for instance, a web server + and a database), or + * NUMA systems running large HPC applications with demanding + performance characteristics. + +These subsets, or "soft partitions" must be able to be dynamically +adjusted, as the job mix changes, without impacting other concurrently +executing jobs. The location of the running jobs pages may also be moved +when the memory locations are changed. + +The kernel cpuset patch provides the minimum essential kernel +mechanisms required to efficiently implement such subsets. It +leverages existing CPU and Memory Placement facilities in the Linux +kernel to avoid any additional impact on the critical scheduler or +memory allocator code. + + +1.3 How are cpusets implemented ? +--------------------------------- + +Cpusets provide a Linux kernel mechanism to constrain which CPUs and +Memory Nodes are used by a process or set of processes. + +The Linux kernel already has a pair of mechanisms to specify on which +CPUs a task may be scheduled (sched_setaffinity) and on which Memory +Nodes it may obtain memory (mbind, set_mempolicy). + +Cpusets extends these two mechanisms as follows: + + - Cpusets are sets of allowed CPUs and Memory Nodes, known to the + kernel. + - Each task in the system is attached to a cpuset, via a pointer + in the task structure to a reference counted cgroup structure. + - Calls to sched_setaffinity are filtered to just those CPUs + allowed in that tasks cpuset. + - Calls to mbind and set_mempolicy are filtered to just + those Memory Nodes allowed in that tasks cpuset. + - The root cpuset contains all the systems CPUs and Memory + Nodes. + - For any cpuset, one can define child cpusets containing a subset + of the parents CPU and Memory Node resources. + - The hierarchy of cpusets can be mounted at /dev/cpuset, for + browsing and manipulation from user space. + - A cpuset may be marked exclusive, which ensures that no other + cpuset (except direct ancestors and descendents) may contain + any overlapping CPUs or Memory Nodes. + - You can list all the tasks (by pid) attached to any cpuset. + +The implementation of cpusets requires a few, simple hooks +into the rest of the kernel, none in performance critical paths: + + - in init/main.c, to initialize the root cpuset at system boot. + - in fork and exit, to attach and detach a task from its cpuset. + - in sched_setaffinity, to mask the requested CPUs by what's + allowed in that tasks cpuset. + - in sched.c migrate_all_tasks(), to keep migrating tasks within + the CPUs allowed by their cpuset, if possible. + - in the mbind and set_mempolicy system calls, to mask the requested + Memory Nodes by what's allowed in that tasks cpuset. + - in page_alloc.c, to restrict memory to allowed nodes. + - in vmscan.c, to restrict page recovery to the current cpuset. + +You should mount the "cgroup" filesystem type in order to enable +browsing and modifying the cpusets presently known to the kernel. No +new system calls are added for cpusets - all support for querying and +modifying cpusets is via this cpuset file system. + +The /proc//status file for each task has four added lines, +displaying the tasks cpus_allowed (on which CPUs it may be scheduled) +and mems_allowed (on which Memory Nodes it may obtain memory), +in the two formats seen in the following example: + + Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff + Cpus_allowed_list: 0-127 + Mems_allowed: ffffffff,ffffffff + Mems_allowed_list: 0-63 + +Each cpuset is represented by a directory in the cgroup file system +containing (on top of the standard cgroup files) the following +files describing that cpuset: + + - cpus: list of CPUs in that cpuset + - mems: list of Memory Nodes in that cpuset + - memory_migrate flag: if set, move pages to cpusets nodes + - cpu_exclusive flag: is cpu placement exclusive? + - mem_exclusive flag: is memory placement exclusive? + - mem_hardwall flag: is memory allocation hardwalled + - memory_pressure: measure of how much paging pressure in cpuset + +In addition, the root cpuset only has the following file: + - memory_pressure_enabled flag: compute memory_pressure? + +New cpusets are created using the mkdir system call or shell +command. The properties of a cpuset, such as its flags, allowed +CPUs and Memory Nodes, and attached tasks, are modified by writing +to the appropriate file in that cpusets directory, as listed above. + +The named hierarchical structure of nested cpusets allows partitioning +a large system into nested, dynamically changeable, "soft-partitions". + +The attachment of each task, automatically inherited at fork by any +children of that task, to a cpuset allows organizing the work load +on a system into related sets of tasks such that each set is constrained +to using the CPUs and Memory Nodes of a particular cpuset. A task +may be re-attached to any other cpuset, if allowed by the permissions +on the necessary cpuset file system directories. + +Such management of a system "in the large" integrates smoothly with +the detailed placement done on individual tasks and memory regions +using the sched_setaffinity, mbind and set_mempolicy system calls. + +The following rules apply to each cpuset: + + - Its CPUs and Memory Nodes must be a subset of its parents. + - It can't be marked exclusive unless its parent is. + - If its cpu or memory is exclusive, they may not overlap any sibling. + +These rules, and the natural hierarchy of cpusets, enable efficient +enforcement of the exclusive guarantee, without having to scan all +cpusets every time any of them change to ensure nothing overlaps a +exclusive cpuset. Also, the use of a Linux virtual file system (vfs) +to represent the cpuset hierarchy provides for a familiar permission +and name space for cpusets, with a minimum of additional kernel code. + +The cpus and mems files in the root (top_cpuset) cpuset are +read-only. The cpus file automatically tracks the value of +cpu_online_map using a CPU hotplug notifier, and the mems file +automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e., +nodes with memory--using the cpuset_track_online_nodes() hook. + + +1.4 What are exclusive cpusets ? +-------------------------------- + +If a cpuset is cpu or mem exclusive, no other cpuset, other than +a direct ancestor or descendent, may share any of the same CPUs or +Memory Nodes. + +A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", +i.e. it restricts kernel allocations for page, buffer and other data +commonly shared by the kernel across multiple users. All cpusets, +whether hardwalled or not, restrict allocations of memory for user +space. This enables configuring a system so that several independent +jobs can share common kernel data, such as file system pages, while +isolating each job's user allocation in its own cpuset. To do this, +construct a large mem_exclusive cpuset to hold all the jobs, and +construct child, non-mem_exclusive cpusets for each individual job. +Only a small amount of typical kernel memory, such as requests from +interrupt handlers, is allowed to be taken outside even a +mem_exclusive cpuset. + + +1.5 What is memory_pressure ? +----------------------------- +The memory_pressure of a cpuset provides a simple per-cpuset metric +of the rate that the tasks in a cpuset are attempting to free up in +use memory on the nodes of the cpuset to satisfy additional memory +requests. + +This enables batch managers monitoring jobs running in dedicated +cpusets to efficiently detect what level of memory pressure that job +is causing. + +This is useful both on tightly managed systems running a wide mix of +submitted jobs, which may choose to terminate or re-prioritize jobs that +are trying to use more memory than allowed on the nodes assigned them, +and with tightly coupled, long running, massively parallel scientific +computing jobs that will dramatically fail to meet required performance +goals if they start to use more memory than allowed to them. + +This mechanism provides a very economical way for the batch manager +to monitor a cpuset for signs of memory pressure. It's up to the +batch manager or other user code to decide what to do about it and +take action. + +==> Unless this feature is enabled by writing "1" to the special file + /dev/cpuset/memory_pressure_enabled, the hook in the rebalance + code of __alloc_pages() for this metric reduces to simply noticing + that the cpuset_memory_pressure_enabled flag is zero. So only + systems that enable this feature will compute the metric. + +Why a per-cpuset, running average: + + Because this meter is per-cpuset, rather than per-task or mm, + the system load imposed by a batch scheduler monitoring this + metric is sharply reduced on large systems, because a scan of + the tasklist can be avoided on each set of queries. + + Because this meter is a running average, instead of an accumulating + counter, a batch scheduler can detect memory pressure with a + single read, instead of having to read and accumulate results + for a period of time. + + Because this meter is per-cpuset rather than per-task or mm, + the batch scheduler can obtain the key information, memory + pressure in a cpuset, with a single read, rather than having to + query and accumulate results over all the (dynamically changing) + set of tasks in the cpuset. + +A per-cpuset simple digital filter (requires a spinlock and 3 words +of data per-cpuset) is kept, and updated by any task attached to that +cpuset, if it enters the synchronous (direct) page reclaim code. + +A per-cpuset file provides an integer number representing the recent +(half-life of 10 seconds) rate of direct page reclaims caused by +the tasks in the cpuset, in units of reclaims attempted per second, +times 1000. + + +1.6 What is memory spread ? +--------------------------- +There are two boolean flag files per cpuset that control where the +kernel allocates pages for the file system buffers and related in +kernel data structures. They are called 'memory_spread_page' and +'memory_spread_slab'. + +If the per-cpuset boolean flag file 'memory_spread_page' is set, then +the kernel will spread the file system buffers (page cache) evenly +over all the nodes that the faulting task is allowed to use, instead +of preferring to put those pages on the node where the task is running. + +If the per-cpuset boolean flag file 'memory_spread_slab' is set, +then the kernel will spread some file system related slab caches, +such as for inodes and dentries evenly over all the nodes that the +faulting task is allowed to use, instead of preferring to put those +pages on the node where the task is running. + +The setting of these flags does not affect anonymous data segment or +stack segment pages of a task. + +By default, both kinds of memory spreading are off, and memory +pages are allocated on the node local to where the task is running, +except perhaps as modified by the tasks NUMA mempolicy or cpuset +configuration, so long as sufficient free memory pages are available. + +When new cpusets are created, they inherit the memory spread settings +of their parent. + +Setting memory spreading causes allocations for the affected page +or slab caches to ignore the tasks NUMA mempolicy and be spread +instead. Tasks using mbind() or set_mempolicy() calls to set NUMA +mempolicies will not notice any change in these calls as a result of +their containing tasks memory spread settings. If memory spreading +is turned off, then the currently specified NUMA mempolicy once again +applies to memory page allocations. + +Both 'memory_spread_page' and 'memory_spread_slab' are boolean flag +files. By default they contain "0", meaning that the feature is off +for that cpuset. If a "1" is written to that file, then that turns +the named feature on. + +The implementation is simple. + +Setting the flag 'memory_spread_page' turns on a per-process flag +PF_SPREAD_PAGE for each task that is in that cpuset or subsequently +joins that cpuset. The page allocation calls for the page cache +is modified to perform an inline check for this PF_SPREAD_PAGE task +flag, and if set, a call to a new routine cpuset_mem_spread_node() +returns the node to prefer for the allocation. + +Similarly, setting 'memory_spread_slab' turns on the flag +PF_SPREAD_SLAB, and appropriately marked slab caches will allocate +pages from the node returned by cpuset_mem_spread_node(). + +The cpuset_mem_spread_node() routine is also simple. It uses the +value of a per-task rotor cpuset_mem_spread_rotor to select the next +node in the current tasks mems_allowed to prefer for the allocation. + +This memory placement policy is also known (in other contexts) as +round-robin or interleave. + +This policy can provide substantial improvements for jobs that need +to place thread local data on the corresponding node, but that need +to access large file system data sets that need to be spread across +the several nodes in the jobs cpuset in order to fit. Without this +policy, especially for jobs that might have one thread reading in the +data set, the memory allocation across the nodes in the jobs cpuset +can become very uneven. + +1.7 What is sched_load_balance ? +-------------------------------- + +The kernel scheduler (kernel/sched.c) automatically load balances +tasks. If one CPU is underutilized, kernel code running on that +CPU will look for tasks on other more overloaded CPUs and move those +tasks to itself, within the constraints of such placement mechanisms +as cpusets and sched_setaffinity. + +The algorithmic cost of load balancing and its impact on key shared +kernel data structures such as the task list increases more than +linearly with the number of CPUs being balanced. So the scheduler +has support to partition the systems CPUs into a number of sched +domains such that it only load balances within each sched domain. +Each sched domain covers some subset of the CPUs in the system; +no two sched domains overlap; some CPUs might not be in any sched +domain and hence won't be load balanced. + +Put simply, it costs less to balance between two smaller sched domains +than one big one, but doing so means that overloads in one of the +two domains won't be load balanced to the other one. + +By default, there is one sched domain covering all CPUs, except those +marked isolated using the kernel boot time "isolcpus=" argument. + +This default load balancing across all CPUs is not well suited for +the following two situations: + 1) On large systems, load balancing across many CPUs is expensive. + If the system is managed using cpusets to place independent jobs + on separate sets of CPUs, full load balancing is unnecessary. + 2) Systems supporting realtime on some CPUs need to minimize + system overhead on those CPUs, including avoiding task load + balancing if that is not needed. + +When the per-cpuset flag "sched_load_balance" is enabled (the default +setting), it requests that all the CPUs in that cpusets allowed 'cpus' +be contained in a single sched domain, ensuring that load balancing +can move a task (not otherwised pinned, as by sched_setaffinity) +from any CPU in that cpuset to any other. + +When the per-cpuset flag "sched_load_balance" is disabled, then the +scheduler will avoid load balancing across the CPUs in that cpuset, +--except-- in so far as is necessary because some overlapping cpuset +has "sched_load_balance" enabled. + +So, for example, if the top cpuset has the flag "sched_load_balance" +enabled, then the scheduler will have one sched domain covering all +CPUs, and the setting of the "sched_load_balance" flag in any other +cpusets won't matter, as we're already fully load balancing. + +Therefore in the above two situations, the top cpuset flag +"sched_load_balance" should be disabled, and only some of the smaller, +child cpusets have this flag enabled. + +When doing this, you don't usually want to leave any unpinned tasks in +the top cpuset that might use non-trivial amounts of CPU, as such tasks +may be artificially constrained to some subset of CPUs, depending on +the particulars of this flag setting in descendent cpusets. Even if +such a task could use spare CPU cycles in some other CPUs, the kernel +scheduler might not consider the possibility of load balancing that +task to that underused CPU. + +Of course, tasks pinned to a particular CPU can be left in a cpuset +that disables "sched_load_balance" as those tasks aren't going anywhere +else anyway. + +There is an impedance mismatch here, between cpusets and sched domains. +Cpusets are hierarchical and nest. Sched domains are flat; they don't +overlap and each CPU is in at most one sched domain. + +It is necessary for sched domains to be flat because load balancing +across partially overlapping sets of CPUs would risk unstable dynamics +that would be beyond our understanding. So if each of two partially +overlapping cpusets enables the flag 'sched_load_balance', then we +form a single sched domain that is a superset of both. We won't move +a task to a CPU outside it cpuset, but the scheduler load balancing +code might waste some compute cycles considering that possibility. + +This mismatch is why there is not a simple one-to-one relation +between which cpusets have the flag "sched_load_balance" enabled, +and the sched domain configuration. If a cpuset enables the flag, it +will get balancing across all its CPUs, but if it disables the flag, +it will only be assured of no load balancing if no other overlapping +cpuset enables the flag. + +If two cpusets have partially overlapping 'cpus' allowed, and only +one of them has this flag enabled, then the other may find its +tasks only partially load balanced, just on the overlapping CPUs. +This is just the general case of the top_cpuset example given a few +paragraphs above. In the general case, as in the top cpuset case, +don't leave tasks that might use non-trivial amounts of CPU in +such partially load balanced cpusets, as they may be artificially +constrained to some subset of the CPUs allowed to them, for lack of +load balancing to the other CPUs. + +1.7.1 sched_load_balance implementation details. +------------------------------------------------ + +The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary +to most cpuset flags.) When enabled for a cpuset, the kernel will +ensure that it can load balance across all the CPUs in that cpuset +(makes sure that all the CPUs in the cpus_allowed of that cpuset are +in the same sched domain.) + +If two overlapping cpusets both have 'sched_load_balance' enabled, +then they will be (must be) both in the same sched domain. + +If, as is the default, the top cpuset has 'sched_load_balance' enabled, +then by the above that means there is a single sched domain covering +the whole system, regardless of any other cpuset settings. + +The kernel commits to user space that it will avoid load balancing +where it can. It will pick as fine a granularity partition of sched +domains as it can while still providing load balancing for any set +of CPUs allowed to a cpuset having 'sched_load_balance' enabled. + +The internal kernel cpuset to scheduler interface passes from the +cpuset code to the scheduler code a partition of the load balanced +CPUs in the system. This partition is a set of subsets (represented +as an array of cpumask_t) of CPUs, pairwise disjoint, that cover all +the CPUs that must be load balanced. + +Whenever the 'sched_load_balance' flag changes, or CPUs come or go +from a cpuset with this flag enabled, or a cpuset with this flag +enabled is removed, the cpuset code builds a new such partition and +passes it to the scheduler sched domain setup code, to have the sched +domains rebuilt as necessary. + +This partition exactly defines what sched domains the scheduler should +setup - one sched domain for each element (cpumask_t) in the partition. + +The scheduler remembers the currently active sched domain partitions. +When the scheduler routine partition_sched_domains() is invoked from +the cpuset code to update these sched domains, it compares the new +partition requested with the current, and updates its sched domains, +removing the old and adding the new, for each change. + + +1.8 What is sched_relax_domain_level ? +-------------------------------------- + +In sched domain, the scheduler migrates tasks in 2 ways; periodic load +balance on tick, and at time of some schedule events. + +When a task is woken up, scheduler try to move the task on idle CPU. +For example, if a task A running on CPU X activates another task B +on the same CPU X, and if CPU Y is X's sibling and performing idle, +then scheduler migrate task B to CPU Y so that task B can start on +CPU Y without waiting task A on CPU X. + +And if a CPU run out of tasks in its runqueue, the CPU try to pull +extra tasks from other busy CPUs to help them before it is going to +be idle. + +Of course it takes some searching cost to find movable tasks and/or +idle CPUs, the scheduler might not search all CPUs in the domain +everytime. In fact, in some architectures, the searching ranges on +events are limited in the same socket or node where the CPU locates, +while the load balance on tick searchs all. + +For example, assume CPU Z is relatively far from CPU X. Even if CPU Z +is idle while CPU X and the siblings are busy, scheduler can't migrate +woken task B from X to Z since it is out of its searching range. +As the result, task B on CPU X need to wait task A or wait load balance +on the next tick. For some applications in special situation, waiting +1 tick may be too long. + +The 'sched_relax_domain_level' file allows you to request changing +this searching range as you like. This file takes int value which +indicates size of searching range in levels ideally as follows, +otherwise initial value -1 that indicates the cpuset has no request. + + -1 : no request. use system default or follow request of others. + 0 : no search. + 1 : search siblings (hyperthreads in a core). + 2 : search cores in a package. + 3 : search cpus in a node [= system wide on non-NUMA system] + ( 4 : search nodes in a chunk of node [on NUMA system] ) + ( 5 : search system wide [on NUMA system] ) + +The system default is architecture dependent. The system default +can be changed using the relax_domain_level= boot parameter. + +This file is per-cpuset and affect the sched domain where the cpuset +belongs to. Therefore if the flag 'sched_load_balance' of a cpuset +is disabled, then 'sched_relax_domain_level' have no effect since +there is no sched domain belonging the cpuset. + +If multiple cpusets are overlapping and hence they form a single sched +domain, the largest value among those is used. Be careful, if one +requests 0 and others are -1 then 0 is used. + +Note that modifying this file will have both good and bad effects, +and whether it is acceptable or not will be depend on your situation. +Don't modify this file if you are not sure. + +If your situation is: + - The migration costs between each cpu can be assumed considerably + small(for you) due to your special application's behavior or + special hardware support for CPU cache etc. + - The searching cost doesn't have impact(for you) or you can make + the searching cost enough small by managing cpuset to compact etc. + - The latency is required even it sacrifices cache hit rate etc. +then increasing 'sched_relax_domain_level' would benefit you. + + +1.9 How do I use cpusets ? +-------------------------- + +In order to minimize the impact of cpusets on critical kernel +code, such as the scheduler, and due to the fact that the kernel +does not support one task updating the memory placement of another +task directly, the impact on a task of changing its cpuset CPU +or Memory Node placement, or of changing to which cpuset a task +is attached, is subtle. + +If a cpuset has its Memory Nodes modified, then for each task attached +to that cpuset, the next time that the kernel attempts to allocate +a page of memory for that task, the kernel will notice the change +in the tasks cpuset, and update its per-task memory placement to +remain within the new cpusets memory placement. If the task was using +mempolicy MPOL_BIND, and the nodes to which it was bound overlap with +its new cpuset, then the task will continue to use whatever subset +of MPOL_BIND nodes are still allowed in the new cpuset. If the task +was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed +in the new cpuset, then the task will be essentially treated as if it +was MPOL_BIND bound to the new cpuset (even though its numa placement, +as queried by get_mempolicy(), doesn't change). If a task is moved +from one cpuset to another, then the kernel will adjust the tasks +memory placement, as above, the next time that the kernel attempts +to allocate a page of memory for that task. + +If a cpuset has its 'cpus' modified, then each task in that cpuset +will have its allowed CPU placement changed immediately. Similarly, +if a tasks pid is written to a cpusets 'tasks' file, in either its +current cpuset or another cpuset, then its allowed CPU placement is +changed immediately. If such a task had been bound to some subset +of its cpuset using the sched_setaffinity() call, the task will be +allowed to run on any CPU allowed in its new cpuset, negating the +affect of the prior sched_setaffinity() call. + +In summary, the memory placement of a task whose cpuset is changed is +updated by the kernel, on the next allocation of a page for that task, +but the processor placement is not updated, until that tasks pid is +rewritten to the 'tasks' file of its cpuset. This is done to avoid +impacting the scheduler code in the kernel with a check for changes +in a tasks processor placement. + +Normally, once a page is allocated (given a physical page +of main memory) then that page stays on whatever node it +was allocated, so long as it remains allocated, even if the +cpusets memory placement policy 'mems' subsequently changes. +If the cpuset flag file 'memory_migrate' is set true, then when +tasks are attached to that cpuset, any pages that task had +allocated to it on nodes in its previous cpuset are migrated +to the tasks new cpuset. The relative placement of the page within +the cpuset is preserved during these migration operations if possible. +For example if the page was on the second valid node of the prior cpuset +then the page will be placed on the second valid node of the new cpuset. + +Also if 'memory_migrate' is set true, then if that cpusets +'mems' file is modified, pages allocated to tasks in that +cpuset, that were on nodes in the previous setting of 'mems', +will be moved to nodes in the new setting of 'mems.' +Pages that were not in the tasks prior cpuset, or in the cpusets +prior 'mems' setting, will not be moved. + +There is an exception to the above. If hotplug functionality is used +to remove all the CPUs that are currently assigned to a cpuset, +then all the tasks in that cpuset will be moved to the nearest ancestor +with non-empty cpus. But the moving of some (or all) tasks might fail if +cpuset is bound with another cgroup subsystem which has some restrictions +on task attaching. In this failing case, those tasks will stay +in the original cpuset, and the kernel will automatically update +their cpus_allowed to allow all online CPUs. When memory hotplug +functionality for removing Memory Nodes is available, a similar exception +is expected to apply there as well. In general, the kernel prefers to +violate cpuset placement, over starving a task that has had all +its allowed CPUs or Memory Nodes taken offline. + +There is a second exception to the above. GFP_ATOMIC requests are +kernel internal allocations that must be satisfied, immediately. +The kernel may drop some request, in rare cases even panic, if a +GFP_ATOMIC alloc fails. If the request cannot be satisfied within +the current tasks cpuset, then we relax the cpuset, and look for +memory anywhere we can find it. It's better to violate the cpuset +than stress the kernel. + +To start a new job that is to be contained within a cpuset, the steps are: + + 1) mkdir /dev/cpuset + 2) mount -t cgroup -ocpuset cpuset /dev/cpuset + 3) Create the new cpuset by doing mkdir's and write's (or echo's) in + the /dev/cpuset virtual file system. + 4) Start a task that will be the "founding father" of the new job. + 5) Attach that task to the new cpuset by writing its pid to the + /dev/cpuset tasks file for that cpuset. + 6) fork, exec or clone the job tasks from this founding father task. + +For example, the following sequence of commands will setup a cpuset +named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, +and then start a subshell 'sh' in that cpuset: + + mount -t cgroup -ocpuset cpuset /dev/cpuset + cd /dev/cpuset + mkdir Charlie + cd Charlie + /bin/echo 2-3 > cpus + /bin/echo 1 > mems + /bin/echo $$ > tasks + sh + # The subshell 'sh' is now running in cpuset Charlie + # The next line should display '/Charlie' + cat /proc/self/cpuset + +In the future, a C library interface to cpusets will likely be +available. For now, the only way to query or modify cpusets is +via the cpuset file system, using the various cd, mkdir, echo, cat, +rmdir commands from the shell, or their equivalent from C. + +The sched_setaffinity calls can also be done at the shell prompt using +SGI's runon or Robert Love's taskset. The mbind and set_mempolicy +calls can be done at the shell prompt using the numactl command +(part of Andi Kleen's numa package). + +2. Usage Examples and Syntax +============================ + +2.1 Basic Usage +--------------- + +Creating, modifying, using the cpusets can be done through the cpuset +virtual filesystem. + +To mount it, type: +# mount -t cgroup -o cpuset cpuset /dev/cpuset + +Then under /dev/cpuset you can find a tree that corresponds to the +tree of the cpusets in the system. For instance, /dev/cpuset +is the cpuset that holds the whole system. + +If you want to create a new cpuset under /dev/cpuset: +# cd /dev/cpuset +# mkdir my_cpuset + +Now you want to do something with this cpuset. +# cd my_cpuset + +In this directory you can find several files: +# ls +cpu_exclusive memory_migrate mems tasks +cpus memory_pressure notify_on_release +mem_exclusive memory_spread_page sched_load_balance +mem_hardwall memory_spread_slab sched_relax_domain_level + +Reading them will give you information about the state of this cpuset: +the CPUs and Memory Nodes it can use, the processes that are using +it, its properties. By writing to these files you can manipulate +the cpuset. + +Set some flags: +# /bin/echo 1 > cpu_exclusive + +Add some cpus: +# /bin/echo 0-7 > cpus + +Add some mems: +# /bin/echo 0-7 > mems + +Now attach your shell to this cpuset: +# /bin/echo $$ > tasks + +You can also create cpusets inside your cpuset by using mkdir in this +directory. +# mkdir my_sub_cs + +To remove a cpuset, just use rmdir: +# rmdir my_sub_cs +This will fail if the cpuset is in use (has cpusets inside, or has +processes attached). + +Note that for legacy reasons, the "cpuset" filesystem exists as a +wrapper around the cgroup filesystem. + +The command + +mount -t cpuset X /dev/cpuset + +is equivalent to + +mount -t cgroup -ocpuset X /dev/cpuset +echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent + +2.2 Adding/removing cpus +------------------------ + +This is the syntax to use when writing in the cpus or mems files +in cpuset directories: + +# /bin/echo 1-4 > cpus -> set cpus list to cpus 1,2,3,4 +# /bin/echo 1,2,3,4 > cpus -> set cpus list to cpus 1,2,3,4 + +2.3 Setting flags +----------------- + +The syntax is very simple: + +# /bin/echo 1 > cpu_exclusive -> set flag 'cpu_exclusive' +# /bin/echo 0 > cpu_exclusive -> unset flag 'cpu_exclusive' + +2.4 Attaching processes +----------------------- + +# /bin/echo PID > tasks + +Note that it is PID, not PIDs. You can only attach ONE task at a time. +If you have several tasks to attach, you have to do it one after another: + +# /bin/echo PID1 > tasks +# /bin/echo PID2 > tasks + ... +# /bin/echo PIDn > tasks + + +3. Questions +============ + +Q: what's up with this '/bin/echo' ? +A: bash's builtin 'echo' command does not check calls to write() against + errors. If you use it in the cpuset file system, you won't be + able to tell whether a command succeeded or failed. + +Q: When I attach processes, only the first of the line gets really attached ! +A: We can only return one error code per call to write(). So you should also + put only ONE pid. + +4. Contact +========== + +Web: http://www.bullopensource.org/cpuset diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt new file mode 100644 index 000000000000..7cc6e6a60672 --- /dev/null +++ b/Documentation/cgroups/devices.txt @@ -0,0 +1,52 @@ +Device Whitelist Controller + +1. Description: + +Implement a cgroup to track and enforce open and mknod restrictions +on device files. A device cgroup associates a device access +whitelist with each cgroup. A whitelist entry has 4 fields. +'type' is a (all), c (char), or b (block). 'all' means it applies +to all types and all major and minor numbers. Major and minor are +either an integer or * for all. Access is a composition of r +(read), w (write), and m (mknod). + +The root device cgroup starts with rwm to 'all'. A child device +cgroup gets a copy of the parent. Administrators can then remove +devices from the whitelist or add new entries. A child cgroup can +never receive a device access which is denied by its parent. However +when a device access is removed from a parent it will not also be +removed from the child(ren). + +2. User Interface + +An entry is added using devices.allow, and removed using +devices.deny. For instance + + echo 'c 1:3 mr' > /cgroups/1/devices.allow + +allows cgroup 1 to read and mknod the device usually known as +/dev/null. Doing + + echo a > /cgroups/1/devices.deny + +will remove the default 'a *:* rwm' entry. Doing + + echo a > /cgroups/1/devices.allow + +will add the 'a *:* rwm' entry to the whitelist. + +3. Security + +Any task can move itself between cgroups. This clearly won't +suffice, but we can decide the best way to adequately restrict +movement as people get some experience with this. We may just want +to require CAP_SYS_ADMIN, which at least is a separate bit from +CAP_MKNOD. We may want to just refuse moving to a cgroup which +isn't a descendent of the current one. Or we may want to use +CAP_MAC_ADMIN, since we really are trying to lock down root. + +CAP_SYS_ADMIN is needed to modify the whitelist or move another +task to a new cgroup. (Again we'll probably want to change that). + +A cgroup may not be granted more permissions than the cgroup's +parent has. diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt new file mode 100644 index 000000000000..19533f93b7a2 --- /dev/null +++ b/Documentation/cgroups/memcg_test.txt @@ -0,0 +1,342 @@ +Memory Resource Controller(Memcg) Implementation Memo. +Last Updated: 2008/12/15 +Base Kernel Version: based on 2.6.28-rc8-mm. + +Because VM is getting complex (one of reasons is memcg...), memcg's behavior +is complex. This is a document for memcg's internal behavior. +Please note that implementation details can be changed. + +(*) Topics on API should be in Documentation/cgroups/memory.txt) + +0. How to record usage ? + 2 objects are used. + + page_cgroup ....an object per page. + Allocated at boot or memory hotplug. Freed at memory hot removal. + + swap_cgroup ... an entry per swp_entry. + Allocated at swapon(). Freed at swapoff(). + + The page_cgroup has USED bit and double count against a page_cgroup never + occurs. swap_cgroup is used only when a charged page is swapped-out. + +1. Charge + + a page/swp_entry may be charged (usage += PAGE_SIZE) at + + mem_cgroup_newpage_charge() + Called at new page fault and Copy-On-Write. + + mem_cgroup_try_charge_swapin() + Called at do_swap_page() (page fault on swap entry) and swapoff. + Followed by charge-commit-cancel protocol. (With swap accounting) + At commit, a charge recorded in swap_cgroup is removed. + + mem_cgroup_cache_charge() + Called at add_to_page_cache() + + mem_cgroup_cache_charge_swapin() + Called at shmem's swapin. + + mem_cgroup_prepare_migration() + Called before migration. "extra" charge is done and followed by + charge-commit-cancel protocol. + At commit, charge against oldpage or newpage will be committed. + +2. Uncharge + a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by + + mem_cgroup_uncharge_page() + Called when an anonymous page is fully unmapped. I.e., mapcount goes + to 0. If the page is SwapCache, uncharge is delayed until + mem_cgroup_uncharge_swapcache(). + + mem_cgroup_uncharge_cache_page() + Called when a page-cache is deleted from radix-tree. If the page is + SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache(). + + mem_cgroup_uncharge_swapcache() + Called when SwapCache is removed from radix-tree. The charge itself + is moved to swap_cgroup. (If mem+swap controller is disabled, no + charge to swap occurs.) + + mem_cgroup_uncharge_swap() + Called when swp_entry's refcnt goes down to 0. A charge against swap + disappears. + + mem_cgroup_end_migration(old, new) + At success of migration old is uncharged (if necessary), a charge + to new page is committed. At failure, charge to old page is committed. + +3. charge-commit-cancel + In some case, we can't know this "charge" is valid or not at charging + (because of races). + To handle such case, there are charge-commit-cancel functions. + mem_cgroup_try_charge_XXX + mem_cgroup_commit_charge_XXX + mem_cgroup_cancel_charge_XXX + these are used in swap-in and migration. + + At try_charge(), there are no flags to say "this page is charged". + at this point, usage += PAGE_SIZE. + + At commit(), the function checks the page should be charged or not + and set flags or avoid charging.(usage -= PAGE_SIZE) + + At cancel(), simply usage -= PAGE_SIZE. + +Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. + +4. Anonymous + Anonymous page is newly allocated at + - page fault into MAP_ANONYMOUS mapping. + - Copy-On-Write. + It is charged right after it's allocated before doing any page table + related operations. Of course, it's uncharged when another page is used + for the fault address. + + At freeing anonymous page (by exit() or munmap()), zap_pte() is called + and pages for ptes are freed one by one.(see mm/memory.c). Uncharges + are done at page_remove_rmap() when page_mapcount() goes down to 0. + + Another page freeing is by page-reclaim (vmscan.c) and anonymous + pages are swapped out. In this case, the page is marked as + PageSwapCache(). uncharge() routine doesn't uncharge the page marked + as SwapCache(). It's delayed until __delete_from_swap_cache(). + + 4.1 Swap-in. + At swap-in, the page is taken from swap-cache. There are 2 cases. + + (a) If the SwapCache is newly allocated and read, it has no charges. + (b) If the SwapCache has been mapped by processes, it has been + charged already. + + This swap-in is one of the most complicated work. In do_swap_page(), + following events occur when pte is unchanged. + + (1) the page (SwapCache) is looked up. + (2) lock_page() + (3) try_charge_swapin() + (4) reuse_swap_page() (may call delete_swap_cache()) + (5) commit_charge_swapin() + (6) swap_free(). + + Considering following situation for example. + + (A) The page has not been charged before (2) and reuse_swap_page() + doesn't call delete_from_swap_cache(). + (B) The page has not been charged before (2) and reuse_swap_page() + calls delete_from_swap_cache(). + (C) The page has been charged before (2) and reuse_swap_page() doesn't + call delete_from_swap_cache(). + (D) The page has been charged before (2) and reuse_swap_page() calls + delete_from_swap_cache(). + + memory.usage/memsw.usage changes to this page/swp_entry will be + Case (A) (B) (C) (D) + Event + Before (2) 0/ 1 0/ 1 1/ 1 1/ 1 + =========================================== + (3) +1/+1 +1/+1 +1/+1 +1/+1 + (4) - 0/ 0 - -1/ 0 + (5) 0/-1 0/ 0 -1/-1 0/ 0 + (6) - 0/-1 - 0/-1 + =========================================== + Result 1/ 1 1/ 1 1/ 1 1/ 1 + + In any cases, charges to this page should be 1/ 1. + + 4.2 Swap-out. + At swap-out, typical state transition is below. + + (a) add to swap cache. (marked as SwapCache) + swp_entry's refcnt += 1. + (b) fully unmapped. + swp_entry's refcnt += # of ptes. + (c) write back to swap. + (d) delete from swap cache. (remove from SwapCache) + swp_entry's refcnt -= 1. + + + At (b), the page is marked as SwapCache and not uncharged. + At (d), the page is removed from SwapCache and a charge in page_cgroup + is moved to swap_cgroup. + + Finally, at task exit, + (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0. + Here, a charge in swap_cgroup disappears. + +5. Page Cache + Page Cache is charged at + - add_to_page_cache_locked(). + + uncharged at + - __remove_from_page_cache(). + + The logic is very clear. (About migration, see below) + Note: __remove_from_page_cache() is called by remove_from_page_cache() + and __remove_mapping(). + +6. Shmem(tmpfs) Page Cache + Memcg's charge/uncharge have special handlers of shmem. The best way + to understand shmem's page state transition is to read mm/shmem.c. + But brief explanation of the behavior of memcg around shmem will be + helpful to understand the logic. + + Shmem's page (just leaf page, not direct/indirect block) can be on + - radix-tree of shmem's inode. + - SwapCache. + - Both on radix-tree and SwapCache. This happens at swap-in + and swap-out, + + It's charged when... + - A new page is added to shmem's radix-tree. + - A swp page is read. (move a charge from swap_cgroup to page_cgroup) + It's uncharged when + - A page is removed from radix-tree and not SwapCache. + - When SwapCache is removed, a charge is moved to swap_cgroup. + - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup + disappears. + +7. Page Migration + One of the most complicated functions is page-migration-handler. + Memcg has 2 routines. Assume that we are migrating a page's contents + from OLDPAGE to NEWPAGE. + + Usual migration logic is.. + (a) remove the page from LRU. + (b) allocate NEWPAGE (migration target) + (c) lock by lock_page(). + (d) unmap all mappings. + (e-1) If necessary, replace entry in radix-tree. + (e-2) move contents of a page. + (f) map all mappings again. + (g) pushback the page to LRU. + (-) OLDPAGE will be freed. + + Before (g), memcg should complete all necessary charge/uncharge to + NEWPAGE/OLDPAGE. + + The point is.... + - If OLDPAGE is anonymous, all charges will be dropped at (d) because + try_to_unmap() drops all mapcount and the page will not be + SwapCache. + + - If OLDPAGE is SwapCache, charges will be kept at (g) because + __delete_from_swap_cache() isn't called at (e-1) + + - If OLDPAGE is page-cache, charges will be kept at (g) because + remove_from_swap_cache() isn't called at (e-1) + + memcg provides following hooks. + + - mem_cgroup_prepare_migration(OLDPAGE) + Called after (b) to account a charge (usage += PAGE_SIZE) against + memcg which OLDPAGE belongs to. + + - mem_cgroup_end_migration(OLDPAGE, NEWPAGE) + Called after (f) before (g). + If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already + charged, a charge by prepare_migration() is automatically canceled. + If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE. + + But zap_pte() (by exit or munmap) can be called while migration, + we have to check if OLDPAGE/NEWPAGE is a valid page after commit(). + +8. LRU + Each memcg has its own private LRU. Now, it's handling is under global + VM's control (means that it's handled under global zone->lru_lock). + Almost all routines around memcg's LRU is called by global LRU's + list management functions under zone->lru_lock(). + + A special function is mem_cgroup_isolate_pages(). This scans + memcg's private LRU and call __isolate_lru_page() to extract a page + from LRU. + (By __isolate_lru_page(), the page is removed from both of global and + private LRU.) + + +9. Typical Tests. + + Tests for racy cases. + + 9.1 Small limit to memcg. + When you do test to do racy case, it's good test to set memcg's limit + to be very small rather than GB. Many races found in the test under + xKB or xxMB limits. + (Memory behavior under GB and Memory behavior under MB shows very + different situation.) + + 9.2 Shmem + Historically, memcg's shmem handling was poor and we saw some amount + of troubles here. This is because shmem is page-cache but can be + SwapCache. Test with shmem/tmpfs is always good test. + + 9.3 Migration + For NUMA, migration is an another special case. To do easy test, cpuset + is useful. Following is a sample script to do migration. + + mount -t cgroup -o cpuset none /opt/cpuset + + mkdir /opt/cpuset/01 + echo 1 > /opt/cpuset/01/cpuset.cpus + echo 0 > /opt/cpuset/01/cpuset.mems + echo 1 > /opt/cpuset/01/cpuset.memory_migrate + mkdir /opt/cpuset/02 + echo 1 > /opt/cpuset/02/cpuset.cpus + echo 1 > /opt/cpuset/02/cpuset.mems + echo 1 > /opt/cpuset/02/cpuset.memory_migrate + + In above set, when you moves a task from 01 to 02, page migration to + node 0 to node 1 will occur. Following is a script to migrate all + under cpuset. + -- + move_task() + { + for pid in $1 + do + /bin/echo $pid >$2/tasks 2>/dev/null + echo -n $pid + echo -n " " + done + echo END + } + + G1_TASK=`cat ${G1}/tasks` + G2_TASK=`cat ${G2}/tasks` + move_task "${G1_TASK}" ${G2} & + -- + 9.4 Memory hotplug. + memory hotplug test is one of good test. + to offline memory, do following. + # echo offline > /sys/devices/system/memory/memoryXXX/state + (XXX is the place of memory) + This is an easy way to test page migration, too. + + 9.5 mkdir/rmdir + When using hierarchy, mkdir/rmdir test should be done. + Use tests like the following. + + echo 1 >/opt/cgroup/01/memory/use_hierarchy + mkdir /opt/cgroup/01/child_a + mkdir /opt/cgroup/01/child_b + + set limit to 01. + add limit to 01/child_b + run jobs under child_a and child_b + + create/delete following groups at random while jobs are running. + /opt/cgroup/01/child_a/child_aa + /opt/cgroup/01/child_b/child_bb + /opt/cgroup/01/child_c + + running new jobs in new group is also good. + + 9.6 Mount with other subsystems. + Mounting with other subsystems is a good test because there is a + race and lock dependency with other cgroup subsystems. + + example) + # mount -t cgroup none /cgroup -t cpuset,memory,cpu,devices + + and do task move, mkdir, rmdir etc...under this. diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt new file mode 100644 index 000000000000..e1501964df1e --- /dev/null +++ b/Documentation/cgroups/memory.txt @@ -0,0 +1,399 @@ +Memory Resource Controller + +NOTE: The Memory Resource Controller has been generically been referred +to as the memory controller in this document. Do not confuse memory controller +used here with the memory controller that is used in hardware. + +Salient features + +a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages +b. The infrastructure allows easy addition of other types of memory to control +c. Provides *zero overhead* for non memory controller users +d. Provides a double LRU: global memory pressure causes reclaim from the + global LRU; a cgroup on hitting a limit, reclaims from the per + cgroup LRU + +NOTE: Swap Cache (unmapped) is not accounted now. + +Benefits and Purpose of the memory controller + +The memory controller isolates the memory behaviour of a group of tasks +from the rest of the system. The article on LWN [12] mentions some probable +uses of the memory controller. The memory controller can be used to + +a. Isolate an application or a group of applications + Memory hungry applications can be isolated and limited to a smaller + amount of memory. +b. Create a cgroup with limited amount of memory, this can be used + as a good alternative to booting with mem=XXXX. +c. Virtualization solutions can control the amount of memory they want + to assign to a virtual machine instance. +d. A CD/DVD burner could control the amount of memory used by the + rest of the system to ensure that burning does not fail due to lack + of available memory. +e. There are several other use cases, find one or use the controller just + for fun (to learn and hack on the VM subsystem). + +1. History + +The memory controller has a long history. A request for comments for the memory +controller was posted by Balbir Singh [1]. At the time the RFC was posted +there were several implementations for memory control. The goal of the +RFC was to build consensus and agreement for the minimal features required +for memory control. The first RSS controller was posted by Balbir Singh[2] +in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the +RSS controller. At OLS, at the resource management BoF, everyone suggested +that we handle both page cache and RSS together. Another request was raised +to allow user space handling of OOM. The current memory controller is +at version 6; it combines both mapped (RSS) and unmapped Page +Cache Control [11]. + +2. Memory Control + +Memory is a unique resource in the sense that it is present in a limited +amount. If a task requires a lot of CPU processing, the task can spread +its processing over a period of hours, days, months or years, but with +memory, the same physical memory needs to be reused to accomplish the task. + +The memory controller implementation has been divided into phases. These +are: + +1. Memory controller +2. mlock(2) controller +3. Kernel user memory accounting and slab control +4. user mappings length controller + +The memory controller is the first controller developed. + +2.1. Design + +The core of the design is a counter called the res_counter. The res_counter +tracks the current memory usage and limit of the group of processes associated +with the controller. Each cgroup has a memory controller specific data +structure (mem_cgroup) associated with it. + +2.2. Accounting + + +--------------------+ + | mem_cgroup | + | (res_counter) | + +--------------------+ + / ^ \ + / | \ + +---------------+ | +---------------+ + | mm_struct | |.... | mm_struct | + | | | | | + +---------------+ | +---------------+ + | + + --------------+ + | + +---------------+ +------+--------+ + | page +----------> page_cgroup| + | | | | + +---------------+ +---------------+ + + (Figure 1: Hierarchy of Accounting) + + +Figure 1 shows the important aspects of the controller + +1. Accounting happens per cgroup +2. Each mm_struct knows about which cgroup it belongs to +3. Each page has a pointer to the page_cgroup, which in turn knows the + cgroup it belongs to + +The accounting is done as follows: mem_cgroup_charge() is invoked to setup +the necessary data structures and check if the cgroup that is being charged +is over its limit. If it is then reclaim is invoked on the cgroup. +More details can be found in the reclaim section of this document. +If everything goes well, a page meta-data-structure called page_cgroup is +allocated and associated with the page. This routine also adds the page to +the per cgroup LRU. + +2.2.1 Accounting details + +All mapped anon pages (RSS) and cache pages (Page Cache) are accounted. +(some pages which never be reclaimable and will not be on global LRU + are not accounted. we just accounts pages under usual vm management.) + +RSS pages are accounted at page_fault unless they've already been accounted +for earlier. A file page will be accounted for as Page Cache when it's +inserted into inode (radix-tree). While it's mapped into the page tables of +processes, duplicate accounting is carefully avoided. + +A RSS page is unaccounted when it's fully unmapped. A PageCache page is +unaccounted when it's removed from radix-tree. + +At page migration, accounting information is kept. + +Note: we just account pages-on-lru because our purpose is to control amount +of used pages. not-on-lru pages are tend to be out-of-control from vm view. + +2.3 Shared Page Accounting + +Shared pages are accounted on the basis of the first touch approach. The +cgroup that first touches a page is accounted for the page. The principle +behind this approach is that a cgroup that aggressively uses a shared +page will eventually get charged for it (once it is uncharged from +the cgroup that brought it in -- this will happen on memory pressure). + +Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.. +When you do swapoff and make swapped-out pages of shmem(tmpfs) to +be backed into memory in force, charges for pages are accounted against the +caller of swapoff rather than the users of shmem. + + +2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) +Swap Extension allows you to record charge for swap. A swapped-in page is +charged back to original page allocator if possible. + +When swap is accounted, following files are added. + - memory.memsw.usage_in_bytes. + - memory.memsw.limit_in_bytes. + +usage of mem+swap is limited by memsw.limit_in_bytes. + +Note: why 'mem+swap' rather than swap. +The global LRU(kswapd) can swap out arbitrary pages. Swap-out means +to move account from memory to swap...there is no change in usage of +mem+swap. + +In other words, when we want to limit the usage of swap without affecting +global LRU, mem+swap limit is better than just limiting swap from OS point +of view. + +2.5 Reclaim + +Each cgroup maintains a per cgroup LRU that consists of an active +and inactive list. When a cgroup goes over its limit, we first try +to reclaim memory from the cgroup so as to make space for the new +pages that the cgroup has touched. If the reclaim is unsuccessful, +an OOM routine is invoked to select and kill the bulkiest task in the +cgroup. + +The reclaim algorithm has not been modified for cgroups, except that +pages that are selected for reclaiming come from the per cgroup LRU +list. + +2. Locking + +The memory controller uses the following hierarchy + +1. zone->lru_lock is used for selecting pages to be isolated +2. mem->per_zone->lru_lock protects the per cgroup LRU (per zone) +3. lock_page_cgroup() is used to protect page->page_cgroup + +3. User Interface + +0. Configuration + +a. Enable CONFIG_CGROUPS +b. Enable CONFIG_RESOURCE_COUNTERS +c. Enable CONFIG_CGROUP_MEM_RES_CTLR + +1. Prepare the cgroups +# mkdir -p /cgroups +# mount -t cgroup none /cgroups -o memory + +2. Make the new group and move bash into it +# mkdir /cgroups/0 +# echo $$ > /cgroups/0/tasks + +Since now we're in the 0 cgroup, +We can alter the memory limit: +# echo 4M > /cgroups/0/memory.limit_in_bytes + +NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, +mega or gigabytes. + +# cat /cgroups/0/memory.limit_in_bytes +4194304 + +NOTE: The interface has now changed to display the usage in bytes +instead of pages + +We can check the usage: +# cat /cgroups/0/memory.usage_in_bytes +1216512 + +A successful write to this file does not guarantee a successful set of +this limit to the value written into the file. This can be due to a +number of factors, such as rounding up to page boundaries or the total +availability of memory on the system. The user is required to re-read +this file after a write to guarantee the value committed by the kernel. + +# echo 1 > memory.limit_in_bytes +# cat memory.limit_in_bytes +4096 + +The memory.failcnt field gives the number of times that the cgroup limit was +exceeded. + +The memory.stat file gives accounting information. Now, the number of +caches, RSS and Active pages/Inactive pages are shown. + +4. Testing + +Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11]. +Apart from that v6 has been tested with several applications and regular +daily use. The controller has also been tested on the PPC64, x86_64 and +UML platforms. + +4.1 Troubleshooting + +Sometimes a user might find that the application under a cgroup is +terminated. There are several causes for this: + +1. The cgroup limit is too low (just too low to do anything useful) +2. The user is using anonymous memory and swap is turned off or too low + +A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of +some of the pages cached in the cgroup (page cache pages). + +4.2 Task migration + +When a task migrates from one cgroup to another, it's charge is not +carried forward. The pages allocated from the original cgroup still +remain charged to it, the charge is dropped when the page is freed or +reclaimed. + +4.3 Removing a cgroup + +A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a +cgroup might have some charge associated with it, even though all +tasks have migrated away from it. +Such charges are freed(at default) or moved to its parent. When moved, +both of RSS and CACHES are moved to parent. +If both of them are busy, rmdir() returns -EBUSY. See 5.1 Also. + +Charges recorded in swap information is not updated at removal of cgroup. +Recorded information is discarded and a cgroup which uses swap (swapcache) +will be charged as a new owner of it. + + +5. Misc. interfaces. + +5.1 force_empty + memory.force_empty interface is provided to make cgroup's memory usage empty. + You can use this interface only when the cgroup has no tasks. + When writing anything to this + + # echo 0 > memory.force_empty + + Almost all pages tracked by this memcg will be unmapped and freed. Some of + pages cannot be freed because it's locked or in-use. Such pages are moved + to parent and this cgroup will be empty. But this may return -EBUSY in + some too busy case. + + Typical use case of this interface is that calling this before rmdir(). + Because rmdir() moves all pages to parent, some out-of-use page caches can be + moved to the parent. If you want to avoid that, force_empty will be useful. + +5.2 stat file + memory.stat file includes following statistics (now) + cache - # of pages from page-cache and shmem. + rss - # of pages from anonymous memory. + pgpgin - # of event of charging + pgpgout - # of event of uncharging + active_anon - # of pages on active lru of anon, shmem. + inactive_anon - # of pages on active lru of anon, shmem + active_file - # of pages on active lru of file-cache + inactive_file - # of pages on inactive lru of file cache + unevictable - # of pages cannot be reclaimed.(mlocked etc) + + Below is depend on CONFIG_DEBUG_VM. + inactive_ratio - VM inernal parameter. (see mm/page_alloc.c) + recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) + recent_rotated_file - VM internal parameter. (see mm/vmscan.c) + recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) + recent_scanned_file - VM internal parameter. (see mm/vmscan.c) + + Memo: + recent_rotated means recent frequency of lru rotation. + recent_scanned means recent # of scans to lru. + showing for better debug please see the code for meanings. + + +5.3 swappiness + Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. + + Following cgroup's swapiness can't be changed. + - root cgroup (uses /proc/sys/vm/swappiness). + - a cgroup which uses hierarchy and it has child cgroup. + - a cgroup which uses hierarchy and not the root of hierarchy. + + +6. Hierarchy support + +The memory controller supports a deep hierarchy and hierarchical accounting. +The hierarchy is created by creating the appropriate cgroups in the +cgroup filesystem. Consider for example, the following cgroup filesystem +hierarchy + + root + / | \ + / | \ + a b c + | \ + | \ + d e + +In the diagram above, with hierarchical accounting enabled, all memory +usage of e, is accounted to its ancestors up until the root (i.e, c and root), +that has memory.use_hierarchy enabled. If one of the ancestors goes over its +limit, the reclaim algorithm reclaims from the tasks in the ancestor and the +children of the ancestor. + +6.1 Enabling hierarchical accounting and reclaim + +The memory controller by default disables the hierarchy feature. Support +can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup + +# echo 1 > memory.use_hierarchy + +The feature can be disabled by + +# echo 0 > memory.use_hierarchy + +NOTE1: Enabling/disabling will fail if the cgroup already has other +cgroups created below it. + +NOTE2: This feature can be enabled/disabled per subtree. + +7. TODO + +1. Add support for accounting huge pages (as a separate controller) +2. Make per-cgroup scanner reclaim not-shared pages first +3. Teach controller to account for shared-pages +4. Start reclamation in the background when the limit is + not yet hit but the usage is getting closer + +Summary + +Overall, the memory controller has been a stable controller and has been +commented and discussed quite extensively in the community. + +References + +1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/ +2. Singh, Balbir. Memory Controller (RSS Control), + http://lwn.net/Articles/222762/ +3. Emelianov, Pavel. Resource controllers based on process cgroups + http://lkml.org/lkml/2007/3/6/198 +4. Emelianov, Pavel. RSS controller based on process cgroups (v2) + http://lkml.org/lkml/2007/4/9/78 +5. Emelianov, Pavel. RSS controller based on process cgroups (v3) + http://lkml.org/lkml/2007/5/30/244 +6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ +7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control + subsystem (v3), http://lwn.net/Articles/235534/ +8. Singh, Balbir. RSS controller v2 test results (lmbench), + http://lkml.org/lkml/2007/5/17/232 +9. Singh, Balbir. RSS controller v2 AIM9 results + http://lkml.org/lkml/2007/5/18/1 +10. Singh, Balbir. Memory controller v6 test results, + http://lkml.org/lkml/2007/8/19/36 +11. Singh, Balbir. Memory controller introduction (v6), + http://lkml.org/lkml/2007/8/17/69 +12. Corbet, Jonathan, Controlling memory use in cgroups, + http://lwn.net/Articles/243795/ diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt new file mode 100644 index 000000000000..f196ac1d7d25 --- /dev/null +++ b/Documentation/cgroups/resource_counter.txt @@ -0,0 +1,181 @@ + + The Resource Counter + +The resource counter, declared at include/linux/res_counter.h, +is supposed to facilitate the resource management by controllers +by providing common stuff for accounting. + +This "stuff" includes the res_counter structure and routines +to work with it. + + + +1. Crucial parts of the res_counter structure + + a. unsigned long long usage + + The usage value shows the amount of a resource that is consumed + by a group at a given time. The units of measurement should be + determined by the controller that uses this counter. E.g. it can + be bytes, items or any other unit the controller operates on. + + b. unsigned long long max_usage + + The maximal value of the usage over time. + + This value is useful when gathering statistical information about + the particular group, as it shows the actual resource requirements + for a particular group, not just some usage snapshot. + + c. unsigned long long limit + + The maximal allowed amount of resource to consume by the group. In + case the group requests for more resources, so that the usage value + would exceed the limit, the resource allocation is rejected (see + the next section). + + d. unsigned long long failcnt + + The failcnt stands for "failures counter". This is the number of + resource allocation attempts that failed. + + c. spinlock_t lock + + Protects changes of the above values. + + + +2. Basic accounting routines + + a. void res_counter_init(struct res_counter *rc) + + Initializes the resource counter. As usual, should be the first + routine called for a new counter. + + b. int res_counter_charge[_locked] + (struct res_counter *rc, unsigned long val) + + When a resource is about to be allocated it has to be accounted + with the appropriate resource counter (controller should determine + which one to use on its own). This operation is called "charging". + + This is not very important which operation - resource allocation + or charging - is performed first, but + * if the allocation is performed first, this may create a + temporary resource over-usage by the time resource counter is + charged; + * if the charging is performed first, then it should be uncharged + on error path (if the one is called). + + c. void res_counter_uncharge[_locked] + (struct res_counter *rc, unsigned long val) + + When a resource is released (freed) it should be de-accounted + from the resource counter it was accounted to. This is called + "uncharging". + + The _locked routines imply that the res_counter->lock is taken. + + + 2.1 Other accounting routines + + There are more routines that may help you with common needs, like + checking whether the limit is reached or resetting the max_usage + value. They are all declared in include/linux/res_counter.h. + + + +3. Analyzing the resource counter registrations + + a. If the failcnt value constantly grows, this means that the counter's + limit is too tight. Either the group is misbehaving and consumes too + many resources, or the configuration is not suitable for the group + and the limit should be increased. + + b. The max_usage value can be used to quickly tune the group. One may + set the limits to maximal values and either load the container with + a common pattern or leave one for a while. After this the max_usage + value shows the amount of memory the container would require during + its common activity. + + Setting the limit a bit above this value gives a pretty good + configuration that works in most of the cases. + + c. If the max_usage is much less than the limit, but the failcnt value + is growing, then the group tries to allocate a big chunk of resource + at once. + + d. If the max_usage is much less than the limit, but the failcnt value + is 0, then this group is given too high limit, that it does not + require. It is better to lower the limit a bit leaving more resource + for other groups. + + + +4. Communication with the control groups subsystem (cgroups) + +All the resource controllers that are using cgroups and resource counters +should provide files (in the cgroup filesystem) to work with the resource +counter fields. They are recommended to adhere to the following rules: + + a. File names + + Field name File name + --------------------------------------------------- + usage usage_in_ + max_usage max_usage_in_ + limit limit_in_ + failcnt failcnt + lock no file :) + + b. Reading from file should show the corresponding field value in the + appropriate format. + + c. Writing to file + + Field Expected behavior + ---------------------------------- + usage prohibited + max_usage reset to usage + limit set the limit + failcnt reset to zero + + + +5. Usage example + + a. Declare a task group (take a look at cgroups subsystem for this) and + fold a res_counter into it + + struct my_group { + struct res_counter res; + + + } + + b. Put hooks in resource allocation/release paths + + int alloc_something(...) + { + if (res_counter_charge(res_counter_ptr, amount) < 0) + return -ENOMEM; + + + } + + void release_something(...) + { + res_counter_uncharge(res_counter_ptr, amount); + + + } + + In order to keep the usage value self-consistent, both the + "res_counter_ptr" and the "amount" in release_something() should be + the same as they were in the alloc_something() when the releasing + resource was allocated. + + c. Provide the way to read res_counter values and set them (the cgroups + still can help with it). + + c. Compile and run :) diff --git a/Documentation/controllers/cpuacct.txt b/Documentation/controllers/cpuacct.txt deleted file mode 100644 index bb775fbe43d7..000000000000 --- a/Documentation/controllers/cpuacct.txt +++ /dev/null @@ -1,32 +0,0 @@ -CPU Accounting Controller -------------------------- - -The CPU accounting controller is used to group tasks using cgroups and -account the CPU usage of these groups of tasks. - -The CPU accounting controller supports multi-hierarchy groups. An accounting -group accumulates the CPU usage of all of its child groups and the tasks -directly present in its group. - -Accounting groups can be created by first mounting the cgroup filesystem. - -# mkdir /cgroups -# mount -t cgroup -ocpuacct none /cgroups - -With the above step, the initial or the parent accounting group -becomes visible at /cgroups. At bootup, this group includes all the -tasks in the system. /cgroups/tasks lists the tasks in this cgroup. -/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by -this group which is essentially the CPU time obtained by all the tasks -in the system. - -New accounting groups can be created under the parent group /cgroups. - -# cd /cgroups -# mkdir g1 -# echo $$ > g1 - -The above steps create a new group g1 and move the current shell -process (bash) into it. CPU time consumed by this bash and its children -can be obtained from g1/cpuacct.usage and the same is accumulated in -/cgroups/cpuacct.usage also. diff --git a/Documentation/controllers/devices.txt b/Documentation/controllers/devices.txt deleted file mode 100644 index 7cc6e6a60672..000000000000 --- a/Documentation/controllers/devices.txt +++ /dev/null @@ -1,52 +0,0 @@ -Device Whitelist Controller - -1. Description: - -Implement a cgroup to track and enforce open and mknod restrictions -on device files. A device cgroup associates a device access -whitelist with each cgroup. A whitelist entry has 4 fields. -'type' is a (all), c (char), or b (block). 'all' means it applies -to all types and all major and minor numbers. Major and minor are -either an integer or * for all. Access is a composition of r -(read), w (write), and m (mknod). - -The root device cgroup starts with rwm to 'all'. A child device -cgroup gets a copy of the parent. Administrators can then remove -devices from the whitelist or add new entries. A child cgroup can -never receive a device access which is denied by its parent. However -when a device access is removed from a parent it will not also be -removed from the child(ren). - -2. User Interface - -An entry is added using devices.allow, and removed using -devices.deny. For instance - - echo 'c 1:3 mr' > /cgroups/1/devices.allow - -allows cgroup 1 to read and mknod the device usually known as -/dev/null. Doing - - echo a > /cgroups/1/devices.deny - -will remove the default 'a *:* rwm' entry. Doing - - echo a > /cgroups/1/devices.allow - -will add the 'a *:* rwm' entry to the whitelist. - -3. Security - -Any task can move itself between cgroups. This clearly won't -suffice, but we can decide the best way to adequately restrict -movement as people get some experience with this. We may just want -to require CAP_SYS_ADMIN, which at least is a separate bit from -CAP_MKNOD. We may want to just refuse moving to a cgroup which -isn't a descendent of the current one. Or we may want to use -CAP_MAC_ADMIN, since we really are trying to lock down root. - -CAP_SYS_ADMIN is needed to modify the whitelist or move another -task to a new cgroup. (Again we'll probably want to change that). - -A cgroup may not be granted more permissions than the cgroup's -parent has. diff --git a/Documentation/controllers/memcg_test.txt b/Documentation/controllers/memcg_test.txt deleted file mode 100644 index 08d4d3ea0d79..000000000000 --- a/Documentation/controllers/memcg_test.txt +++ /dev/null @@ -1,342 +0,0 @@ -Memory Resource Controller(Memcg) Implementation Memo. -Last Updated: 2008/12/15 -Base Kernel Version: based on 2.6.28-rc8-mm. - -Because VM is getting complex (one of reasons is memcg...), memcg's behavior -is complex. This is a document for memcg's internal behavior. -Please note that implementation details can be changed. - -(*) Topics on API should be in Documentation/controllers/memory.txt) - -0. How to record usage ? - 2 objects are used. - - page_cgroup ....an object per page. - Allocated at boot or memory hotplug. Freed at memory hot removal. - - swap_cgroup ... an entry per swp_entry. - Allocated at swapon(). Freed at swapoff(). - - The page_cgroup has USED bit and double count against a page_cgroup never - occurs. swap_cgroup is used only when a charged page is swapped-out. - -1. Charge - - a page/swp_entry may be charged (usage += PAGE_SIZE) at - - mem_cgroup_newpage_charge() - Called at new page fault and Copy-On-Write. - - mem_cgroup_try_charge_swapin() - Called at do_swap_page() (page fault on swap entry) and swapoff. - Followed by charge-commit-cancel protocol. (With swap accounting) - At commit, a charge recorded in swap_cgroup is removed. - - mem_cgroup_cache_charge() - Called at add_to_page_cache() - - mem_cgroup_cache_charge_swapin() - Called at shmem's swapin. - - mem_cgroup_prepare_migration() - Called before migration. "extra" charge is done and followed by - charge-commit-cancel protocol. - At commit, charge against oldpage or newpage will be committed. - -2. Uncharge - a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by - - mem_cgroup_uncharge_page() - Called when an anonymous page is fully unmapped. I.e., mapcount goes - to 0. If the page is SwapCache, uncharge is delayed until - mem_cgroup_uncharge_swapcache(). - - mem_cgroup_uncharge_cache_page() - Called when a page-cache is deleted from radix-tree. If the page is - SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache(). - - mem_cgroup_uncharge_swapcache() - Called when SwapCache is removed from radix-tree. The charge itself - is moved to swap_cgroup. (If mem+swap controller is disabled, no - charge to swap occurs.) - - mem_cgroup_uncharge_swap() - Called when swp_entry's refcnt goes down to 0. A charge against swap - disappears. - - mem_cgroup_end_migration(old, new) - At success of migration old is uncharged (if necessary), a charge - to new page is committed. At failure, charge to old page is committed. - -3. charge-commit-cancel - In some case, we can't know this "charge" is valid or not at charging - (because of races). - To handle such case, there are charge-commit-cancel functions. - mem_cgroup_try_charge_XXX - mem_cgroup_commit_charge_XXX - mem_cgroup_cancel_charge_XXX - these are used in swap-in and migration. - - At try_charge(), there are no flags to say "this page is charged". - at this point, usage += PAGE_SIZE. - - At commit(), the function checks the page should be charged or not - and set flags or avoid charging.(usage -= PAGE_SIZE) - - At cancel(), simply usage -= PAGE_SIZE. - -Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. - -4. Anonymous - Anonymous page is newly allocated at - - page fault into MAP_ANONYMOUS mapping. - - Copy-On-Write. - It is charged right after it's allocated before doing any page table - related operations. Of course, it's uncharged when another page is used - for the fault address. - - At freeing anonymous page (by exit() or munmap()), zap_pte() is called - and pages for ptes are freed one by one.(see mm/memory.c). Uncharges - are done at page_remove_rmap() when page_mapcount() goes down to 0. - - Another page freeing is by page-reclaim (vmscan.c) and anonymous - pages are swapped out. In this case, the page is marked as - PageSwapCache(). uncharge() routine doesn't uncharge the page marked - as SwapCache(). It's delayed until __delete_from_swap_cache(). - - 4.1 Swap-in. - At swap-in, the page is taken from swap-cache. There are 2 cases. - - (a) If the SwapCache is newly allocated and read, it has no charges. - (b) If the SwapCache has been mapped by processes, it has been - charged already. - - This swap-in is one of the most complicated work. In do_swap_page(), - following events occur when pte is unchanged. - - (1) the page (SwapCache) is looked up. - (2) lock_page() - (3) try_charge_swapin() - (4) reuse_swap_page() (may call delete_swap_cache()) - (5) commit_charge_swapin() - (6) swap_free(). - - Considering following situation for example. - - (A) The page has not been charged before (2) and reuse_swap_page() - doesn't call delete_from_swap_cache(). - (B) The page has not been charged before (2) and reuse_swap_page() - calls delete_from_swap_cache(). - (C) The page has been charged before (2) and reuse_swap_page() doesn't - call delete_from_swap_cache(). - (D) The page has been charged before (2) and reuse_swap_page() calls - delete_from_swap_cache(). - - memory.usage/memsw.usage changes to this page/swp_entry will be - Case (A) (B) (C) (D) - Event - Before (2) 0/ 1 0/ 1 1/ 1 1/ 1 - =========================================== - (3) +1/+1 +1/+1 +1/+1 +1/+1 - (4) - 0/ 0 - -1/ 0 - (5) 0/-1 0/ 0 -1/-1 0/ 0 - (6) - 0/-1 - 0/-1 - =========================================== - Result 1/ 1 1/ 1 1/ 1 1/ 1 - - In any cases, charges to this page should be 1/ 1. - - 4.2 Swap-out. - At swap-out, typical state transition is below. - - (a) add to swap cache. (marked as SwapCache) - swp_entry's refcnt += 1. - (b) fully unmapped. - swp_entry's refcnt += # of ptes. - (c) write back to swap. - (d) delete from swap cache. (remove from SwapCache) - swp_entry's refcnt -= 1. - - - At (b), the page is marked as SwapCache and not uncharged. - At (d), the page is removed from SwapCache and a charge in page_cgroup - is moved to swap_cgroup. - - Finally, at task exit, - (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0. - Here, a charge in swap_cgroup disappears. - -5. Page Cache - Page Cache is charged at - - add_to_page_cache_locked(). - - uncharged at - - __remove_from_page_cache(). - - The logic is very clear. (About migration, see below) - Note: __remove_from_page_cache() is called by remove_from_page_cache() - and __remove_mapping(). - -6. Shmem(tmpfs) Page Cache - Memcg's charge/uncharge have special handlers of shmem. The best way - to understand shmem's page state transition is to read mm/shmem.c. - But brief explanation of the behavior of memcg around shmem will be - helpful to understand the logic. - - Shmem's page (just leaf page, not direct/indirect block) can be on - - radix-tree of shmem's inode. - - SwapCache. - - Both on radix-tree and SwapCache. This happens at swap-in - and swap-out, - - It's charged when... - - A new page is added to shmem's radix-tree. - - A swp page is read. (move a charge from swap_cgroup to page_cgroup) - It's uncharged when - - A page is removed from radix-tree and not SwapCache. - - When SwapCache is removed, a charge is moved to swap_cgroup. - - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup - disappears. - -7. Page Migration - One of the most complicated functions is page-migration-handler. - Memcg has 2 routines. Assume that we are migrating a page's contents - from OLDPAGE to NEWPAGE. - - Usual migration logic is.. - (a) remove the page from LRU. - (b) allocate NEWPAGE (migration target) - (c) lock by lock_page(). - (d) unmap all mappings. - (e-1) If necessary, replace entry in radix-tree. - (e-2) move contents of a page. - (f) map all mappings again. - (g) pushback the page to LRU. - (-) OLDPAGE will be freed. - - Before (g), memcg should complete all necessary charge/uncharge to - NEWPAGE/OLDPAGE. - - The point is.... - - If OLDPAGE is anonymous, all charges will be dropped at (d) because - try_to_unmap() drops all mapcount and the page will not be - SwapCache. - - - If OLDPAGE is SwapCache, charges will be kept at (g) because - __delete_from_swap_cache() isn't called at (e-1) - - - If OLDPAGE is page-cache, charges will be kept at (g) because - remove_from_swap_cache() isn't called at (e-1) - - memcg provides following hooks. - - - mem_cgroup_prepare_migration(OLDPAGE) - Called after (b) to account a charge (usage += PAGE_SIZE) against - memcg which OLDPAGE belongs to. - - - mem_cgroup_end_migration(OLDPAGE, NEWPAGE) - Called after (f) before (g). - If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already - charged, a charge by prepare_migration() is automatically canceled. - If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE. - - But zap_pte() (by exit or munmap) can be called while migration, - we have to check if OLDPAGE/NEWPAGE is a valid page after commit(). - -8. LRU - Each memcg has its own private LRU. Now, it's handling is under global - VM's control (means that it's handled under global zone->lru_lock). - Almost all routines around memcg's LRU is called by global LRU's - list management functions under zone->lru_lock(). - - A special function is mem_cgroup_isolate_pages(). This scans - memcg's private LRU and call __isolate_lru_page() to extract a page - from LRU. - (By __isolate_lru_page(), the page is removed from both of global and - private LRU.) - - -9. Typical Tests. - - Tests for racy cases. - - 9.1 Small limit to memcg. - When you do test to do racy case, it's good test to set memcg's limit - to be very small rather than GB. Many races found in the test under - xKB or xxMB limits. - (Memory behavior under GB and Memory behavior under MB shows very - different situation.) - - 9.2 Shmem - Historically, memcg's shmem handling was poor and we saw some amount - of troubles here. This is because shmem is page-cache but can be - SwapCache. Test with shmem/tmpfs is always good test. - - 9.3 Migration - For NUMA, migration is an another special case. To do easy test, cpuset - is useful. Following is a sample script to do migration. - - mount -t cgroup -o cpuset none /opt/cpuset - - mkdir /opt/cpuset/01 - echo 1 > /opt/cpuset/01/cpuset.cpus - echo 0 > /opt/cpuset/01/cpuset.mems - echo 1 > /opt/cpuset/01/cpuset.memory_migrate - mkdir /opt/cpuset/02 - echo 1 > /opt/cpuset/02/cpuset.cpus - echo 1 > /opt/cpuset/02/cpuset.mems - echo 1 > /opt/cpuset/02/cpuset.memory_migrate - - In above set, when you moves a task from 01 to 02, page migration to - node 0 to node 1 will occur. Following is a script to migrate all - under cpuset. - -- - move_task() - { - for pid in $1 - do - /bin/echo $pid >$2/tasks 2>/dev/null - echo -n $pid - echo -n " " - done - echo END - } - - G1_TASK=`cat ${G1}/tasks` - G2_TASK=`cat ${G2}/tasks` - move_task "${G1_TASK}" ${G2} & - -- - 9.4 Memory hotplug. - memory hotplug test is one of good test. - to offline memory, do following. - # echo offline > /sys/devices/system/memory/memoryXXX/state - (XXX is the place of memory) - This is an easy way to test page migration, too. - - 9.5 mkdir/rmdir - When using hierarchy, mkdir/rmdir test should be done. - Use tests like the following. - - echo 1 >/opt/cgroup/01/memory/use_hierarchy - mkdir /opt/cgroup/01/child_a - mkdir /opt/cgroup/01/child_b - - set limit to 01. - add limit to 01/child_b - run jobs under child_a and child_b - - create/delete following groups at random while jobs are running. - /opt/cgroup/01/child_a/child_aa - /opt/cgroup/01/child_b/child_bb - /opt/cgroup/01/child_c - - running new jobs in new group is also good. - - 9.6 Mount with other subsystems. - Mounting with other subsystems is a good test because there is a - race and lock dependency with other cgroup subsystems. - - example) - # mount -t cgroup none /cgroup -t cpuset,memory,cpu,devices - - and do task move, mkdir, rmdir etc...under this. diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt deleted file mode 100644 index e1501964df1e..000000000000 --- a/Documentation/controllers/memory.txt +++ /dev/null @@ -1,399 +0,0 @@ -Memory Resource Controller - -NOTE: The Memory Resource Controller has been generically been referred -to as the memory controller in this document. Do not confuse memory controller -used here with the memory controller that is used in hardware. - -Salient features - -a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages -b. The infrastructure allows easy addition of other types of memory to control -c. Provides *zero overhead* for non memory controller users -d. Provides a double LRU: global memory pressure causes reclaim from the - global LRU; a cgroup on hitting a limit, reclaims from the per - cgroup LRU - -NOTE: Swap Cache (unmapped) is not accounted now. - -Benefits and Purpose of the memory controller - -The memory controller isolates the memory behaviour of a group of tasks -from the rest of the system. The article on LWN [12] mentions some probable -uses of the memory controller. The memory controller can be used to - -a. Isolate an application or a group of applications - Memory hungry applications can be isolated and limited to a smaller - amount of memory. -b. Create a cgroup with limited amount of memory, this can be used - as a good alternative to booting with mem=XXXX. -c. Virtualization solutions can control the amount of memory they want - to assign to a virtual machine instance. -d. A CD/DVD burner could control the amount of memory used by the - rest of the system to ensure that burning does not fail due to lack - of available memory. -e. There are several other use cases, find one or use the controller just - for fun (to learn and hack on the VM subsystem). - -1. History - -The memory controller has a long history. A request for comments for the memory -controller was posted by Balbir Singh [1]. At the time the RFC was posted -there were several implementations for memory control. The goal of the -RFC was to build consensus and agreement for the minimal features required -for memory control. The first RSS controller was posted by Balbir Singh[2] -in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the -RSS controller. At OLS, at the resource management BoF, everyone suggested -that we handle both page cache and RSS together. Another request was raised -to allow user space handling of OOM. The current memory controller is -at version 6; it combines both mapped (RSS) and unmapped Page -Cache Control [11]. - -2. Memory Control - -Memory is a unique resource in the sense that it is present in a limited -amount. If a task requires a lot of CPU processing, the task can spread -its processing over a period of hours, days, months or years, but with -memory, the same physical memory needs to be reused to accomplish the task. - -The memory controller implementation has been divided into phases. These -are: - -1. Memory controller -2. mlock(2) controller -3. Kernel user memory accounting and slab control -4. user mappings length controller - -The memory controller is the first controller developed. - -2.1. Design - -The core of the design is a counter called the res_counter. The res_counter -tracks the current memory usage and limit of the group of processes associated -with the controller. Each cgroup has a memory controller specific data -structure (mem_cgroup) associated with it. - -2.2. Accounting - - +--------------------+ - | mem_cgroup | - | (res_counter) | - +--------------------+ - / ^ \ - / | \ - +---------------+ | +---------------+ - | mm_struct | |.... | mm_struct | - | | | | | - +---------------+ | +---------------+ - | - + --------------+ - | - +---------------+ +------+--------+ - | page +----------> page_cgroup| - | | | | - +---------------+ +---------------+ - - (Figure 1: Hierarchy of Accounting) - - -Figure 1 shows the important aspects of the controller - -1. Accounting happens per cgroup -2. Each mm_struct knows about which cgroup it belongs to -3. Each page has a pointer to the page_cgroup, which in turn knows the - cgroup it belongs to - -The accounting is done as follows: mem_cgroup_charge() is invoked to setup -the necessary data structures and check if the cgroup that is being charged -is over its limit. If it is then reclaim is invoked on the cgroup. -More details can be found in the reclaim section of this document. -If everything goes well, a page meta-data-structure called page_cgroup is -allocated and associated with the page. This routine also adds the page to -the per cgroup LRU. - -2.2.1 Accounting details - -All mapped anon pages (RSS) and cache pages (Page Cache) are accounted. -(some pages which never be reclaimable and will not be on global LRU - are not accounted. we just accounts pages under usual vm management.) - -RSS pages are accounted at page_fault unless they've already been accounted -for earlier. A file page will be accounted for as Page Cache when it's -inserted into inode (radix-tree). While it's mapped into the page tables of -processes, duplicate accounting is carefully avoided. - -A RSS page is unaccounted when it's fully unmapped. A PageCache page is -unaccounted when it's removed from radix-tree. - -At page migration, accounting information is kept. - -Note: we just account pages-on-lru because our purpose is to control amount -of used pages. not-on-lru pages are tend to be out-of-control from vm view. - -2.3 Shared Page Accounting - -Shared pages are accounted on the basis of the first touch approach. The -cgroup that first touches a page is accounted for the page. The principle -behind this approach is that a cgroup that aggressively uses a shared -page will eventually get charged for it (once it is uncharged from -the cgroup that brought it in -- this will happen on memory pressure). - -Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.. -When you do swapoff and make swapped-out pages of shmem(tmpfs) to -be backed into memory in force, charges for pages are accounted against the -caller of swapoff rather than the users of shmem. - - -2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) -Swap Extension allows you to record charge for swap. A swapped-in page is -charged back to original page allocator if possible. - -When swap is accounted, following files are added. - - memory.memsw.usage_in_bytes. - - memory.memsw.limit_in_bytes. - -usage of mem+swap is limited by memsw.limit_in_bytes. - -Note: why 'mem+swap' rather than swap. -The global LRU(kswapd) can swap out arbitrary pages. Swap-out means -to move account from memory to swap...there is no change in usage of -mem+swap. - -In other words, when we want to limit the usage of swap without affecting -global LRU, mem+swap limit is better than just limiting swap from OS point -of view. - -2.5 Reclaim - -Each cgroup maintains a per cgroup LRU that consists of an active -and inactive list. When a cgroup goes over its limit, we first try -to reclaim memory from the cgroup so as to make space for the new -pages that the cgroup has touched. If the reclaim is unsuccessful, -an OOM routine is invoked to select and kill the bulkiest task in the -cgroup. - -The reclaim algorithm has not been modified for cgroups, except that -pages that are selected for reclaiming come from the per cgroup LRU -list. - -2. Locking - -The memory controller uses the following hierarchy - -1. zone->lru_lock is used for selecting pages to be isolated -2. mem->per_zone->lru_lock protects the per cgroup LRU (per zone) -3. lock_page_cgroup() is used to protect page->page_cgroup - -3. User Interface - -0. Configuration - -a. Enable CONFIG_CGROUPS -b. Enable CONFIG_RESOURCE_COUNTERS -c. Enable CONFIG_CGROUP_MEM_RES_CTLR - -1. Prepare the cgroups -# mkdir -p /cgroups -# mount -t cgroup none /cgroups -o memory - -2. Make the new group and move bash into it -# mkdir /cgroups/0 -# echo $$ > /cgroups/0/tasks - -Since now we're in the 0 cgroup, -We can alter the memory limit: -# echo 4M > /cgroups/0/memory.limit_in_bytes - -NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, -mega or gigabytes. - -# cat /cgroups/0/memory.limit_in_bytes -4194304 - -NOTE: The interface has now changed to display the usage in bytes -instead of pages - -We can check the usage: -# cat /cgroups/0/memory.usage_in_bytes -1216512 - -A successful write to this file does not guarantee a successful set of -this limit to the value written into the file. This can be due to a -number of factors, such as rounding up to page boundaries or the total -availability of memory on the system. The user is required to re-read -this file after a write to guarantee the value committed by the kernel. - -# echo 1 > memory.limit_in_bytes -# cat memory.limit_in_bytes -4096 - -The memory.failcnt field gives the number of times that the cgroup limit was -exceeded. - -The memory.stat file gives accounting information. Now, the number of -caches, RSS and Active pages/Inactive pages are shown. - -4. Testing - -Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11]. -Apart from that v6 has been tested with several applications and regular -daily use. The controller has also been tested on the PPC64, x86_64 and -UML platforms. - -4.1 Troubleshooting - -Sometimes a user might find that the application under a cgroup is -terminated. There are several causes for this: - -1. The cgroup limit is too low (just too low to do anything useful) -2. The user is using anonymous memory and swap is turned off or too low - -A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of -some of the pages cached in the cgroup (page cache pages). - -4.2 Task migration - -When a task migrates from one cgroup to another, it's charge is not -carried forward. The pages allocated from the original cgroup still -remain charged to it, the charge is dropped when the page is freed or -reclaimed. - -4.3 Removing a cgroup - -A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a -cgroup might have some charge associated with it, even though all -tasks have migrated away from it. -Such charges are freed(at default) or moved to its parent. When moved, -both of RSS and CACHES are moved to parent. -If both of them are busy, rmdir() returns -EBUSY. See 5.1 Also. - -Charges recorded in swap information is not updated at removal of cgroup. -Recorded information is discarded and a cgroup which uses swap (swapcache) -will be charged as a new owner of it. - - -5. Misc. interfaces. - -5.1 force_empty - memory.force_empty interface is provided to make cgroup's memory usage empty. - You can use this interface only when the cgroup has no tasks. - When writing anything to this - - # echo 0 > memory.force_empty - - Almost all pages tracked by this memcg will be unmapped and freed. Some of - pages cannot be freed because it's locked or in-use. Such pages are moved - to parent and this cgroup will be empty. But this may return -EBUSY in - some too busy case. - - Typical use case of this interface is that calling this before rmdir(). - Because rmdir() moves all pages to parent, some out-of-use page caches can be - moved to the parent. If you want to avoid that, force_empty will be useful. - -5.2 stat file - memory.stat file includes following statistics (now) - cache - # of pages from page-cache and shmem. - rss - # of pages from anonymous memory. - pgpgin - # of event of charging - pgpgout - # of event of uncharging - active_anon - # of pages on active lru of anon, shmem. - inactive_anon - # of pages on active lru of anon, shmem - active_file - # of pages on active lru of file-cache - inactive_file - # of pages on inactive lru of file cache - unevictable - # of pages cannot be reclaimed.(mlocked etc) - - Below is depend on CONFIG_DEBUG_VM. - inactive_ratio - VM inernal parameter. (see mm/page_alloc.c) - recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) - recent_rotated_file - VM internal parameter. (see mm/vmscan.c) - recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) - recent_scanned_file - VM internal parameter. (see mm/vmscan.c) - - Memo: - recent_rotated means recent frequency of lru rotation. - recent_scanned means recent # of scans to lru. - showing for better debug please see the code for meanings. - - -5.3 swappiness - Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. - - Following cgroup's swapiness can't be changed. - - root cgroup (uses /proc/sys/vm/swappiness). - - a cgroup which uses hierarchy and it has child cgroup. - - a cgroup which uses hierarchy and not the root of hierarchy. - - -6. Hierarchy support - -The memory controller supports a deep hierarchy and hierarchical accounting. -The hierarchy is created by creating the appropriate cgroups in the -cgroup filesystem. Consider for example, the following cgroup filesystem -hierarchy - - root - / | \ - / | \ - a b c - | \ - | \ - d e - -In the diagram above, with hierarchical accounting enabled, all memory -usage of e, is accounted to its ancestors up until the root (i.e, c and root), -that has memory.use_hierarchy enabled. If one of the ancestors goes over its -limit, the reclaim algorithm reclaims from the tasks in the ancestor and the -children of the ancestor. - -6.1 Enabling hierarchical accounting and reclaim - -The memory controller by default disables the hierarchy feature. Support -can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup - -# echo 1 > memory.use_hierarchy - -The feature can be disabled by - -# echo 0 > memory.use_hierarchy - -NOTE1: Enabling/disabling will fail if the cgroup already has other -cgroups created below it. - -NOTE2: This feature can be enabled/disabled per subtree. - -7. TODO - -1. Add support for accounting huge pages (as a separate controller) -2. Make per-cgroup scanner reclaim not-shared pages first -3. Teach controller to account for shared-pages -4. Start reclamation in the background when the limit is - not yet hit but the usage is getting closer - -Summary - -Overall, the memory controller has been a stable controller and has been -commented and discussed quite extensively in the community. - -References - -1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/ -2. Singh, Balbir. Memory Controller (RSS Control), - http://lwn.net/Articles/222762/ -3. Emelianov, Pavel. Resource controllers based on process cgroups - http://lkml.org/lkml/2007/3/6/198 -4. Emelianov, Pavel. RSS controller based on process cgroups (v2) - http://lkml.org/lkml/2007/4/9/78 -5. Emelianov, Pavel. RSS controller based on process cgroups (v3) - http://lkml.org/lkml/2007/5/30/244 -6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ -7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control - subsystem (v3), http://lwn.net/Articles/235534/ -8. Singh, Balbir. RSS controller v2 test results (lmbench), - http://lkml.org/lkml/2007/5/17/232 -9. Singh, Balbir. RSS controller v2 AIM9 results - http://lkml.org/lkml/2007/5/18/1 -10. Singh, Balbir. Memory controller v6 test results, - http://lkml.org/lkml/2007/8/19/36 -11. Singh, Balbir. Memory controller introduction (v6), - http://lkml.org/lkml/2007/8/17/69 -12. Corbet, Jonathan, Controlling memory use in cgroups, - http://lwn.net/Articles/243795/ diff --git a/Documentation/controllers/resource_counter.txt b/Documentation/controllers/resource_counter.txt deleted file mode 100644 index f196ac1d7d25..000000000000 --- a/Documentation/controllers/resource_counter.txt +++ /dev/null @@ -1,181 +0,0 @@ - - The Resource Counter - -The resource counter, declared at include/linux/res_counter.h, -is supposed to facilitate the resource management by controllers -by providing common stuff for accounting. - -This "stuff" includes the res_counter structure and routines -to work with it. - - - -1. Crucial parts of the res_counter structure - - a. unsigned long long usage - - The usage value shows the amount of a resource that is consumed - by a group at a given time. The units of measurement should be - determined by the controller that uses this counter. E.g. it can - be bytes, items or any other unit the controller operates on. - - b. unsigned long long max_usage - - The maximal value of the usage over time. - - This value is useful when gathering statistical information about - the particular group, as it shows the actual resource requirements - for a particular group, not just some usage snapshot. - - c. unsigned long long limit - - The maximal allowed amount of resource to consume by the group. In - case the group requests for more resources, so that the usage value - would exceed the limit, the resource allocation is rejected (see - the next section). - - d. unsigned long long failcnt - - The failcnt stands for "failures counter". This is the number of - resource allocation attempts that failed. - - c. spinlock_t lock - - Protects changes of the above values. - - - -2. Basic accounting routines - - a. void res_counter_init(struct res_counter *rc) - - Initializes the resource counter. As usual, should be the first - routine called for a new counter. - - b. int res_counter_charge[_locked] - (struct res_counter *rc, unsigned long val) - - When a resource is about to be allocated it has to be accounted - with the appropriate resource counter (controller should determine - which one to use on its own). This operation is called "charging". - - This is not very important which operation - resource allocation - or charging - is performed first, but - * if the allocation is performed first, this may create a - temporary resource over-usage by the time resource counter is - charged; - * if the charging is performed first, then it should be uncharged - on error path (if the one is called). - - c. void res_counter_uncharge[_locked] - (struct res_counter *rc, unsigned long val) - - When a resource is released (freed) it should be de-accounted - from the resource counter it was accounted to. This is called - "uncharging". - - The _locked routines imply that the res_counter->lock is taken. - - - 2.1 Other accounting routines - - There are more routines that may help you with common needs, like - checking whether the limit is reached or resetting the max_usage - value. They are all declared in include/linux/res_counter.h. - - - -3. Analyzing the resource counter registrations - - a. If the failcnt value constantly grows, this means that the counter's - limit is too tight. Either the group is misbehaving and consumes too - many resources, or the configuration is not suitable for the group - and the limit should be increased. - - b. The max_usage value can be used to quickly tune the group. One may - set the limits to maximal values and either load the container with - a common pattern or leave one for a while. After this the max_usage - value shows the amount of memory the container would require during - its common activity. - - Setting the limit a bit above this value gives a pretty good - configuration that works in most of the cases. - - c. If the max_usage is much less than the limit, but the failcnt value - is growing, then the group tries to allocate a big chunk of resource - at once. - - d. If the max_usage is much less than the limit, but the failcnt value - is 0, then this group is given too high limit, that it does not - require. It is better to lower the limit a bit leaving more resource - for other groups. - - - -4. Communication with the control groups subsystem (cgroups) - -All the resource controllers that are using cgroups and resource counters -should provide files (in the cgroup filesystem) to work with the resource -counter fields. They are recommended to adhere to the following rules: - - a. File names - - Field name File name - --------------------------------------------------- - usage usage_in_ - max_usage max_usage_in_ - limit limit_in_ - failcnt failcnt - lock no file :) - - b. Reading from file should show the corresponding field value in the - appropriate format. - - c. Writing to file - - Field Expected behavior - ---------------------------------- - usage prohibited - max_usage reset to usage - limit set the limit - failcnt reset to zero - - - -5. Usage example - - a. Declare a task group (take a look at cgroups subsystem for this) and - fold a res_counter into it - - struct my_group { - struct res_counter res; - - - } - - b. Put hooks in resource allocation/release paths - - int alloc_something(...) - { - if (res_counter_charge(res_counter_ptr, amount) < 0) - return -ENOMEM; - - - } - - void release_something(...) - { - res_counter_uncharge(res_counter_ptr, amount); - - - } - - In order to keep the usage value self-consistent, both the - "res_counter_ptr" and the "amount" in release_something() should be - the same as they were in the alloc_something() when the releasing - resource was allocated. - - c. Provide the way to read res_counter values and set them (the cgroups - still can help with it). - - c. Compile and run :) diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt deleted file mode 100644 index 5c86c258c791..000000000000 --- a/Documentation/cpusets.txt +++ /dev/null @@ -1,808 +0,0 @@ - CPUSETS - ------- - -Copyright (C) 2004 BULL SA. -Written by Simon.Derr@bull.net - -Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. -Modified by Paul Jackson -Modified by Christoph Lameter -Modified by Paul Menage -Modified by Hidetoshi Seto - -CONTENTS: -========= - -1. Cpusets - 1.1 What are cpusets ? - 1.2 Why are cpusets needed ? - 1.3 How are cpusets implemented ? - 1.4 What are exclusive cpusets ? - 1.5 What is memory_pressure ? - 1.6 What is memory spread ? - 1.7 What is sched_load_balance ? - 1.8 What is sched_relax_domain_level ? - 1.9 How do I use cpusets ? -2. Usage Examples and Syntax - 2.1 Basic Usage - 2.2 Adding/removing cpus - 2.3 Setting flags - 2.4 Attaching processes -3. Questions -4. Contact - -1. Cpusets -========== - -1.1 What are cpusets ? ----------------------- - -Cpusets provide a mechanism for assigning a set of CPUs and Memory -Nodes to a set of tasks. In this document "Memory Node" refers to -an on-line node that contains memory. - -Cpusets constrain the CPU and Memory placement of tasks to only -the resources within a tasks current cpuset. They form a nested -hierarchy visible in a virtual file system. These are the essential -hooks, beyond what is already present, required to manage dynamic -job placement on large systems. - -Cpusets use the generic cgroup subsystem described in -Documentation/cgroups/cgroups.txt. - -Requests by a task, using the sched_setaffinity(2) system call to -include CPUs in its CPU affinity mask, and using the mbind(2) and -set_mempolicy(2) system calls to include Memory Nodes in its memory -policy, are both filtered through that tasks cpuset, filtering out any -CPUs or Memory Nodes not in that cpuset. The scheduler will not -schedule a task on a CPU that is not allowed in its cpus_allowed -vector, and the kernel page allocator will not allocate a page on a -node that is not allowed in the requesting tasks mems_allowed vector. - -User level code may create and destroy cpusets by name in the cgroup -virtual file system, manage the attributes and permissions of these -cpusets and which CPUs and Memory Nodes are assigned to each cpuset, -specify and query to which cpuset a task is assigned, and list the -task pids assigned to a cpuset. - - -1.2 Why are cpusets needed ? ----------------------------- - -The management of large computer systems, with many processors (CPUs), -complex memory cache hierarchies and multiple Memory Nodes having -non-uniform access times (NUMA) presents additional challenges for -the efficient scheduling and memory placement of processes. - -Frequently more modest sized systems can be operated with adequate -efficiency just by letting the operating system automatically share -the available CPU and Memory resources amongst the requesting tasks. - -But larger systems, which benefit more from careful processor and -memory placement to reduce memory access times and contention, -and which typically represent a larger investment for the customer, -can benefit from explicitly placing jobs on properly sized subsets of -the system. - -This can be especially valuable on: - - * Web Servers running multiple instances of the same web application, - * Servers running different applications (for instance, a web server - and a database), or - * NUMA systems running large HPC applications with demanding - performance characteristics. - -These subsets, or "soft partitions" must be able to be dynamically -adjusted, as the job mix changes, without impacting other concurrently -executing jobs. The location of the running jobs pages may also be moved -when the memory locations are changed. - -The kernel cpuset patch provides the minimum essential kernel -mechanisms required to efficiently implement such subsets. It -leverages existing CPU and Memory Placement facilities in the Linux -kernel to avoid any additional impact on the critical scheduler or -memory allocator code. - - -1.3 How are cpusets implemented ? ---------------------------------- - -Cpusets provide a Linux kernel mechanism to constrain which CPUs and -Memory Nodes are used by a process or set of processes. - -The Linux kernel already has a pair of mechanisms to specify on which -CPUs a task may be scheduled (sched_setaffinity) and on which Memory -Nodes it may obtain memory (mbind, set_mempolicy). - -Cpusets extends these two mechanisms as follows: - - - Cpusets are sets of allowed CPUs and Memory Nodes, known to the - kernel. - - Each task in the system is attached to a cpuset, via a pointer - in the task structure to a reference counted cgroup structure. - - Calls to sched_setaffinity are filtered to just those CPUs - allowed in that tasks cpuset. - - Calls to mbind and set_mempolicy are filtered to just - those Memory Nodes allowed in that tasks cpuset. - - The root cpuset contains all the systems CPUs and Memory - Nodes. - - For any cpuset, one can define child cpusets containing a subset - of the parents CPU and Memory Node resources. - - The hierarchy of cpusets can be mounted at /dev/cpuset, for - browsing and manipulation from user space. - - A cpuset may be marked exclusive, which ensures that no other - cpuset (except direct ancestors and descendents) may contain - any overlapping CPUs or Memory Nodes. - - You can list all the tasks (by pid) attached to any cpuset. - -The implementation of cpusets requires a few, simple hooks -into the rest of the kernel, none in performance critical paths: - - - in init/main.c, to initialize the root cpuset at system boot. - - in fork and exit, to attach and detach a task from its cpuset. - - in sched_setaffinity, to mask the requested CPUs by what's - allowed in that tasks cpuset. - - in sched.c migrate_all_tasks(), to keep migrating tasks within - the CPUs allowed by their cpuset, if possible. - - in the mbind and set_mempolicy system calls, to mask the requested - Memory Nodes by what's allowed in that tasks cpuset. - - in page_alloc.c, to restrict memory to allowed nodes. - - in vmscan.c, to restrict page recovery to the current cpuset. - -You should mount the "cgroup" filesystem type in order to enable -browsing and modifying the cpusets presently known to the kernel. No -new system calls are added for cpusets - all support for querying and -modifying cpusets is via this cpuset file system. - -The /proc//status file for each task has four added lines, -displaying the tasks cpus_allowed (on which CPUs it may be scheduled) -and mems_allowed (on which Memory Nodes it may obtain memory), -in the two formats seen in the following example: - - Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff - Cpus_allowed_list: 0-127 - Mems_allowed: ffffffff,ffffffff - Mems_allowed_list: 0-63 - -Each cpuset is represented by a directory in the cgroup file system -containing (on top of the standard cgroup files) the following -files describing that cpuset: - - - cpus: list of CPUs in that cpuset - - mems: list of Memory Nodes in that cpuset - - memory_migrate flag: if set, move pages to cpusets nodes - - cpu_exclusive flag: is cpu placement exclusive? - - mem_exclusive flag: is memory placement exclusive? - - mem_hardwall flag: is memory allocation hardwalled - - memory_pressure: measure of how much paging pressure in cpuset - -In addition, the root cpuset only has the following file: - - memory_pressure_enabled flag: compute memory_pressure? - -New cpusets are created using the mkdir system call or shell -command. The properties of a cpuset, such as its flags, allowed -CPUs and Memory Nodes, and attached tasks, are modified by writing -to the appropriate file in that cpusets directory, as listed above. - -The named hierarchical structure of nested cpusets allows partitioning -a large system into nested, dynamically changeable, "soft-partitions". - -The attachment of each task, automatically inherited at fork by any -children of that task, to a cpuset allows organizing the work load -on a system into related sets of tasks such that each set is constrained -to using the CPUs and Memory Nodes of a particular cpuset. A task -may be re-attached to any other cpuset, if allowed by the permissions -on the necessary cpuset file system directories. - -Such management of a system "in the large" integrates smoothly with -the detailed placement done on individual tasks and memory regions -using the sched_setaffinity, mbind and set_mempolicy system calls. - -The following rules apply to each cpuset: - - - Its CPUs and Memory Nodes must be a subset of its parents. - - It can't be marked exclusive unless its parent is. - - If its cpu or memory is exclusive, they may not overlap any sibling. - -These rules, and the natural hierarchy of cpusets, enable efficient -enforcement of the exclusive guarantee, without having to scan all -cpusets every time any of them change to ensure nothing overlaps a -exclusive cpuset. Also, the use of a Linux virtual file system (vfs) -to represent the cpuset hierarchy provides for a familiar permission -and name space for cpusets, with a minimum of additional kernel code. - -The cpus and mems files in the root (top_cpuset) cpuset are -read-only. The cpus file automatically tracks the value of -cpu_online_map using a CPU hotplug notifier, and the mems file -automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e., -nodes with memory--using the cpuset_track_online_nodes() hook. - - -1.4 What are exclusive cpusets ? --------------------------------- - -If a cpuset is cpu or mem exclusive, no other cpuset, other than -a direct ancestor or descendent, may share any of the same CPUs or -Memory Nodes. - -A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", -i.e. it restricts kernel allocations for page, buffer and other data -commonly shared by the kernel across multiple users. All cpusets, -whether hardwalled or not, restrict allocations of memory for user -space. This enables configuring a system so that several independent -jobs can share common kernel data, such as file system pages, while -isolating each job's user allocation in its own cpuset. To do this, -construct a large mem_exclusive cpuset to hold all the jobs, and -construct child, non-mem_exclusive cpusets for each individual job. -Only a small amount of typical kernel memory, such as requests from -interrupt handlers, is allowed to be taken outside even a -mem_exclusive cpuset. - - -1.5 What is memory_pressure ? ------------------------------ -The memory_pressure of a cpuset provides a simple per-cpuset metric -of the rate that the tasks in a cpuset are attempting to free up in -use memory on the nodes of the cpuset to satisfy additional memory -requests. - -This enables batch managers monitoring jobs running in dedicated -cpusets to efficiently detect what level of memory pressure that job -is causing. - -This is useful both on tightly managed systems running a wide mix of -submitted jobs, which may choose to terminate or re-prioritize jobs that -are trying to use more memory than allowed on the nodes assigned them, -and with tightly coupled, long running, massively parallel scientific -computing jobs that will dramatically fail to meet required performance -goals if they start to use more memory than allowed to them. - -This mechanism provides a very economical way for the batch manager -to monitor a cpuset for signs of memory pressure. It's up to the -batch manager or other user code to decide what to do about it and -take action. - -==> Unless this feature is enabled by writing "1" to the special file - /dev/cpuset/memory_pressure_enabled, the hook in the rebalance - code of __alloc_pages() for this metric reduces to simply noticing - that the cpuset_memory_pressure_enabled flag is zero. So only - systems that enable this feature will compute the metric. - -Why a per-cpuset, running average: - - Because this meter is per-cpuset, rather than per-task or mm, - the system load imposed by a batch scheduler monitoring this - metric is sharply reduced on large systems, because a scan of - the tasklist can be avoided on each set of queries. - - Because this meter is a running average, instead of an accumulating - counter, a batch scheduler can detect memory pressure with a - single read, instead of having to read and accumulate results - for a period of time. - - Because this meter is per-cpuset rather than per-task or mm, - the batch scheduler can obtain the key information, memory - pressure in a cpuset, with a single read, rather than having to - query and accumulate results over all the (dynamically changing) - set of tasks in the cpuset. - -A per-cpuset simple digital filter (requires a spinlock and 3 words -of data per-cpuset) is kept, and updated by any task attached to that -cpuset, if it enters the synchronous (direct) page reclaim code. - -A per-cpuset file provides an integer number representing the recent -(half-life of 10 seconds) rate of direct page reclaims caused by -the tasks in the cpuset, in units of reclaims attempted per second, -times 1000. - - -1.6 What is memory spread ? ---------------------------- -There are two boolean flag files per cpuset that control where the -kernel allocates pages for the file system buffers and related in -kernel data structures. They are called 'memory_spread_page' and -'memory_spread_slab'. - -If the per-cpuset boolean flag file 'memory_spread_page' is set, then -the kernel will spread the file system buffers (page cache) evenly -over all the nodes that the faulting task is allowed to use, instead -of preferring to put those pages on the node where the task is running. - -If the per-cpuset boolean flag file 'memory_spread_slab' is set, -then the kernel will spread some file system related slab caches, -such as for inodes and dentries evenly over all the nodes that the -faulting task is allowed to use, instead of preferring to put those -pages on the node where the task is running. - -The setting of these flags does not affect anonymous data segment or -stack segment pages of a task. - -By default, both kinds of memory spreading are off, and memory -pages are allocated on the node local to where the task is running, -except perhaps as modified by the tasks NUMA mempolicy or cpuset -configuration, so long as sufficient free memory pages are available. - -When new cpusets are created, they inherit the memory spread settings -of their parent. - -Setting memory spreading causes allocations for the affected page -or slab caches to ignore the tasks NUMA mempolicy and be spread -instead. Tasks using mbind() or set_mempolicy() calls to set NUMA -mempolicies will not notice any change in these calls as a result of -their containing tasks memory spread settings. If memory spreading -is turned off, then the currently specified NUMA mempolicy once again -applies to memory page allocations. - -Both 'memory_spread_page' and 'memory_spread_slab' are boolean flag -files. By default they contain "0", meaning that the feature is off -for that cpuset. If a "1" is written to that file, then that turns -the named feature on. - -The implementation is simple. - -Setting the flag 'memory_spread_page' turns on a per-process flag -PF_SPREAD_PAGE for each task that is in that cpuset or subsequently -joins that cpuset. The page allocation calls for the page cache -is modified to perform an inline check for this PF_SPREAD_PAGE task -flag, and if set, a call to a new routine cpuset_mem_spread_node() -returns the node to prefer for the allocation. - -Similarly, setting 'memory_spread_slab' turns on the flag -PF_SPREAD_SLAB, and appropriately marked slab caches will allocate -pages from the node returned by cpuset_mem_spread_node(). - -The cpuset_mem_spread_node() routine is also simple. It uses the -value of a per-task rotor cpuset_mem_spread_rotor to select the next -node in the current tasks mems_allowed to prefer for the allocation. - -This memory placement policy is also known (in other contexts) as -round-robin or interleave. - -This policy can provide substantial improvements for jobs that need -to place thread local data on the corresponding node, but that need -to access large file system data sets that need to be spread across -the several nodes in the jobs cpuset in order to fit. Without this -policy, especially for jobs that might have one thread reading in the -data set, the memory allocation across the nodes in the jobs cpuset -can become very uneven. - -1.7 What is sched_load_balance ? --------------------------------- - -The kernel scheduler (kernel/sched.c) automatically load balances -tasks. If one CPU is underutilized, kernel code running on that -CPU will look for tasks on other more overloaded CPUs and move those -tasks to itself, within the constraints of such placement mechanisms -as cpusets and sched_setaffinity. - -The algorithmic cost of load balancing and its impact on key shared -kernel data structures such as the task list increases more than -linearly with the number of CPUs being balanced. So the scheduler -has support to partition the systems CPUs into a number of sched -domains such that it only load balances within each sched domain. -Each sched domain covers some subset of the CPUs in the system; -no two sched domains overlap; some CPUs might not be in any sched -domain and hence won't be load balanced. - -Put simply, it costs less to balance between two smaller sched domains -than one big one, but doing so means that overloads in one of the -two domains won't be load balanced to the other one. - -By default, there is one sched domain covering all CPUs, except those -marked isolated using the kernel boot time "isolcpus=" argument. - -This default load balancing across all CPUs is not well suited for -the following two situations: - 1) On large systems, load balancing across many CPUs is expensive. - If the system is managed using cpusets to place independent jobs - on separate sets of CPUs, full load balancing is unnecessary. - 2) Systems supporting realtime on some CPUs need to minimize - system overhead on those CPUs, including avoiding task load - balancing if that is not needed. - -When the per-cpuset flag "sched_load_balance" is enabled (the default -setting), it requests that all the CPUs in that cpusets allowed 'cpus' -be contained in a single sched domain, ensuring that load balancing -can move a task (not otherwised pinned, as by sched_setaffinity) -from any CPU in that cpuset to any other. - -When the per-cpuset flag "sched_load_balance" is disabled, then the -scheduler will avoid load balancing across the CPUs in that cpuset, ---except-- in so far as is necessary because some overlapping cpuset -has "sched_load_balance" enabled. - -So, for example, if the top cpuset has the flag "sched_load_balance" -enabled, then the scheduler will have one sched domain covering all -CPUs, and the setting of the "sched_load_balance" flag in any other -cpusets won't matter, as we're already fully load balancing. - -Therefore in the above two situations, the top cpuset flag -"sched_load_balance" should be disabled, and only some of the smaller, -child cpusets have this flag enabled. - -When doing this, you don't usually want to leave any unpinned tasks in -the top cpuset that might use non-trivial amounts of CPU, as such tasks -may be artificially constrained to some subset of CPUs, depending on -the particulars of this flag setting in descendent cpusets. Even if -such a task could use spare CPU cycles in some other CPUs, the kernel -scheduler might not consider the possibility of load balancing that -task to that underused CPU. - -Of course, tasks pinned to a particular CPU can be left in a cpuset -that disables "sched_load_balance" as those tasks aren't going anywhere -else anyway. - -There is an impedance mismatch here, between cpusets and sched domains. -Cpusets are hierarchical and nest. Sched domains are flat; they don't -overlap and each CPU is in at most one sched domain. - -It is necessary for sched domains to be flat because load balancing -across partially overlapping sets of CPUs would risk unstable dynamics -that would be beyond our understanding. So if each of two partially -overlapping cpusets enables the flag 'sched_load_balance', then we -form a single sched domain that is a superset of both. We won't move -a task to a CPU outside it cpuset, but the scheduler load balancing -code might waste some compute cycles considering that possibility. - -This mismatch is why there is not a simple one-to-one relation -between which cpusets have the flag "sched_load_balance" enabled, -and the sched domain configuration. If a cpuset enables the flag, it -will get balancing across all its CPUs, but if it disables the flag, -it will only be assured of no load balancing if no other overlapping -cpuset enables the flag. - -If two cpusets have partially overlapping 'cpus' allowed, and only -one of them has this flag enabled, then the other may find its -tasks only partially load balanced, just on the overlapping CPUs. -This is just the general case of the top_cpuset example given a few -paragraphs above. In the general case, as in the top cpuset case, -don't leave tasks that might use non-trivial amounts of CPU in -such partially load balanced cpusets, as they may be artificially -constrained to some subset of the CPUs allowed to them, for lack of -load balancing to the other CPUs. - -1.7.1 sched_load_balance implementation details. ------------------------------------------------- - -The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary -to most cpuset flags.) When enabled for a cpuset, the kernel will -ensure that it can load balance across all the CPUs in that cpuset -(makes sure that all the CPUs in the cpus_allowed of that cpuset are -in the same sched domain.) - -If two overlapping cpusets both have 'sched_load_balance' enabled, -then they will be (must be) both in the same sched domain. - -If, as is the default, the top cpuset has 'sched_load_balance' enabled, -then by the above that means there is a single sched domain covering -the whole system, regardless of any other cpuset settings. - -The kernel commits to user space that it will avoid load balancing -where it can. It will pick as fine a granularity partition of sched -domains as it can while still providing load balancing for any set -of CPUs allowed to a cpuset having 'sched_load_balance' enabled. - -The internal kernel cpuset to scheduler interface passes from the -cpuset code to the scheduler code a partition of the load balanced -CPUs in the system. This partition is a set of subsets (represented -as an array of cpumask_t) of CPUs, pairwise disjoint, that cover all -the CPUs that must be load balanced. - -Whenever the 'sched_load_balance' flag changes, or CPUs come or go -from a cpuset with this flag enabled, or a cpuset with this flag -enabled is removed, the cpuset code builds a new such partition and -passes it to the scheduler sched domain setup code, to have the sched -domains rebuilt as necessary. - -This partition exactly defines what sched domains the scheduler should -setup - one sched domain for each element (cpumask_t) in the partition. - -The scheduler remembers the currently active sched domain partitions. -When the scheduler routine partition_sched_domains() is invoked from -the cpuset code to update these sched domains, it compares the new -partition requested with the current, and updates its sched domains, -removing the old and adding the new, for each change. - - -1.8 What is sched_relax_domain_level ? --------------------------------------- - -In sched domain, the scheduler migrates tasks in 2 ways; periodic load -balance on tick, and at time of some schedule events. - -When a task is woken up, scheduler try to move the task on idle CPU. -For example, if a task A running on CPU X activates another task B -on the same CPU X, and if CPU Y is X's sibling and performing idle, -then scheduler migrate task B to CPU Y so that task B can start on -CPU Y without waiting task A on CPU X. - -And if a CPU run out of tasks in its runqueue, the CPU try to pull -extra tasks from other busy CPUs to help them before it is going to -be idle. - -Of course it takes some searching cost to find movable tasks and/or -idle CPUs, the scheduler might not search all CPUs in the domain -everytime. In fact, in some architectures, the searching ranges on -events are limited in the same socket or node where the CPU locates, -while the load balance on tick searchs all. - -For example, assume CPU Z is relatively far from CPU X. Even if CPU Z -is idle while CPU X and the siblings are busy, scheduler can't migrate -woken task B from X to Z since it is out of its searching range. -As the result, task B on CPU X need to wait task A or wait load balance -on the next tick. For some applications in special situation, waiting -1 tick may be too long. - -The 'sched_relax_domain_level' file allows you to request changing -this searching range as you like. This file takes int value which -indicates size of searching range in levels ideally as follows, -otherwise initial value -1 that indicates the cpuset has no request. - - -1 : no request. use system default or follow request of others. - 0 : no search. - 1 : search siblings (hyperthreads in a core). - 2 : search cores in a package. - 3 : search cpus in a node [= system wide on non-NUMA system] - ( 4 : search nodes in a chunk of node [on NUMA system] ) - ( 5 : search system wide [on NUMA system] ) - -The system default is architecture dependent. The system default -can be changed using the relax_domain_level= boot parameter. - -This file is per-cpuset and affect the sched domain where the cpuset -belongs to. Therefore if the flag 'sched_load_balance' of a cpuset -is disabled, then 'sched_relax_domain_level' have no effect since -there is no sched domain belonging the cpuset. - -If multiple cpusets are overlapping and hence they form a single sched -domain, the largest value among those is used. Be careful, if one -requests 0 and others are -1 then 0 is used. - -Note that modifying this file will have both good and bad effects, -and whether it is acceptable or not will be depend on your situation. -Don't modify this file if you are not sure. - -If your situation is: - - The migration costs between each cpu can be assumed considerably - small(for you) due to your special application's behavior or - special hardware support for CPU cache etc. - - The searching cost doesn't have impact(for you) or you can make - the searching cost enough small by managing cpuset to compact etc. - - The latency is required even it sacrifices cache hit rate etc. -then increasing 'sched_relax_domain_level' would benefit you. - - -1.9 How do I use cpusets ? --------------------------- - -In order to minimize the impact of cpusets on critical kernel -code, such as the scheduler, and due to the fact that the kernel -does not support one task updating the memory placement of another -task directly, the impact on a task of changing its cpuset CPU -or Memory Node placement, or of changing to which cpuset a task -is attached, is subtle. - -If a cpuset has its Memory Nodes modified, then for each task attached -to that cpuset, the next time that the kernel attempts to allocate -a page of memory for that task, the kernel will notice the change -in the tasks cpuset, and update its per-task memory placement to -remain within the new cpusets memory placement. If the task was using -mempolicy MPOL_BIND, and the nodes to which it was bound overlap with -its new cpuset, then the task will continue to use whatever subset -of MPOL_BIND nodes are still allowed in the new cpuset. If the task -was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed -in the new cpuset, then the task will be essentially treated as if it -was MPOL_BIND bound to the new cpuset (even though its numa placement, -as queried by get_mempolicy(), doesn't change). If a task is moved -from one cpuset to another, then the kernel will adjust the tasks -memory placement, as above, the next time that the kernel attempts -to allocate a page of memory for that task. - -If a cpuset has its 'cpus' modified, then each task in that cpuset -will have its allowed CPU placement changed immediately. Similarly, -if a tasks pid is written to a cpusets 'tasks' file, in either its -current cpuset or another cpuset, then its allowed CPU placement is -changed immediately. If such a task had been bound to some subset -of its cpuset using the sched_setaffinity() call, the task will be -allowed to run on any CPU allowed in its new cpuset, negating the -affect of the prior sched_setaffinity() call. - -In summary, the memory placement of a task whose cpuset is changed is -updated by the kernel, on the next allocation of a page for that task, -but the processor placement is not updated, until that tasks pid is -rewritten to the 'tasks' file of its cpuset. This is done to avoid -impacting the scheduler code in the kernel with a check for changes -in a tasks processor placement. - -Normally, once a page is allocated (given a physical page -of main memory) then that page stays on whatever node it -was allocated, so long as it remains allocated, even if the -cpusets memory placement policy 'mems' subsequently changes. -If the cpuset flag file 'memory_migrate' is set true, then when -tasks are attached to that cpuset, any pages that task had -allocated to it on nodes in its previous cpuset are migrated -to the tasks new cpuset. The relative placement of the page within -the cpuset is preserved during these migration operations if possible. -For example if the page was on the second valid node of the prior cpuset -then the page will be placed on the second valid node of the new cpuset. - -Also if 'memory_migrate' is set true, then if that cpusets -'mems' file is modified, pages allocated to tasks in that -cpuset, that were on nodes in the previous setting of 'mems', -will be moved to nodes in the new setting of 'mems.' -Pages that were not in the tasks prior cpuset, or in the cpusets -prior 'mems' setting, will not be moved. - -There is an exception to the above. If hotplug functionality is used -to remove all the CPUs that are currently assigned to a cpuset, -then all the tasks in that cpuset will be moved to the nearest ancestor -with non-empty cpus. But the moving of some (or all) tasks might fail if -cpuset is bound with another cgroup subsystem which has some restrictions -on task attaching. In this failing case, those tasks will stay -in the original cpuset, and the kernel will automatically update -their cpus_allowed to allow all online CPUs. When memory hotplug -functionality for removing Memory Nodes is available, a similar exception -is expected to apply there as well. In general, the kernel prefers to -violate cpuset placement, over starving a task that has had all -its allowed CPUs or Memory Nodes taken offline. - -There is a second exception to the above. GFP_ATOMIC requests are -kernel internal allocations that must be satisfied, immediately. -The kernel may drop some request, in rare cases even panic, if a -GFP_ATOMIC alloc fails. If the request cannot be satisfied within -the current tasks cpuset, then we relax the cpuset, and look for -memory anywhere we can find it. It's better to violate the cpuset -than stress the kernel. - -To start a new job that is to be contained within a cpuset, the steps are: - - 1) mkdir /dev/cpuset - 2) mount -t cgroup -ocpuset cpuset /dev/cpuset - 3) Create the new cpuset by doing mkdir's and write's (or echo's) in - the /dev/cpuset virtual file system. - 4) Start a task that will be the "founding father" of the new job. - 5) Attach that task to the new cpuset by writing its pid to the - /dev/cpuset tasks file for that cpuset. - 6) fork, exec or clone the job tasks from this founding father task. - -For example, the following sequence of commands will setup a cpuset -named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, -and then start a subshell 'sh' in that cpuset: - - mount -t cgroup -ocpuset cpuset /dev/cpuset - cd /dev/cpuset - mkdir Charlie - cd Charlie - /bin/echo 2-3 > cpus - /bin/echo 1 > mems - /bin/echo $$ > tasks - sh - # The subshell 'sh' is now running in cpuset Charlie - # The next line should display '/Charlie' - cat /proc/self/cpuset - -In the future, a C library interface to cpusets will likely be -available. For now, the only way to query or modify cpusets is -via the cpuset file system, using the various cd, mkdir, echo, cat, -rmdir commands from the shell, or their equivalent from C. - -The sched_setaffinity calls can also be done at the shell prompt using -SGI's runon or Robert Love's taskset. The mbind and set_mempolicy -calls can be done at the shell prompt using the numactl command -(part of Andi Kleen's numa package). - -2. Usage Examples and Syntax -============================ - -2.1 Basic Usage ---------------- - -Creating, modifying, using the cpusets can be done through the cpuset -virtual filesystem. - -To mount it, type: -# mount -t cgroup -o cpuset cpuset /dev/cpuset - -Then under /dev/cpuset you can find a tree that corresponds to the -tree of the cpusets in the system. For instance, /dev/cpuset -is the cpuset that holds the whole system. - -If you want to create a new cpuset under /dev/cpuset: -# cd /dev/cpuset -# mkdir my_cpuset - -Now you want to do something with this cpuset. -# cd my_cpuset - -In this directory you can find several files: -# ls -cpu_exclusive memory_migrate mems tasks -cpus memory_pressure notify_on_release -mem_exclusive memory_spread_page sched_load_balance -mem_hardwall memory_spread_slab sched_relax_domain_level - -Reading them will give you information about the state of this cpuset: -the CPUs and Memory Nodes it can use, the processes that are using -it, its properties. By writing to these files you can manipulate -the cpuset. - -Set some flags: -# /bin/echo 1 > cpu_exclusive - -Add some cpus: -# /bin/echo 0-7 > cpus - -Add some mems: -# /bin/echo 0-7 > mems - -Now attach your shell to this cpuset: -# /bin/echo $$ > tasks - -You can also create cpusets inside your cpuset by using mkdir in this -directory. -# mkdir my_sub_cs - -To remove a cpuset, just use rmdir: -# rmdir my_sub_cs -This will fail if the cpuset is in use (has cpusets inside, or has -processes attached). - -Note that for legacy reasons, the "cpuset" filesystem exists as a -wrapper around the cgroup filesystem. - -The command - -mount -t cpuset X /dev/cpuset - -is equivalent to - -mount -t cgroup -ocpuset X /dev/cpuset -echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent - -2.2 Adding/removing cpus ------------------------- - -This is the syntax to use when writing in the cpus or mems files -in cpuset directories: - -# /bin/echo 1-4 > cpus -> set cpus list to cpus 1,2,3,4 -# /bin/echo 1,2,3,4 > cpus -> set cpus list to cpus 1,2,3,4 - -2.3 Setting flags ------------------ - -The syntax is very simple: - -# /bin/echo 1 > cpu_exclusive -> set flag 'cpu_exclusive' -# /bin/echo 0 > cpu_exclusive -> unset flag 'cpu_exclusive' - -2.4 Attaching processes ------------------------ - -# /bin/echo PID > tasks - -Note that it is PID, not PIDs. You can only attach ONE task at a time. -If you have several tasks to attach, you have to do it one after another: - -# /bin/echo PID1 > tasks -# /bin/echo PID2 > tasks - ... -# /bin/echo PIDn > tasks - - -3. Questions -============ - -Q: what's up with this '/bin/echo' ? -A: bash's builtin 'echo' command does not check calls to write() against - errors. If you use it in the cpuset file system, you won't be - able to tell whether a command succeeded or failed. - -Q: When I attach processes, only the first of the line gets really attached ! -A: We can only return one error code per call to write(). So you should also - put only ONE pid. - -4. Contact -========== - -Web: http://www.bullopensource.org/cpuset diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index 8398ca4ff4ed..6f33593e59e2 100644 --- a/Documentation/scheduler/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt @@ -231,7 +231,7 @@ CPU bandwidth control purposes: This options needs CONFIG_CGROUPS to be defined, and lets the administrator create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See - Documentation/cgroups.txt for more information about this filesystem. + Documentation/cgroups/cgroups.txt for more information about this filesystem. Only one of these options to group tasks can be chosen and not both. diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index dede0a2cfc45..4c5bcf6ca7e8 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -9,7 +9,7 @@ * * Author: Pavel Emelianov * - * See Documentation/controllers/resource_counter.txt for more + * See Documentation/cgroups/resource_counter.txt for more * info about what this counter is. */ diff --git a/init/Kconfig b/init/Kconfig index 56fd93c63c77..2af83825634e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -323,8 +323,8 @@ config CGROUP_SCHED This option allows you to create arbitrary task groups using the "cgroup" pseudo filesystem and control the cpu bandwidth allocated to each such task group. - Refer to Documentation/cgroups.txt for more information - on "cgroup" pseudo filesystem. + Refer to Documentation/cgroups/cgroups.txt for more + information on "cgroup" pseudo filesystem. endchoice @@ -335,10 +335,9 @@ menuconfig CGROUPS use with process control subsystems such as Cpusets, CFS, memory controls or device isolation. See - - Documentation/cpusets.txt (Cpusets) - Documentation/scheduler/sched-design-CFS.txt (CFS) - - Documentation/cgroups/ (features for grouping, isolation) - - Documentation/controllers/ (features for resource control) + - Documentation/cgroups/ (features for grouping, isolation + and resource control) Say N if unsure. diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 647c77a88fcb..a85678865c5e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -568,7 +568,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * load balancing domains (sched domains) as specified by that partial * partition. * - * See "What is sched_load_balance" in Documentation/cpusets.txt + * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt * for a background explanation of this. * * Does not return errors, on the theory that the callers of this -- cgit v1.2.3-71-gd317 From 3eabdb76a03bbe8f556162738c264dbfb24cff6a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jan 2009 13:51:01 -0800 Subject: jbd: fix missing kernel-doc Fix jbd header file kernel-doc notation: Warning(linux-2.6.28-git13//include/linux/jbd.h:823): No description found for parameter 'j_average_commit_time' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/jbd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 6384b19efe64..64246dce5663 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -614,6 +614,8 @@ struct transaction_s * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the * number that will fit in j_blocksize * @j_last_sync_writer: most recent pid which did a synchronous write + * @j_average_commit_time: the average amount of time in nanoseconds it + * takes to commit a transaction to the disk. * @j_private: An opaque pointer to fs-private information. */ -- cgit v1.2.3-71-gd317 From 6ae301e85c9c58d2f430a8a7057ce488b7ff76df Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jan 2009 13:51:01 -0800 Subject: resources: fix parameter name and kernel-doc Fix __request_region() parameter kernel-doc notation and parameter name: Warning(linux-2.6.28-git10//kernel/resource.c:627): No description found for parameter 'flags' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 3 ++- kernel/resource.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index f6bb2ca8e3ba..32e4b2f72294 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -143,7 +143,8 @@ static inline unsigned long resource_type(struct resource *res) extern struct resource * __request_region(struct resource *, resource_size_t start, - resource_size_t n, const char *name, int relaxed); + resource_size_t n, + const char *name, int flags); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) diff --git a/kernel/resource.c b/kernel/resource.c index ca6a1536b205..fd5d7d574bb9 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -620,6 +620,7 @@ resource_size_t resource_alignment(struct resource *res) * @start: resource start address * @n: resource region size * @name: reserving caller's ID string + * @flags: IO resource flags */ struct resource * __request_region(struct resource *parent, resource_size_t start, resource_size_t n, -- cgit v1.2.3-71-gd317 From 1bcbf31337391a2f54ef6c1e8871c2de5944a7dc Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Thu, 15 Jan 2009 13:51:03 -0800 Subject: btrfs & squashfs: Move btrfs and squashfsto's magic number to Use the standard magic.h for btrfs and squashfs. Signed-off-by: Qinghuang Feng Cc: Phillip Lougher Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/btrfs/super.c | 2 +- fs/squashfs/squashfs_fs.h | 1 - fs/squashfs/super.c | 1 + include/linux/magic.h | 2 ++ 4 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0a14b495532f..7256cf242eb0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -51,7 +52,6 @@ #include "export.h" #include "compression.h" -#define BTRFS_SUPER_MAGIC 0x9123683E static struct super_operations btrfs_super_ops; diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 6840da1bf21e..283daafc568e 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -26,7 +26,6 @@ #define SQUASHFS_CACHED_FRAGMENTS CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE #define SQUASHFS_MAJOR 4 #define SQUASHFS_MINOR 0 -#define SQUASHFS_MAGIC 0x73717368 #define SQUASHFS_START 0 /* size of metadata (inode and directory) blocks */ diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index a0466d7467b2..071df5b5b491 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/include/linux/magic.h b/include/linux/magic.h index 439f6f3cb0c4..0b4df7eba852 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -10,11 +10,13 @@ #define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 #define TMPFS_MAGIC 0x01021994 +#define SQUASHFS_MAGIC 0x73717368 #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 #define XENFS_SUPER_MAGIC 0xabba1974 #define EXT4_SUPER_MAGIC 0xEF53 +#define BTRFS_SUPER_MAGIC 0x9123683E #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 -- cgit v1.2.3-71-gd317 From 00bfddaf7f68a6551319b536f052040c370756b0 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 15 Jan 2009 13:51:26 -0800 Subject: include of is preferred over Impact: fix 15 make headers_check warnings: include of is preferred over Signed-off-by: Jaswinder Singh Rajput Cc: Ingo Molnar Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/agpgart.h | 1 - include/linux/atm_idt77105.h | 2 +- include/linux/capi.h | 2 +- include/linux/connector.h | 2 +- include/linux/cyclades.h | 2 -- include/linux/fb.h | 2 +- include/linux/if_pppol2tp.h | 2 +- include/linux/if_pppox.h | 2 +- include/linux/input.h | 2 +- include/linux/joystick.h | 2 +- include/linux/kvm.h | 2 +- include/linux/loop.h | 2 +- include/linux/matroxfb.h | 2 +- include/linux/phantom.h | 2 +- include/linux/radeonfb.h | 2 +- 15 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/agpgart.h b/include/linux/agpgart.h index c8fdb6e658e1..110c600c885f 100644 --- a/include/linux/agpgart.h +++ b/include/linux/agpgart.h @@ -52,7 +52,6 @@ #ifndef __KERNEL__ #include -#include struct agp_version { __u16 major; diff --git a/include/linux/atm_idt77105.h b/include/linux/atm_idt77105.h index 05621cf20709..8b724000aa50 100644 --- a/include/linux/atm_idt77105.h +++ b/include/linux/atm_idt77105.h @@ -7,7 +7,7 @@ #ifndef LINUX_ATM_IDT77105_H #define LINUX_ATM_IDT77105_H -#include +#include #include #include diff --git a/include/linux/capi.h b/include/linux/capi.h index fdebaaa9f66e..65100d6cb89b 100644 --- a/include/linux/capi.h +++ b/include/linux/capi.h @@ -12,7 +12,7 @@ #ifndef __LINUX_CAPI_H__ #define __LINUX_CAPI_H__ -#include +#include #include #ifndef __KERNEL__ #include diff --git a/include/linux/connector.h b/include/linux/connector.h index 5c7f9468f753..34f2789d9b9b 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -22,7 +22,7 @@ #ifndef __CONNECTOR_H #define __CONNECTOR_H -#include +#include #define CN_IDX_CONNECTOR 0xffffffff #define CN_VAL_CONNECTOR 0xffffffff diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 2d3d1e04ba92..d06fbf286346 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -150,8 +150,6 @@ struct CYZ_BOOT_CTRL { * architectures and compilers. */ -#include - typedef __u64 ucdouble; /* 64 bits, unsigned */ typedef __u32 uclong; /* 32 bits, unsigned */ typedef __u16 ucshort; /* 16 bits, unsigned */ diff --git a/include/linux/fb.h b/include/linux/fb.h index 1ee63df5be92..818fe21257e8 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -1,7 +1,7 @@ #ifndef _LINUX_FB_H #define _LINUX_FB_H -#include +#include #include struct dentry; diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h index a7d6a2234b31..c7a66882b6d0 100644 --- a/include/linux/if_pppol2tp.h +++ b/include/linux/if_pppol2tp.h @@ -15,7 +15,7 @@ #ifndef __LINUX_IF_PPPOL2TP_H #define __LINUX_IF_PPPOL2TP_H -#include +#include #ifdef __KERNEL__ #include diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 6fb7f1788570..30c88b2245ff 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -17,7 +17,7 @@ #define __LINUX_IF_PPPOX_H -#include +#include #include #ifdef __KERNEL__ diff --git a/include/linux/input.h b/include/linux/input.h index 9a6355f74db2..1249a0c20a38 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #endif /* diff --git a/include/linux/joystick.h b/include/linux/joystick.h index b5e051295a67..9e20c29c1e14 100644 --- a/include/linux/joystick.h +++ b/include/linux/joystick.h @@ -27,7 +27,7 @@ * Vojtech Pavlik, Ucitelska 1576, Prague 8, 182 00 Czech Republic */ -#include +#include #include /* diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 35525ac63337..5715f1907601 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -7,7 +7,7 @@ * Note: you must update KVM_API_VERSION if you change this interface. */ -#include +#include #include #include #include diff --git a/include/linux/loop.h b/include/linux/loop.h index 46169a7b559b..6ffd6db5bb0d 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -80,7 +80,7 @@ enum { }; #include /* for __kernel_old_dev_t */ -#include /* for __u64 */ +#include /* for __u64 */ /* Backwards compatibility version */ struct loop_info { diff --git a/include/linux/matroxfb.h b/include/linux/matroxfb.h index ae5b09493062..404f678e734b 100644 --- a/include/linux/matroxfb.h +++ b/include/linux/matroxfb.h @@ -2,7 +2,7 @@ #define __LINUX_MATROXFB_H__ #include -#include +#include #include struct matroxioc_output_mode { diff --git a/include/linux/phantom.h b/include/linux/phantom.h index 02268c54c250..94dd6645c60a 100644 --- a/include/linux/phantom.h +++ b/include/linux/phantom.h @@ -10,7 +10,7 @@ #ifndef __PHANTOM_H #define __PHANTOM_H -#include +#include /* PHN_(G/S)ET_REG param */ struct phm_reg { diff --git a/include/linux/radeonfb.h b/include/linux/radeonfb.h index 5bd8975ed78e..8c4bbdecc44f 100644 --- a/include/linux/radeonfb.h +++ b/include/linux/radeonfb.h @@ -2,7 +2,7 @@ #define __LINUX_RADEONFB_H__ #include -#include +#include #define ATY_RADEON_LCD_ON 0x00000001 #define ATY_RADEON_CRT_ON 0x00000002 -- cgit v1.2.3-71-gd317 From 94be9a58d7e683ac3c1df1858a17f09ebade8da0 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Fri, 16 Jan 2009 10:17:09 -0500 Subject: [libata] get-identity ioctl: Fix use of invalid memory pointer for SAS drivers. Caught by Ke Wei (and team?) at Marvell. Also, move the ata_scsi_ioctl export to libata-scsi.c, as that seems to be the general trend. Acked-by: James Bottomley Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 1 - drivers/ata/libata-scsi.c | 17 +++++++++++++---- drivers/scsi/ipr.c | 2 +- drivers/scsi/libsas/sas_scsi_host.c | 2 +- include/linux/libata.h | 2 ++ 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 71218d76d75e..552ecae13434 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6638,7 +6638,6 @@ EXPORT_SYMBOL_GPL(ata_dev_pair); EXPORT_SYMBOL_GPL(ata_port_disable); EXPORT_SYMBOL_GPL(ata_ratelimit); EXPORT_SYMBOL_GPL(ata_wait_register); -EXPORT_SYMBOL_GPL(ata_scsi_ioctl); EXPORT_SYMBOL_GPL(ata_scsi_queuecmd); EXPORT_SYMBOL_GPL(ata_scsi_slave_config); EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy); diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 9e92107691f2..a1a6e6298c33 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -423,9 +423,9 @@ int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, * RETURNS: * Zero on success, negative errno on error. */ -static int ata_get_identity(struct scsi_device *sdev, void __user *arg) +static int ata_get_identity(struct ata_port *ap, struct scsi_device *sdev, + void __user *arg) { - struct ata_port *ap = ata_shost_to_port(sdev->host); struct ata_device *dev = ata_scsi_find_dev(ap, sdev); u16 __user *dst = arg; char buf[40]; @@ -645,7 +645,8 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) return rc; } -int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg) +int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev, + int cmd, void __user *arg) { int val = -EINVAL, rc = -EINVAL; @@ -663,7 +664,7 @@ int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg) return 0; case HDIO_GET_IDENTITY: - return ata_get_identity(scsidev, arg); + return ata_get_identity(ap, scsidev, arg); case HDIO_DRIVE_CMD: if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) @@ -682,6 +683,14 @@ int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg) return rc; } +EXPORT_SYMBOL_GPL(ata_sas_scsi_ioctl); + +int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg) +{ + return ata_sas_scsi_ioctl(ata_shost_to_port(scsidev->host), + scsidev, cmd, arg); +} +EXPORT_SYMBOL_GPL(ata_scsi_ioctl); /** * ata_scsi_qc_new - acquire new ata_queued_cmd reference diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 841f460edbc4..07829009a8be 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4912,7 +4912,7 @@ static int ipr_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) if (res && ipr_is_gata(res)) { if (cmd == HDIO_GET_IDENTITY) return -ENOTTY; - return ata_scsi_ioctl(sdev, cmd, arg); + return ata_sas_scsi_ioctl(res->sata_port->ap, sdev, cmd, arg); } return -EINVAL; diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 744838780ada..1c558d3bce18 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -717,7 +717,7 @@ int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) struct domain_device *dev = sdev_to_domain_dev(sdev); if (dev_is_sata(dev)) - return ata_scsi_ioctl(sdev, cmd, arg); + return ata_sas_scsi_ioctl(dev->sata_dev.ap, sdev, cmd, arg); return -EINVAL; } diff --git a/include/linux/libata.h b/include/linux/libata.h index b6b8a7f3ec66..73b69c7071c5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -927,6 +927,8 @@ extern void ata_host_init(struct ata_host *, struct device *, extern int ata_scsi_detect(struct scsi_host_template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); +extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, + int cmd, void __user *arg); extern void ata_sas_port_destroy(struct ata_port *); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); -- cgit v1.2.3-71-gd317 From 3ada9c126499dd4700dcdbd5b9fe8110ad17f578 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 15 Jan 2009 17:45:31 -0800 Subject: libata: Add another column to the ata_timing table. The forthcoming OCTEON SOC Compact Flash driver needs an additional timing value that was not available in the ata_timing table. I add a new column for dmack_hold time. The values were obtained from the Compact Flash specification Rev 4.1. Signed-off-by: David Daney Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 72 ++++++++++++++++++++++++----------------------- include/linux/libata.h | 9 ++++-- 2 files changed, 43 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 552ecae13434..88c242856dae 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3029,33 +3029,33 @@ int sata_set_spd(struct ata_link *link) */ static const struct ata_timing ata_timing[] = { -/* { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960, 0 }, */ - { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 600, 0 }, - { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 383, 0 }, - { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 240, 0 }, - { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 180, 0 }, - { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 120, 0 }, - { XFER_PIO_5, 15, 65, 25, 100, 65, 25, 100, 0 }, - { XFER_PIO_6, 10, 55, 20, 80, 55, 20, 80, 0 }, - - { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 960, 0 }, - { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 480, 0 }, - { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 240, 0 }, - - { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 480, 0 }, - { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 150, 0 }, - { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 120, 0 }, - { XFER_MW_DMA_3, 25, 0, 0, 0, 65, 25, 100, 0 }, - { XFER_MW_DMA_4, 25, 0, 0, 0, 55, 20, 80, 0 }, - -/* { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 150 }, */ - { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 120 }, - { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 80 }, - { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 60 }, - { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 45 }, - { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 30 }, - { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 20 }, - { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 15 }, +/* { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 0, 960, 0 }, */ + { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 0, 600, 0 }, + { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 0, 383, 0 }, + { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 0, 240, 0 }, + { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 0, 180, 0 }, + { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 0, 120, 0 }, + { XFER_PIO_5, 15, 65, 25, 100, 65, 25, 0, 100, 0 }, + { XFER_PIO_6, 10, 55, 20, 80, 55, 20, 0, 80, 0 }, + + { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 50, 960, 0 }, + { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 30, 480, 0 }, + { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 20, 240, 0 }, + + { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 20, 480, 0 }, + { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 5, 150, 0 }, + { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 5, 120, 0 }, + { XFER_MW_DMA_3, 25, 0, 0, 0, 65, 25, 5, 100, 0 }, + { XFER_MW_DMA_4, 25, 0, 0, 0, 55, 20, 5, 80, 0 }, + +/* { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 0, 150 }, */ + { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 0, 120 }, + { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 0, 80 }, + { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 0, 60 }, + { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 0, 45 }, + { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 0, 30 }, + { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 0, 20 }, + { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 0, 15 }, { 0xFF } }; @@ -3065,14 +3065,15 @@ static const struct ata_timing ata_timing[] = { static void ata_timing_quantize(const struct ata_timing *t, struct ata_timing *q, int T, int UT) { - q->setup = EZ(t->setup * 1000, T); - q->act8b = EZ(t->act8b * 1000, T); - q->rec8b = EZ(t->rec8b * 1000, T); - q->cyc8b = EZ(t->cyc8b * 1000, T); - q->active = EZ(t->active * 1000, T); - q->recover = EZ(t->recover * 1000, T); - q->cycle = EZ(t->cycle * 1000, T); - q->udma = EZ(t->udma * 1000, UT); + q->setup = EZ(t->setup * 1000, T); + q->act8b = EZ(t->act8b * 1000, T); + q->rec8b = EZ(t->rec8b * 1000, T); + q->cyc8b = EZ(t->cyc8b * 1000, T); + q->active = EZ(t->active * 1000, T); + q->recover = EZ(t->recover * 1000, T); + q->dmack_hold = EZ(t->dmack_hold * 1000, T); + q->cycle = EZ(t->cycle * 1000, T); + q->udma = EZ(t->udma * 1000, UT); } void ata_timing_merge(const struct ata_timing *a, const struct ata_timing *b, @@ -3084,6 +3085,7 @@ void ata_timing_merge(const struct ata_timing *a, const struct ata_timing *b, if (what & ATA_TIMING_CYC8B ) m->cyc8b = max(a->cyc8b, b->cyc8b); if (what & ATA_TIMING_ACTIVE ) m->active = max(a->active, b->active); if (what & ATA_TIMING_RECOVER) m->recover = max(a->recover, b->recover); + if (what & ATA_TIMING_DMACK_HOLD) m->dmack_hold = max(a->dmack_hold, b->dmack_hold); if (what & ATA_TIMING_CYCLE ) m->cycle = max(a->cycle, b->cycle); if (what & ATA_TIMING_UDMA ) m->udma = max(a->udma, b->udma); } diff --git a/include/linux/libata.h b/include/linux/libata.h index 73b69c7071c5..2c6bd66209ff 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -401,12 +401,14 @@ enum { ATA_TIMING_CYC8B, ATA_TIMING_ACTIVE = (1 << 4), ATA_TIMING_RECOVER = (1 << 5), - ATA_TIMING_CYCLE = (1 << 6), - ATA_TIMING_UDMA = (1 << 7), + ATA_TIMING_DMACK_HOLD = (1 << 6), + ATA_TIMING_CYCLE = (1 << 7), + ATA_TIMING_UDMA = (1 << 8), ATA_TIMING_ALL = ATA_TIMING_SETUP | ATA_TIMING_ACT8B | ATA_TIMING_REC8B | ATA_TIMING_CYC8B | ATA_TIMING_ACTIVE | ATA_TIMING_RECOVER | - ATA_TIMING_CYCLE | ATA_TIMING_UDMA, + ATA_TIMING_DMACK_HOLD | ATA_TIMING_CYCLE | + ATA_TIMING_UDMA, }; enum ata_xfer_mask { @@ -866,6 +868,7 @@ struct ata_timing { unsigned short cyc8b; /* t0 for 8-bit I/O */ unsigned short active; /* t2 or tD */ unsigned short recover; /* t2i or tK */ + unsigned short dmack_hold; /* tj */ unsigned short cycle; /* t0 */ unsigned short udma; /* t2CYCTYP/2 */ }; -- cgit v1.2.3-71-gd317 From 08ec8c3878cea0bf91f2ba3c0badf44b383752d0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 16 Jan 2009 11:57:00 -0500 Subject: jbd2: On a __journal_expect() assertion failure printk "JBD2", not "EXT3-fs" Otherwise it can be very confusing to find a "EXT3-fs: " failure in the middle of EXT4-fs failures, and it makes it harder to track the source of the failure. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b45109c61fba..b28b37eb11c6 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -308,7 +308,8 @@ void buffer_assertion_failure(struct buffer_head *bh); int val = (expr); \ if (!val) { \ printk(KERN_ERR \ - "EXT3-fs unexpected failure: %s;\n",# expr); \ + "JBD2 unexpected failure: %s: %s;\n", \ + __func__, #expr); \ printk(KERN_ERR why "\n"); \ } \ val; \ -- cgit v1.2.3-71-gd317 From cc33412fb1f11613e20f9dfc2919a77ecd63fbc4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Jan 2009 17:23:05 +0100 Subject: quota: Improve locking We implement dqget() and dqput() that need neither dqonoff_mutex nor dqptr_sem. Then move dqget() and dqput() calls so that they are not called from under dqptr_sem. This is important because filesystem callbacks aren't called from under dqptr_sem which used to cause *lots* of problems with lock ranking (and with OCFS2 they became close to unsolvable). The patch also removes two functions which were introduced solely because OCFS2 needed them to cope with the old locking scheme. As time showed, they were not enough for OCFS2 anyway and it would be unnecessary work to adapt them to the new locking scheme in which they aren't needed. As a result OCFS2 needs the following patch to compile properly with quotas. Sorry to any bisecters which hit this in advance. Signed-off-by: Jan Kara --- fs/dquot.c | 218 ++++++++++++++++++++++++++--------------------- include/linux/quotaops.h | 2 - 2 files changed, 122 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/fs/dquot.c b/fs/dquot.c index 48c0571f831d..bca3cac4bee7 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -87,14 +87,17 @@ #define __DQUOT_PARANOIA /* - * There are two quota SMP locks. dq_list_lock protects all lists with quotas - * and quota formats and also dqstats structure containing statistics about the - * lists. dq_data_lock protects data from dq_dqb and also mem_dqinfo structures - * and also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. + * There are three quota SMP locks. dq_list_lock protects all lists with quotas + * and quota formats, dqstats structure containing statistics about the lists + * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and + * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly - * in inode_add_bytes() and inode_sub_bytes(). + * in inode_add_bytes() and inode_sub_bytes(). dq_state_lock protects + * modifications of quota state (on quotaon and quotaoff) and readers who care + * about latest values take it as well. * - * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock + * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock, + * dq_list_lock > dq_state_lock * * Note that some things (eg. sb pointer, type, id) doesn't change during * the life of the dquot structure and so needn't to be protected by a lock @@ -103,12 +106,7 @@ * operation is just reading pointers from inode (or not using them at all) the * read lock is enough. If pointers are altered function must hold write lock * (these locking rules also apply for S_NOQUOTA flag in the inode - note that - * for altering the flag i_mutex is also needed). If operation is holding - * reference to dquot in other way (e.g. quotactl ops) it must be guarded by - * dqonoff_mutex. - * This locking assures that: - * a) update/access to dquot pointers in inode is serialized - * b) everyone is guarded against invalidate_dquots() + * for altering the flag i_mutex is also needed). * * Each dquot has its dq_lock mutex. Locked dquots might not be referenced * from inodes (dquot_alloc_space() and such don't check the dq_lock). @@ -122,10 +120,17 @@ * Lock ordering (including related VFS locks) is the following: * i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > * dqio_mutex + * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem > + * dqptr_sem. But filesystem has to count with the fact that functions such as + * dquot_alloc_space() acquire dqptr_sem and they usually have to be called + * from inside a transaction to keep filesystem consistency after a crash. Also + * filesystems usually want to do some IO on dquot from ->mark_dirty which is + * called with dqptr_sem held. * i_mutex on quota files is special (it's below dqio_mutex) */ static DEFINE_SPINLOCK(dq_list_lock); +static DEFINE_SPINLOCK(dq_state_lock); DEFINE_SPINLOCK(dq_data_lock); static char *quotatypes[] = INITQFNAMES; @@ -428,7 +433,7 @@ static inline void do_destroy_dquot(struct dquot *dquot) * quota is disabled and pointers from inodes removed so there cannot be new * quota users. There can still be some users of quotas due to inodes being * just deleted or pruned by prune_icache() (those are not attached to any - * list). We have to wait for such users. + * list) or parallel quotactl call. We have to wait for such users. */ static void invalidate_dquots(struct super_block *sb, int type) { @@ -600,7 +605,6 @@ static struct shrinker dqcache_shrinker = { /* * Put reference to dquot * NOTE: If you change this function please check whether dqput_blocks() works right... - * MUST be called with either dqptr_sem or dqonoff_mutex held */ void dqput(struct dquot *dquot) { @@ -696,37 +700,31 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) return dquot; } -/* - * Check whether dquot is in memory. - * MUST be called with either dqptr_sem or dqonoff_mutex held - */ -int dquot_is_cached(struct super_block *sb, unsigned int id, int type) -{ - unsigned int hashent = hashfn(sb, id, type); - int ret = 0; - - if (!sb_has_quota_active(sb, type)) - return 0; - spin_lock(&dq_list_lock); - if (find_dquot(hashent, sb, id, type) != NODQUOT) - ret = 1; - spin_unlock(&dq_list_lock); - return ret; -} - /* * Get reference to dquot - * MUST be called with either dqptr_sem or dqonoff_mutex held + * + * Locking is slightly tricky here. We are guarded from parallel quotaoff() + * destroying our dquot by: + * a) checking for quota flags under dq_list_lock and + * b) getting a reference to dquot before we release dq_list_lock */ struct dquot *dqget(struct super_block *sb, unsigned int id, int type) { unsigned int hashent = hashfn(sb, id, type); - struct dquot *dquot, *empty = NODQUOT; + struct dquot *dquot = NODQUOT, *empty = NODQUOT; if (!sb_has_quota_active(sb, type)) return NODQUOT; we_slept: spin_lock(&dq_list_lock); + spin_lock(&dq_state_lock); + if (!sb_has_quota_active(sb, type)) { + spin_unlock(&dq_state_lock); + spin_unlock(&dq_list_lock); + goto out; + } + spin_unlock(&dq_state_lock); + if ((dquot = find_dquot(hashent, sb, id, type)) == NODQUOT) { if (empty == NODQUOT) { spin_unlock(&dq_list_lock); @@ -735,6 +733,7 @@ we_slept: goto we_slept; } dquot = empty; + empty = NODQUOT; dquot->dq_id = id; /* all dquots go on the inuse_list */ put_inuse(dquot); @@ -749,8 +748,6 @@ we_slept: dqstats.cache_hits++; dqstats.lookups++; spin_unlock(&dq_list_lock); - if (empty) - do_destroy_dquot(empty); } /* Wait for dq_lock - after this we know that either dquot_release() is already * finished or it will be canceled due to dq_count > 1 test */ @@ -758,11 +755,15 @@ we_slept: /* Read the dquot and instantiate it (everything done only if needed) */ if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && sb->dq_op->acquire_dquot(dquot) < 0) { dqput(dquot); - return NODQUOT; + dquot = NODQUOT; + goto out; } #ifdef __DQUOT_PARANOIA BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */ #endif +out: + if (empty) + do_destroy_dquot(empty); return dquot; } @@ -1198,63 +1199,76 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space) } /* * Initialize quota pointers in inode - * Transaction must be started at entry + * We do things in a bit complicated way but by that we avoid calling + * dqget() and thus filesystem callbacks under dqptr_sem. */ int dquot_initialize(struct inode *inode, int type) { unsigned int id = 0; int cnt, ret = 0; + struct dquot *got[MAXQUOTAS] = { NODQUOT, NODQUOT }; + struct super_block *sb = inode->i_sb; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return 0; - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + + /* First get references to structures we might need. */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && cnt != type) + continue; + switch (cnt) { + case USRQUOTA: + id = inode->i_uid; + break; + case GRPQUOTA: + id = inode->i_gid; + break; + } + got[cnt] = dqget(sb, id, cnt); + } + + down_write(&sb_dqopt(sb)->dqptr_sem); /* Having dqptr_sem we know NOQUOTA flags can't be altered... */ if (IS_NOQUOTA(inode)) goto out_err; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; + /* Avoid races with quotaoff() */ + if (!sb_has_quota_active(sb, cnt)) + continue; if (inode->i_dquot[cnt] == NODQUOT) { - switch (cnt) { - case USRQUOTA: - id = inode->i_uid; - break; - case GRPQUOTA: - id = inode->i_gid; - break; - } - inode->i_dquot[cnt] = dqget(inode->i_sb, id, cnt); + inode->i_dquot[cnt] = got[cnt]; + got[cnt] = NODQUOT; } } out_err: - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + up_write(&sb_dqopt(sb)->dqptr_sem); + /* Drop unused references */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + dqput(got[cnt]); return ret; } /* * Release all quotas referenced by inode - * Transaction must be started at an entry */ -int dquot_drop_locked(struct inode *inode) +int dquot_drop(struct inode *inode) { int cnt; + struct dquot *put[MAXQUOTAS]; + down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt] != NODQUOT) { - dqput(inode->i_dquot[cnt]); - inode->i_dquot[cnt] = NODQUOT; - } + put[cnt] = inode->i_dquot[cnt]; + inode->i_dquot[cnt] = NODQUOT; } - return 0; -} - -int dquot_drop(struct inode *inode) -{ - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); - dquot_drop_locked(inode); up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + dqput(put[cnt]); return 0; } @@ -1470,8 +1484,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) qsize_t space; struct dquot *transfer_from[MAXQUOTAS]; struct dquot *transfer_to[MAXQUOTAS]; - int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, - chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; + int cnt, ret = QUOTA_OK; + int chuid = iattr->ia_valid & ATTR_UID && inode->i_uid != iattr->ia_uid, + chgid = iattr->ia_valid & ATTR_GID && inode->i_gid != iattr->ia_gid; char warntype_to[MAXQUOTAS]; char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; @@ -1479,21 +1494,11 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return QUOTA_OK; - /* Clear the arrays */ + /* Initialize the arrays */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - transfer_to[cnt] = transfer_from[cnt] = NODQUOT; + transfer_from[cnt] = NODQUOT; + transfer_to[cnt] = NODQUOT; warntype_to[cnt] = QUOTA_NL_NOWARN; - } - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); - /* Now recheck reliably when holding dqptr_sem */ - if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); - return QUOTA_OK; - } - /* First build the transfer_to list - here we can block on - * reading/instantiating of dquots. We know that the transaction for - * us was already started so we don't violate lock ranking here */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { switch (cnt) { case USRQUOTA: if (!chuid) @@ -1507,6 +1512,13 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) break; } } + + down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + /* Now recheck reliably when holding dqptr_sem */ + if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ + up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + goto put_all; + } spin_lock(&dq_data_lock); space = inode_get_bytes(inode); /* Build the transfer_from list and check the limits */ @@ -1517,7 +1529,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) == NO_QUOTA || check_bdq(transfer_to[cnt], space, 0, warntype_to + cnt) == NO_QUOTA) - goto warn_put_all; + goto over_quota; } /* @@ -1545,28 +1557,37 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) inode->i_dquot[cnt] = transfer_to[cnt]; } - ret = QUOTA_OK; -warn_put_all: spin_unlock(&dq_data_lock); + up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + /* Dirtify all the dquots - this can block when journalling */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (transfer_from[cnt]) mark_dquot_dirty(transfer_from[cnt]); - if (transfer_to[cnt]) + if (transfer_to[cnt]) { mark_dquot_dirty(transfer_to[cnt]); + /* The reference we got is transferred to the inode */ + transfer_to[cnt] = NODQUOT; + } } +warn_put_all: flush_warnings(transfer_to, warntype_to); flush_warnings(transfer_from, warntype_from_inodes); flush_warnings(transfer_from, warntype_from_space); - +put_all: for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT) - dqput(transfer_from[cnt]); - if (ret == NO_QUOTA && transfer_to[cnt] != NODQUOT) - dqput(transfer_to[cnt]); + dqput(transfer_from[cnt]); + dqput(transfer_to[cnt]); } - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); return ret; +over_quota: + spin_unlock(&dq_data_lock); + up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + /* Clear dquot pointers we don't want to dqput() */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + transfer_from[cnt] = NODQUOT; + ret = NO_QUOTA; + goto warn_put_all; } /* Wrapper for transferring ownership of an inode */ @@ -1651,19 +1672,24 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags) continue; if (flags & DQUOT_SUSPENDED) { + spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_SUSPENDED, cnt); + spin_unlock(&dq_state_lock); } else { + spin_lock(&dq_state_lock); dqopt->flags &= ~dquot_state_flag(flags, cnt); /* Turning off suspended quotas? */ if (!sb_has_quota_loaded(sb, cnt) && sb_has_quota_suspended(sb, cnt)) { dqopt->flags &= ~dquot_state_flag( DQUOT_SUSPENDED, cnt); + spin_unlock(&dq_state_lock); iput(dqopt->files[cnt]); dqopt->files[cnt] = NULL; continue; } + spin_unlock(&dq_state_lock); } /* We still have to keep quota loaded? */ @@ -1830,7 +1856,9 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, } mutex_unlock(&dqopt->dqio_mutex); mutex_unlock(&inode->i_mutex); + spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(flags, type); + spin_unlock(&dq_state_lock); add_dquot_ref(sb, type); mutex_unlock(&dqopt->dqonoff_mutex); @@ -1872,9 +1900,11 @@ static int vfs_quota_on_remount(struct super_block *sb, int type) } inode = dqopt->files[type]; dqopt->files[type] = NULL; + spin_lock(&dq_state_lock); flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED, type); dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type); + spin_unlock(&dq_state_lock); mutex_unlock(&dqopt->dqonoff_mutex); flags = dquot_generic_flag(flags, type); @@ -1952,7 +1982,9 @@ int vfs_quota_enable(struct inode *inode, int type, int format_id, ret = -EBUSY; goto out_lock; } + spin_lock(&dq_state_lock); sb_dqopt(sb)->flags |= dquot_state_flag(flags, type); + spin_unlock(&dq_state_lock); out_lock: mutex_unlock(&dqopt->dqonoff_mutex); return ret; @@ -2039,14 +2071,12 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d { struct dquot *dquot; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!(dquot = dqget(sb, id, type))) { - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + dquot = dqget(sb, id, type); + if (dquot == NODQUOT) return -ESRCH; - } do_get_dqblk(dquot, di); dqput(dquot); - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + return 0; } @@ -2130,7 +2160,6 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d struct dquot *dquot; int rc; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); dquot = dqget(sb, id, type); if (!dquot) { rc = -ESRCH; @@ -2139,7 +2168,6 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d rc = do_set_dqblk(dquot, di); dqput(dquot); out: - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return rc; } @@ -2370,11 +2398,9 @@ EXPORT_SYMBOL(dquot_release); EXPORT_SYMBOL(dquot_mark_dquot_dirty); EXPORT_SYMBOL(dquot_initialize); EXPORT_SYMBOL(dquot_drop); -EXPORT_SYMBOL(dquot_drop_locked); EXPORT_SYMBOL(vfs_dq_drop); EXPORT_SYMBOL(dqget); EXPORT_SYMBOL(dqput); -EXPORT_SYMBOL(dquot_is_cached); EXPORT_SYMBOL(dquot_alloc_space); EXPORT_SYMBOL(dquot_alloc_inode); EXPORT_SYMBOL(dquot_free_space); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 21b781a3350f..0b35b3a1be05 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -24,10 +24,8 @@ void sync_dquots(struct super_block *sb, int type); int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); -int dquot_drop_locked(struct inode *inode); struct dquot *dqget(struct super_block *sb, unsigned int id, int type); void dqput(struct dquot *dquot); -int dquot_is_cached(struct super_block *sb, unsigned int id, int type); int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), unsigned long priv); -- cgit v1.2.3-71-gd317 From aa8c6c93747f7b55fa11e1624fec8ca33763a805 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 16 Jan 2009 21:54:43 +0100 Subject: PCI PM: Restore standard config registers of all devices early There is a problem in our handling of suspend-resume of PCI devices that many of them have their standard config registers restored with interrupts enabled and they are put into the full power state with interrupts enabled as well. This may lead to the following scenario: * an interrupt vector is shared between two or more devices * one device is resumed earlier and generates an interrupt * the interrupt handler of another device tries to handle it and attempts to access the device the config space of which hasn't been restored yet and/or which still is in a low power state * the system crashes as a result To prevent this from happening we should restore the standard configuration registers of all devices with interrupts disabled and we should put them into the D0 power state right after that. Unfortunately, this cannot be done using the existing pci_set_power_state(), because it can sleep. Also, to do it we have to make sure that the config spaces of all devices were actually saved during suspend. Signed-off-by: Rafael J. Wysocki Acked-by: Linus Torvalds Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 91 ++++++++++++++---------------------------------- drivers/pci/pci.c | 63 +++++++++++++++++++++++++++++---- drivers/pci/pci.h | 6 ++++ include/linux/pci.h | 5 +++ 4 files changed, 95 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index c697f2680856..9de07b75b993 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -355,17 +355,27 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) int i = 0; if (drv && drv->suspend) { + pci_dev->state_saved = false; + i = drv->suspend(pci_dev, state); suspend_report_result(drv->suspend, i); - } else { - pci_save_state(pci_dev); - /* - * This is for compatibility with existing code with legacy PM - * support. - */ - pci_pm_set_unknown_state(pci_dev); + if (i) + return i; + + if (pci_dev->state_saved) + goto Fixup; + + if (WARN_ON_ONCE(pci_dev->current_state != PCI_D0)) + goto Fixup; } + pci_save_state(pci_dev); + /* + * This is for compatibility with existing code with legacy PM support. + */ + pci_pm_set_unknown_state(pci_dev); + + Fixup: pci_fixup_device(pci_fixup_suspend, pci_dev); return i; @@ -386,81 +396,34 @@ static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) static int pci_legacy_resume_early(struct device *dev) { - int error = 0; struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - pci_fixup_device(pci_fixup_resume_early, pci_dev); - - if (drv && drv->resume_early) - error = drv->resume_early(pci_dev); - return error; + return drv && drv->resume_early ? + drv->resume_early(pci_dev) : 0; } static int pci_legacy_resume(struct device *dev) { - int error; struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; pci_fixup_device(pci_fixup_resume, pci_dev); - if (drv && drv->resume) { - error = drv->resume(pci_dev); - } else { - /* restore the PCI config space */ - pci_restore_state(pci_dev); - error = pci_pm_reenable_device(pci_dev); - } - return error; + return drv && drv->resume ? + drv->resume(pci_dev) : pci_pm_reenable_device(pci_dev); } /* Auxiliary functions used by the new power management framework */ -static int pci_restore_standard_config(struct pci_dev *pci_dev) -{ - struct pci_dev *parent = pci_dev->bus->self; - int error = 0; - - /* Check if the device's bus is operational */ - if (!parent || parent->current_state == PCI_D0) { - pci_restore_state(pci_dev); - pci_update_current_state(pci_dev, PCI_D0); - } else { - dev_warn(&pci_dev->dev, "unable to restore config, " - "bridge %s in low power state D%d\n", pci_name(parent), - parent->current_state); - pci_dev->current_state = PCI_UNKNOWN; - error = -EAGAIN; - } - - return error; -} - -static bool pci_is_bridge(struct pci_dev *pci_dev) -{ - return !!(pci_dev->subordinate); -} - static void pci_pm_default_resume_noirq(struct pci_dev *pci_dev) { - if (pci_restore_standard_config(pci_dev)) - pci_fixup_device(pci_fixup_resume_early, pci_dev); + pci_restore_standard_config(pci_dev); + pci_fixup_device(pci_fixup_resume_early, pci_dev); } static int pci_pm_default_resume(struct pci_dev *pci_dev) { - /* - * pci_restore_standard_config() should have been called once already, - * but it would have failed if the device's parent bridge had not been - * in power state D0 at that time. Check it and try again if necessary. - */ - if (pci_dev->current_state == PCI_UNKNOWN) { - int error = pci_restore_standard_config(pci_dev); - if (error) - return error; - } - pci_fixup_device(pci_fixup_resume, pci_dev); if (!pci_is_bridge(pci_dev)) @@ -575,11 +538,11 @@ static int pci_pm_resume_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + pci_pm_default_resume_noirq(pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - pci_pm_default_resume_noirq(pci_dev); - if (drv && drv->pm && drv->pm->resume_noirq) error = drv->pm->resume_noirq(dev); @@ -730,11 +693,11 @@ static int pci_pm_restore_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + pci_pm_default_resume_noirq(pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - pci_pm_default_resume_noirq(pci_dev); - if (drv && drv->pm && drv->pm->restore_noirq) error = drv->pm->restore_noirq(dev); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e491fdedf705..17bd9325a245 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -22,7 +22,7 @@ #include /* isa_dma_bridge_buggy */ #include "pci.h" -unsigned int pci_pm_d3_delay = 10; +unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT; #ifdef CONFIG_PCI_DOMAINS int pci_domains_supported = 1; @@ -426,6 +426,7 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) * given PCI device * @dev: PCI device to handle. * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. + * @wait: If 'true', wait for the device to change its power state * * RETURN VALUE: * -EINVAL if the requested state is invalid. @@ -435,7 +436,7 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) * 0 if device's power state has been successfully changed. */ static int -pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) +pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait) { u16 pmcsr; bool need_restore = false; @@ -480,8 +481,10 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) break; case PCI_UNKNOWN: /* Boot-up */ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) == PCI_D3hot - && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) + && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) { need_restore = true; + wait = true; + } /* Fall-through: force to D0 */ default: pmcsr = 0; @@ -491,12 +494,15 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) /* enter specified state */ pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); + if (!wait) + return 0; + /* Mandatory power management transition delays */ /* see PCI PM 1.1 5.6.1 table 18 */ if (state == PCI_D3hot || dev->current_state == PCI_D3hot) msleep(pci_pm_d3_delay); else if (state == PCI_D2 || dev->current_state == PCI_D2) - udelay(200); + udelay(PCI_PM_D2_DELAY); dev->current_state = state; @@ -515,7 +521,7 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) if (need_restore) pci_restore_bars(dev); - if (dev->bus->self) + if (wait && dev->bus->self) pcie_aspm_pm_state_change(dev->bus->self); return 0; @@ -585,7 +591,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3)) return 0; - error = pci_raw_set_power_state(dev, state); + error = pci_raw_set_power_state(dev, state, true); if (state > PCI_D0 && platform_pci_power_manageable(dev)) { /* Allow the platform to finalize the transition */ @@ -730,6 +736,7 @@ pci_save_state(struct pci_dev *dev) /* XXX: 100% dword access ok here? */ for (i = 0; i < 16; i++) pci_read_config_dword(dev, i * 4,&dev->saved_config_space[i]); + dev->state_saved = true; if ((i = pci_save_pcie_state(dev)) != 0) return i; if ((i = pci_save_pcix_state(dev)) != 0) @@ -1373,6 +1380,50 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) "unable to preallocate PCI-X save buffer\n"); } +/** + * pci_restore_standard_config - restore standard config registers of PCI device + * @dev: PCI device to handle + * + * This function assumes that the device's configuration space is accessible. + * If the device needs to be powered up, the function will wait for it to + * change the state. + */ +int pci_restore_standard_config(struct pci_dev *dev) +{ + pci_power_t prev_state; + int error; + + pci_restore_state(dev); + pci_update_current_state(dev, PCI_D0); + + prev_state = dev->current_state; + if (prev_state == PCI_D0) + return 0; + + error = pci_raw_set_power_state(dev, PCI_D0, false); + if (error) + return error; + + if (pci_is_bridge(dev)) { + if (prev_state > PCI_D1) + mdelay(PCI_PM_BUS_WAIT); + } else { + switch(prev_state) { + case PCI_D3cold: + case PCI_D3hot: + mdelay(pci_pm_d3_delay); + break; + case PCI_D2: + udelay(PCI_PM_D2_DELAY); + break; + } + } + + dev->current_state = PCI_D0; + + return 0; +} + /** * pci_enable_ari - enable ARI forwarding if hardware support it * @dev: the PCI device diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 1351bb4addde..26ddf78ac300 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -49,6 +49,12 @@ extern void pci_disable_enabled_device(struct pci_dev *dev); extern void pci_pm_init(struct pci_dev *dev); extern void platform_pci_wakeup_init(struct pci_dev *dev); extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); +extern int pci_restore_standard_config(struct pci_dev *dev); + +static inline bool pci_is_bridge(struct pci_dev *pci_dev) +{ + return !!(pci_dev->subordinate); +} extern int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val); extern int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val); diff --git a/include/linux/pci.h b/include/linux/pci.h index 80f8b8b65fde..48890cf3f96e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -117,6 +117,10 @@ typedef int __bitwise pci_power_t; #define PCI_UNKNOWN ((pci_power_t __force) 5) #define PCI_POWER_ERROR ((pci_power_t __force) -1) +#define PCI_PM_D2_DELAY 200 +#define PCI_PM_D3_WAIT 10 +#define PCI_PM_BUS_WAIT 50 + /** The pci_channel state describes connectivity between the CPU and * the pci device. If some PCI bus between here and the pci device * has crashed or locked up, this info is reflected here. @@ -252,6 +256,7 @@ struct pci_dev { unsigned int ari_enabled:1; /* ARI forwarding */ unsigned int is_managed:1; unsigned int is_pcie:1; + unsigned int state_saved:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ -- cgit v1.2.3-71-gd317 From c50331e8be32eaba5e1949f98c70d50b891262db Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Jan 2009 15:33:14 -0700 Subject: dmaengine: dma_issue_pending_all == nop when CONFIG_DMA_ENGINE=n The device list will always be empty in this configuration, so no need to walk the list. Reported-by: Ingo Molnar Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 64dea2ab326c..c4a560e72ab7 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -390,11 +390,16 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie, enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); #ifdef CONFIG_DMA_ENGINE enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); +void dma_issue_pending_all(void); #else static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) { return DMA_SUCCESS; } +static inline void dma_issue_pending_all(void) +{ + do { } while (0); +} #endif /* --- DMA device --- */ @@ -403,7 +408,6 @@ int dma_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); -void dma_issue_pending_all(void); #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y) struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param); void dma_release_channel(struct dma_chan *chan); -- cgit v1.2.3-71-gd317 From ef560682a97491f62ef538931a4861b57d66c52c Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 19 Jan 2009 15:36:21 -0700 Subject: dmaengine: add async_tx_clear_ack() macro To complete the DMA_CTRL_ACK handling API add a async_tx_clear_ack() macro. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c4a560e72ab7..34f124d7fb94 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -287,6 +287,11 @@ static inline void async_tx_ack(struct dma_async_tx_descriptor *tx) tx->flags |= DMA_CTRL_ACK; } +static inline void async_tx_clear_ack(struct dma_async_tx_descriptor *tx) +{ + tx->flags &= ~DMA_CTRL_ACK; +} + static inline bool async_tx_test_ack(struct dma_async_tx_descriptor *tx) { return (tx->flags & DMA_CTRL_ACK) == DMA_CTRL_ACK; -- cgit v1.2.3-71-gd317 From 8adb711f3668b034e7b956fac951ed08b53e0d55 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Tue, 20 Jan 2009 12:17:28 -0800 Subject: debugfs: introduce stub for debugfs_create_size_t() when DEBUG_FS=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Toralf Förster reported a build failure in the WiMAX stack when CONFIG_DEBUG_FS=n http://linuxwimax.org/pipermail/wimax/2009-January/000449.html This is due to debugfs_create_size_t() missing an stub that returns -ENODEV when the DEBUGFS subsystem is not configured in (like the rest of the debugfs API). This patch adds said stub. Reported-by: Toralf Förster Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 23936b16426b..0f5c33b0bd3e 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -162,6 +162,13 @@ static inline struct dentry *debugfs_create_x32(const char *name, mode_t mode, return ERR_PTR(-ENODEV); } +struct dentry *debugfs_create_size_t(const char *name, mode_t mode, + struct dentry *parent, + size_t *value) +{ + return ERR_PTR(-ENODEV); +} + static inline struct dentry *debugfs_create_bool(const char *name, mode_t mode, struct dentry *parent, u32 *value) -- cgit v1.2.3-71-gd317 From c0e69a5bbc6fc74184aa043aadb9a53bc58f953b Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Wed, 14 Jan 2009 11:19:08 +0100 Subject: klist.c: bit 0 in pointer can't be used as flag The commit a1ed5b0cffe4b16a93a6a3390e8cee0fbef94f86 (klist: don't iterate over deleted entries) introduces use of the low bit in a pointer to indicate if the knode is dead or not, assuming that this bit is always free. This is not true for all architectures, CRIS for example may align data on byte borders. The result is a bunch of warnings on bootup, devices not being added correctly etc, reported by Hinko Kocevar : ------------[ cut here ]------------ WARNING: at lib/klist.c:62 () Modules linked in: Stack from c1fe1cf0: c01cc7f4 c1fe1d11 c000eb4e c000e4de 00000000 00000000 c1f4f78f c1f50c2d c01d008c c1fdd1a0 c1fdd1a0 c1fe1d38 c0192954 c1fe0000 00000000 c1fe1dc0 00000002 7fffffff c1fe1da8 c0192d50 c1fe1dc0 00000002 7fffffff c1ff9fcc Call Trace: [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] <4>---[ end trace 4eaa2a86a8e2da22 ]--- ------------[ cut here ]------------ Repeat ad nauseam. Wed, Jan 14, 2009 at 12:11:32AM +0100, Bastien ROUCARIES wrote: > Perhaps using a pointerhackalign trick on this structure where > #define pointerhackalign(x) __attribute__ ((aligned (x))) > and declare > struct klist_node { > ... > } pointerhackalign(2); > > Because __attribute__ ((aligned (x))) could only increase alignment > it will safe to do that and serve as documentation purpose :) That works, but we need to do it not for the struct klist_node, but for the struct we insert into the void * in klist_node, which is struct klist. Reported-by: Hinko Kocevar Signed-off-by: Jesper Nilsson Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/klist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h index d5a27af9dba5..e91a4e59b771 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -22,7 +22,7 @@ struct klist { struct list_head k_list; void (*get)(struct klist_node *); void (*put)(struct klist_node *); -}; +} __attribute__ ((aligned (4))); #define KLIST_INIT(_name, _get, _put) \ { .k_lock = __SPIN_LOCK_UNLOCKED(_name.k_lock), \ -- cgit v1.2.3-71-gd317 From 0b491eee46012772cbf029450d123e933c2e7940 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Wed, 21 Jan 2009 12:35:43 -0800 Subject: usbnet: allow type check of devdbg arguments in non-debug build Improve usbnet's devdbg to always type-check diagnostic arguments, like dev_dbg (device.h). This makes no change to the resulting size of usbnet modules. This patch also removes an #ifdef DEBUG directive from rndis_wlan so it's devdbg statements are always type-checked at compile time. Signed-off-by: Steve Glendinning Signed-off-by: David Brownell Signed-off-by: David S. Miller --- drivers/net/wireless/rndis_wlan.c | 2 -- include/linux/usb/usbnet.h | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 607ce9f61b54..ed93ac41297f 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -1649,9 +1649,7 @@ static char *rndis_translate_scan(struct net_device *dev, char *end_buf, struct ndis_80211_bssid_ex *bssid) { -#ifdef DEBUG struct usbnet *usbdev = netdev_priv(dev); -#endif u8 *ie; char *current_val; int bssid_len, ie_len, i; diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index ba09fe88adda..7d3822243074 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -197,7 +197,9 @@ extern int usbnet_nway_reset(struct net_device *net); #define devdbg(usbnet, fmt, arg...) \ printk(KERN_DEBUG "%s: " fmt "\n" , (usbnet)->net->name , ## arg) #else -#define devdbg(usbnet, fmt, arg...) do {} while(0) +#define devdbg(usbnet, fmt, arg...) \ + ({ if (0) printk(KERN_DEBUG "%s: " fmt "\n" , (usbnet)->net->name , \ + ## arg); 0; }) #endif #define deverr(usbnet, fmt, arg...) \ -- cgit v1.2.3-71-gd317 From 336f6c322d87806ef93afad6308ac65083a865e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Jan 2009 09:50:44 +0100 Subject: debugobjects: add and use INIT_WORK_ON_STACK Impact: Fix debugobjects warning debugobject enabled kernels spit out a warning in hpet code due to a workqueue which is initialized on stack. Add INIT_WORK_ON_STACK() which calls init_timer_on_stack() and use it in hpet. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 3 ++- include/linux/workqueue.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cd759ad90690..64d5ad0b8add 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -628,11 +628,12 @@ static int hpet_cpuhp_notify(struct notifier_block *n, switch (action & 0xf) { case CPU_ONLINE: - INIT_DELAYED_WORK(&work.work, hpet_work); + INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); init_completion(&work.complete); /* FIXME: add schedule_work_on() */ schedule_delayed_work_on(cpu, &work.work, 0); wait_for_completion(&work.complete); + destroy_timer_on_stack(&work.work.timer); break; case CPU_DEAD: if (hdev) { diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b36291130f22..20b59eb1facd 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -124,6 +124,12 @@ struct execute_work { init_timer_deferrable(&(_work)->timer); \ } while (0) +#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ + do { \ + INIT_WORK(&(_work)->work, (_func)); \ + init_timer_on_stack(&(_work)->timer); \ + } while (0) + /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question -- cgit v1.2.3-71-gd317 From e4d866cdea24543ee16ce6d07d80c513e86ba983 Mon Sep 17 00:00:00 2001 From: "JosephChan@via.com.tw" Date: Fri, 23 Jan 2009 15:37:39 +0800 Subject: [libata] pata_via: support VX855, future chips whose IDE controller use 0x0571 It supports VX855 and future chips whose IDE controller uses PCI ID 0x0571. Signed-off-by: Joseph Chan Acked-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/pata_via.c | 22 +++++++++++++++++----- include/linux/pci_ids.h | 4 ++++ 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 681169c9c640..79a6c9a0b721 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -86,6 +86,10 @@ enum { VIA_SATA_PATA = 0x800, /* SATA/PATA combined configuration */ }; +enum { + VIA_IDFLAG_SINGLE = (1 << 0), /* single channel controller) */ +}; + /* * VIA SouthBridge chips. */ @@ -97,8 +101,12 @@ static const struct via_isa_bridge { u8 rev_max; u16 flags; } via_isa_bridges[] = { + { "vx855", PCI_DEVICE_ID_VIA_VX855, 0x00, 0x2f, + VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA }, { "vx800", PCI_DEVICE_ID_VIA_VX800, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA }, + { "vt8261", PCI_DEVICE_ID_VIA_8261, 0x00, 0x2f, + VIA_UDMA_133 | VIA_BAD_AST }, { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA }, @@ -122,6 +130,8 @@ static const struct via_isa_bridge { { "vt82c586", PCI_DEVICE_ID_VIA_82C586_0, 0x00, 0x0f, VIA_UDMA_NONE | VIA_SET_FIFO }, { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, VIA_UDMA_NONE | VIA_SET_FIFO | VIA_NO_UNMASK }, { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, VIA_UDMA_NONE | VIA_SET_FIFO | VIA_NO_UNMASK | VIA_BAD_ID }, + { "vtxxxx", PCI_DEVICE_ID_VIA_ANON, 0x00, 0x2f, + VIA_UDMA_133 | VIA_BAD_AST }, { NULL } }; @@ -460,6 +470,7 @@ static int via_init_one(struct pci_dev *pdev, const struct pci_device_id *id) static int printed_version; u8 enable; u32 timing; + unsigned long flags = id->driver_data; int rc; if (!printed_version++) @@ -469,9 +480,13 @@ static int via_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + if (flags & VIA_IDFLAG_SINGLE) + ppi[1] = &ata_dummy_port_info; + /* To find out how the IDE will behave and what features we actually have to look at the bridge not the IDE controller */ - for (config = via_isa_bridges; config->id; config++) + for (config = via_isa_bridges; config->id != PCI_DEVICE_ID_VIA_ANON; + config++) if ((isa = pci_get_device(PCI_VENDOR_ID_VIA + !!(config->flags & VIA_BAD_ID), config->id, NULL))) { @@ -482,10 +497,6 @@ static int via_init_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_dev_put(isa); } - if (!config->id) { - printk(KERN_WARNING "via: Unknown VIA SouthBridge, disabling.\n"); - return -ENODEV; - } pci_dev_put(isa); if (!(config->flags & VIA_NO_ENABLES)) { @@ -587,6 +598,7 @@ static const struct pci_device_id via[] = { { PCI_VDEVICE(VIA, 0x1571), }, { PCI_VDEVICE(VIA, 0x3164), }, { PCI_VDEVICE(VIA, 0x5324), }, + { PCI_VDEVICE(VIA, 0xC409), VIA_IDFLAG_SINGLE }, { }, }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d56ad9c21c09..febc10ed3858 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1357,6 +1357,7 @@ #define PCI_DEVICE_ID_VIA_8783_0 0x3208 #define PCI_DEVICE_ID_VIA_8237 0x3227 #define PCI_DEVICE_ID_VIA_8251 0x3287 +#define PCI_DEVICE_ID_VIA_8261 0x3402 #define PCI_DEVICE_ID_VIA_8237A 0x3337 #define PCI_DEVICE_ID_VIA_8237S 0x3372 #define PCI_DEVICE_ID_VIA_SATA_EIDE 0x5324 @@ -1366,10 +1367,13 @@ #define PCI_DEVICE_ID_VIA_CX700 0x8324 #define PCI_DEVICE_ID_VIA_CX700_IDE 0x0581 #define PCI_DEVICE_ID_VIA_VX800 0x8353 +#define PCI_DEVICE_ID_VIA_VX855 0x8409 #define PCI_DEVICE_ID_VIA_8371_1 0x8391 #define PCI_DEVICE_ID_VIA_82C598_1 0x8598 #define PCI_DEVICE_ID_VIA_838X_1 0xB188 #define PCI_DEVICE_ID_VIA_83_87XX_1 0xB198 +#define PCI_DEVICE_ID_VIA_C409_IDE 0XC409 +#define PCI_DEVICE_ID_VIA_ANON 0xFFFF #define PCI_VENDOR_ID_SIEMENS 0x110A #define PCI_DEVICE_ID_SIEMENS_DSCC4 0x2102 -- cgit v1.2.3-71-gd317 From e2ba42ed0f8ba174302ebfabfa063fb456d5d6f5 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 26 Jan 2009 21:19:52 +0100 Subject: i2c: Delete 10 unused driver IDs Signed-off-by: Jean Delvare --- include/linux/i2c-id.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 01d67ba9e985..82b7eee22636 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -40,9 +40,7 @@ #define I2C_DRIVERID_SAA7185B 13 /* video encoder */ #define I2C_DRIVERID_SAA7110 22 /* video decoder */ #define I2C_DRIVERID_SAA5249 24 /* SAA5249 and compatibles */ -#define I2C_DRIVERID_PCF8583 25 /* real time clock */ #define I2C_DRIVERID_TDA7432 27 /* Stereo sound processor */ -#define I2C_DRIVERID_TVMIXER 28 /* Mixer driver for tv cards */ #define I2C_DRIVERID_TVAUDIO 29 /* Generic TV sound driver */ #define I2C_DRIVERID_TDA9875 32 /* TV sound decoder chip */ #define I2C_DRIVERID_BT819 40 /* video decoder */ @@ -54,7 +52,6 @@ #define I2C_DRIVERID_SAA7191 57 /* video decoder */ #define I2C_DRIVERID_INDYCAM 58 /* SGI IndyCam */ #define I2C_DRIVERID_OVCAMCHIP 61 /* OmniVision CMOS image sens. */ -#define I2C_DRIVERID_MAX6900 63 /* MAX6900 real-time clock */ #define I2C_DRIVERID_SAA6752HS 67 /* MPEG2 encoder */ #define I2C_DRIVERID_TVEEPROM 68 /* TV EEPROM */ #define I2C_DRIVERID_WM8775 69 /* wm8775 audio processor */ @@ -62,23 +59,16 @@ #define I2C_DRIVERID_CX25840 71 /* cx2584x video encoder */ #define I2C_DRIVERID_SAA7127 72 /* saa7127 video encoder */ #define I2C_DRIVERID_SAA711X 73 /* saa711x video encoders */ -#define I2C_DRIVERID_AKITAIOEXP 74 /* IO Expander on Sharp SL-C1000 */ #define I2C_DRIVERID_INFRARED 75 /* I2C InfraRed on Video boards */ #define I2C_DRIVERID_TVP5150 76 /* TVP5150 video decoder */ #define I2C_DRIVERID_WM8739 77 /* wm8739 audio processor */ #define I2C_DRIVERID_UPD64083 78 /* upd64083 video processor */ #define I2C_DRIVERID_UPD64031A 79 /* upd64031a video processor */ #define I2C_DRIVERID_SAA717X 80 /* saa717x video encoder */ -#define I2C_DRIVERID_DS1672 81 /* Dallas/Maxim DS1672 RTC */ #define I2C_DRIVERID_BT866 85 /* Conexant bt866 video encoder */ #define I2C_DRIVERID_KS0127 86 /* Samsung ks0127 video decoder */ #define I2C_DRIVERID_TLV320AIC23B 87 /* TI TLV320AIC23B audio codec */ -#define I2C_DRIVERID_WM8731 89 /* Wolfson WM8731 audio codec */ -#define I2C_DRIVERID_WM8750 90 /* Wolfson WM8750 audio codec */ -#define I2C_DRIVERID_WM8753 91 /* Wolfson WM8753 audio codec */ -#define I2C_DRIVERID_LM4857 92 /* LM4857 Audio Amplifier */ #define I2C_DRIVERID_VP27SMPX 93 /* Panasonic VP27s tuner internal MPX */ -#define I2C_DRIVERID_CS4270 94 /* Cirrus Logic 4270 audio codec */ #define I2C_DRIVERID_M52790 95 /* Mitsubishi M52790SP/FP AV switch */ #define I2C_DRIVERID_CS5345 96 /* cs5345 audio processor */ -- cgit v1.2.3-71-gd317 From 1745522ccbabd990bfc7511861aa9fa98287cba0 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 26 Jan 2009 21:19:52 +0100 Subject: i2c: Delete many unused adapter IDs Signed-off-by: Jean Delvare --- drivers/gpu/drm/i915/intel_i2c.c | 4 --- drivers/i2c/busses/i2c-acorn.c | 1 - drivers/i2c/busses/i2c-ali1535.c | 1 - drivers/i2c/busses/i2c-ali1563.c | 1 - drivers/i2c/busses/i2c-ali15x3.c | 1 - drivers/i2c/busses/i2c-amd756.c | 1 - drivers/i2c/busses/i2c-amd8111.c | 1 - drivers/i2c/busses/i2c-au1550.c | 1 - drivers/i2c/busses/i2c-bfin-twi.c | 1 - drivers/i2c/busses/i2c-elektor.c | 1 - drivers/i2c/busses/i2c-hydra.c | 1 - drivers/i2c/busses/i2c-i801.c | 1 - drivers/i2c/busses/i2c-ibm_iic.c | 1 - drivers/i2c/busses/i2c-iop3xx.c | 1 - drivers/i2c/busses/i2c-ixp2000.c | 1 - drivers/i2c/busses/i2c-mpc.c | 1 - drivers/i2c/busses/i2c-mv64xxx.c | 1 - drivers/i2c/busses/i2c-nforce2.c | 1 - drivers/i2c/busses/i2c-parport-light.c | 1 - drivers/i2c/busses/i2c-parport.c | 1 - drivers/i2c/busses/i2c-pca-isa.c | 1 - drivers/i2c/busses/i2c-piix4.c | 1 - drivers/i2c/busses/i2c-sibyte.c | 2 -- drivers/i2c/busses/i2c-sis5595.c | 1 - drivers/i2c/busses/i2c-sis630.c | 1 - drivers/i2c/busses/i2c-sis96x.c | 1 - drivers/i2c/busses/i2c-via.c | 1 - drivers/i2c/busses/i2c-viapro.c | 1 - drivers/i2c/busses/i2c-voodoo3.c | 2 -- drivers/i2c/busses/scx200_acb.c | 1 - drivers/i2c/busses/scx200_i2c.c | 1 - drivers/ieee1394/pcilynx.c | 1 - drivers/video/aty/radeon_i2c.c | 1 - drivers/video/i810/i810-i2c.c | 1 - drivers/video/intelfb/intelfb_i2c.c | 1 - drivers/video/nvidia/nv_i2c.c | 1 - drivers/video/savage/savagefb-i2c.c | 1 - include/linux/i2c-id.h | 51 ---------------------------------- 38 files changed, 93 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index a5a2f5339e9e..5ee9d4c25753 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -137,10 +137,6 @@ struct intel_i2c_chan *intel_i2c_create(struct drm_device *dev, const u32 reg, chan->reg = reg; snprintf(chan->adapter.name, I2C_NAME_SIZE, "intel drm %s", name); chan->adapter.owner = THIS_MODULE; -#ifndef I2C_HW_B_INTELFB -#define I2C_HW_B_INTELFB I2C_HW_B_I810 -#endif - chan->adapter.id = I2C_HW_B_INTELFB; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &dev->pdev->dev; chan->algo.setsda = set_data; diff --git a/drivers/i2c/busses/i2c-acorn.c b/drivers/i2c/busses/i2c-acorn.c index 75089febbc13..9fee3ca17344 100644 --- a/drivers/i2c/busses/i2c-acorn.c +++ b/drivers/i2c/busses/i2c-acorn.c @@ -83,7 +83,6 @@ static struct i2c_algo_bit_data ioc_data = { }; static struct i2c_adapter ioc_ops = { - .id = I2C_HW_B_IOC, .algo_data = &ioc_data, }; diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c index 9cead9b9458e..981e080b32ae 100644 --- a/drivers/i2c/busses/i2c-ali1535.c +++ b/drivers/i2c/busses/i2c-ali1535.c @@ -476,7 +476,6 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter ali1535_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_SMBUS_ALI1535, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-ali1563.c b/drivers/i2c/busses/i2c-ali1563.c index dd9e796fad69..f70f46582c6c 100644 --- a/drivers/i2c/busses/i2c-ali1563.c +++ b/drivers/i2c/busses/i2c-ali1563.c @@ -386,7 +386,6 @@ static const struct i2c_algorithm ali1563_algorithm = { static struct i2c_adapter ali1563_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_SMBUS_ALI1563, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = &ali1563_algorithm, }; diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c index 234fdde7d40e..39066dee46e3 100644 --- a/drivers/i2c/busses/i2c-ali15x3.c +++ b/drivers/i2c/busses/i2c-ali15x3.c @@ -473,7 +473,6 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter ali15x3_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_SMBUS_ALI15X3, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c index 36bee5b9c952..220f4a1eee1d 100644 --- a/drivers/i2c/busses/i2c-amd756.c +++ b/drivers/i2c/busses/i2c-amd756.c @@ -298,7 +298,6 @@ static const struct i2c_algorithm smbus_algorithm = { struct i2c_adapter amd756_smbus = { .owner = THIS_MODULE, - .id = I2C_HW_SMBUS_AMD756, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c index 3972208876b3..edab51973bf5 100644 --- a/drivers/i2c/busses/i2c-amd8111.c +++ b/drivers/i2c/busses/i2c-amd8111.c @@ -387,7 +387,6 @@ static int __devinit amd8111_probe(struct pci_dev *dev, smbus->adapter.owner = THIS_MODULE; snprintf(smbus->adapter.name, sizeof(smbus->adapter.name), "SMBus2 AMD8111 adapter at %04x", smbus->base); - smbus->adapter.id = I2C_HW_SMBUS_AMD8111; smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; smbus->adapter.algo = &smbus_algorithm; smbus->adapter.algo_data = smbus; diff --git a/drivers/i2c/busses/i2c-au1550.c b/drivers/i2c/busses/i2c-au1550.c index 66a04c2c660f..f78ce523e3db 100644 --- a/drivers/i2c/busses/i2c-au1550.c +++ b/drivers/i2c/busses/i2c-au1550.c @@ -400,7 +400,6 @@ i2c_au1550_probe(struct platform_device *pdev) priv->xfer_timeout = 200; priv->ack_timeout = 200; - priv->adap.id = I2C_HW_AU1550_PSC; priv->adap.nr = pdev->id; priv->adap.algo = &au1550_algo; priv->adap.algo_data = priv; diff --git a/drivers/i2c/busses/i2c-bfin-twi.c b/drivers/i2c/busses/i2c-bfin-twi.c index 3fd2c417c1e0..fc548b3d002e 100644 --- a/drivers/i2c/busses/i2c-bfin-twi.c +++ b/drivers/i2c/busses/i2c-bfin-twi.c @@ -651,7 +651,6 @@ static int i2c_bfin_twi_probe(struct platform_device *pdev) iface->timeout_timer.data = (unsigned long)iface; p_adap = &iface->adap; - p_adap->id = I2C_HW_BLACKFIN; p_adap->nr = pdev->id; strlcpy(p_adap->name, pdev->name, sizeof(p_adap->name)); p_adap->algo = &bfin_twi_algorithm; diff --git a/drivers/i2c/busses/i2c-elektor.c b/drivers/i2c/busses/i2c-elektor.c index 0ed3ccb81b63..448b4bf35eb7 100644 --- a/drivers/i2c/busses/i2c-elektor.c +++ b/drivers/i2c/busses/i2c-elektor.c @@ -202,7 +202,6 @@ static struct i2c_algo_pcf_data pcf_isa_data = { static struct i2c_adapter pcf_isa_ops = { .owner = THIS_MODULE, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, - .id = I2C_HW_P_ELEK, .algo_data = &pcf_isa_data, .name = "i2c-elektor", }; diff --git a/drivers/i2c/busses/i2c-hydra.c b/drivers/i2c/busses/i2c-hydra.c index 648aa7baff83..bec9b845dd16 100644 --- a/drivers/i2c/busses/i2c-hydra.c +++ b/drivers/i2c/busses/i2c-hydra.c @@ -102,7 +102,6 @@ static struct i2c_algo_bit_data hydra_bit_data = { static struct i2c_adapter hydra_adap = { .owner = THIS_MODULE, .name = "Hydra i2c", - .id = I2C_HW_B_HYDRA, .algo_data = &hydra_bit_data, }; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 526625eaa84b..230238df56c4 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -556,7 +556,6 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter i801_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_SMBUS_I801, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index 651f2f1ae5b7..88f0db73b364 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -746,7 +746,6 @@ static int __devinit iic_probe(struct of_device *ofdev, adap->dev.parent = &ofdev->dev; strlcpy(adap->name, "IBM IIC", sizeof(adap->name)); i2c_set_adapdata(adap, dev); - adap->id = I2C_HW_OCP; adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD; adap->algo = &iic_algo; adap->timeout = 1; diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c index fc2714ac0c0f..3190690c26ce 100644 --- a/drivers/i2c/busses/i2c-iop3xx.c +++ b/drivers/i2c/busses/i2c-iop3xx.c @@ -480,7 +480,6 @@ iop3xx_i2c_probe(struct platform_device *pdev) } memcpy(new_adapter->name, pdev->name, strlen(pdev->name)); - new_adapter->id = I2C_HW_IOP3XX; new_adapter->owner = THIS_MODULE; new_adapter->class = I2C_CLASS_HWMON | I2C_CLASS_SPD; new_adapter->dev.parent = &pdev->dev; diff --git a/drivers/i2c/busses/i2c-ixp2000.c b/drivers/i2c/busses/i2c-ixp2000.c index 05d72e981353..8e8467970481 100644 --- a/drivers/i2c/busses/i2c-ixp2000.c +++ b/drivers/i2c/busses/i2c-ixp2000.c @@ -116,7 +116,6 @@ static int ixp2000_i2c_probe(struct platform_device *plat_dev) drv_data->algo_data.udelay = 6; drv_data->algo_data.timeout = 100; - drv_data->adapter.id = I2C_HW_B_IXP2000, strlcpy(drv_data->adapter.name, plat_dev->dev.driver->name, sizeof(drv_data->adapter.name)); drv_data->adapter.algo_data = &drv_data->algo_data, diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index a9a45fcc8544..aedbbe6618db 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -310,7 +310,6 @@ static const struct i2c_algorithm mpc_algo = { static struct i2c_adapter mpc_ops = { .owner = THIS_MODULE, .name = "MPC adapter", - .id = I2C_HW_MPC107, .algo = &mpc_algo, .timeout = 1, }; diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 9e8118d2fe64..eeda276f8f16 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -527,7 +527,6 @@ mv64xxx_i2c_probe(struct platform_device *pd) goto exit_unmap_regs; } drv_data->adapter.dev.parent = &pd->dev; - drv_data->adapter.id = I2C_HW_MV64XXX; drv_data->adapter.algo = &mv64xxx_i2c_algo; drv_data->adapter.owner = THIS_MODULE; drv_data->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; diff --git a/drivers/i2c/busses/i2c-nforce2.c b/drivers/i2c/busses/i2c-nforce2.c index 3b19bc41a60b..05af6cd7f270 100644 --- a/drivers/i2c/busses/i2c-nforce2.c +++ b/drivers/i2c/busses/i2c-nforce2.c @@ -355,7 +355,6 @@ static int __devinit nforce2_probe_smb (struct pci_dev *dev, int bar, return -EBUSY; } smbus->adapter.owner = THIS_MODULE; - smbus->adapter.id = I2C_HW_SMBUS_NFORCE2; smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; smbus->adapter.algo = &smbus_algorithm; smbus->adapter.algo_data = smbus; diff --git a/drivers/i2c/busses/i2c-parport-light.c b/drivers/i2c/busses/i2c-parport-light.c index b2b8380f6602..322c5691e38e 100644 --- a/drivers/i2c/busses/i2c-parport-light.c +++ b/drivers/i2c/busses/i2c-parport-light.c @@ -115,7 +115,6 @@ static struct i2c_algo_bit_data parport_algo_data = { static struct i2c_adapter parport_adapter = { .owner = THIS_MODULE, .class = I2C_CLASS_HWMON, - .id = I2C_HW_B_LP, .algo_data = &parport_algo_data, .name = "Parallel port adapter (light)", }; diff --git a/drivers/i2c/busses/i2c-parport.c b/drivers/i2c/busses/i2c-parport.c index a257cd5cd134..0d8998610c74 100644 --- a/drivers/i2c/busses/i2c-parport.c +++ b/drivers/i2c/busses/i2c-parport.c @@ -164,7 +164,6 @@ static void i2c_parport_attach (struct parport *port) /* Fill the rest of the structure */ adapter->adapter.owner = THIS_MODULE; adapter->adapter.class = I2C_CLASS_HWMON; - adapter->adapter.id = I2C_HW_B_LP; strlcpy(adapter->adapter.name, "Parallel port adapter", sizeof(adapter->adapter.name)); adapter->algo_data = parport_algo_data; diff --git a/drivers/i2c/busses/i2c-pca-isa.c b/drivers/i2c/busses/i2c-pca-isa.c index 9eb76268ec78..4aa8138cb0a9 100644 --- a/drivers/i2c/busses/i2c-pca-isa.c +++ b/drivers/i2c/busses/i2c-pca-isa.c @@ -101,7 +101,6 @@ static struct i2c_algo_pca_data pca_isa_data = { static struct i2c_adapter pca_isa_ops = { .owner = THIS_MODULE, - .id = I2C_HW_A_ISA, .algo_data = &pca_isa_data, .name = "PCA9564 ISA Adapter", .timeout = 100, diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index eaa9b387543e..761f9dd53620 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -403,7 +403,6 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter piix4_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_SMBUS_PIIX4, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-sibyte.c b/drivers/i2c/busses/i2c-sibyte.c index 4ddefbf238e9..98b1ec489159 100644 --- a/drivers/i2c/busses/i2c-sibyte.c +++ b/drivers/i2c/busses/i2c-sibyte.c @@ -155,7 +155,6 @@ static struct i2c_algo_sibyte_data sibyte_board_data[2] = { static struct i2c_adapter sibyte_board_adapter[2] = { { .owner = THIS_MODULE, - .id = I2C_HW_SIBYTE, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = NULL, .algo_data = &sibyte_board_data[0], @@ -164,7 +163,6 @@ static struct i2c_adapter sibyte_board_adapter[2] = { }, { .owner = THIS_MODULE, - .id = I2C_HW_SIBYTE, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = NULL, .algo_data = &sibyte_board_data[1], diff --git a/drivers/i2c/busses/i2c-sis5595.c b/drivers/i2c/busses/i2c-sis5595.c index 8ce2daff985c..f320ab27da46 100644 --- a/drivers/i2c/busses/i2c-sis5595.c +++ b/drivers/i2c/busses/i2c-sis5595.c @@ -365,7 +365,6 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter sis5595_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_SMBUS_SIS5595, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c index 9c9c016ff2b5..50c3610e6028 100644 --- a/drivers/i2c/busses/i2c-sis630.c +++ b/drivers/i2c/busses/i2c-sis630.c @@ -464,7 +464,6 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter sis630_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_SMBUS_SIS630, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-sis96x.c b/drivers/i2c/busses/i2c-sis96x.c index f1bba6396641..7e1594b40579 100644 --- a/drivers/i2c/busses/i2c-sis96x.c +++ b/drivers/i2c/busses/i2c-sis96x.c @@ -241,7 +241,6 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter sis96x_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_SMBUS_SIS96X, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-via.c b/drivers/i2c/busses/i2c-via.c index 29cef0433f34..8b24f192103a 100644 --- a/drivers/i2c/busses/i2c-via.c +++ b/drivers/i2c/busses/i2c-via.c @@ -83,7 +83,6 @@ static struct i2c_algo_bit_data bit_data = { static struct i2c_adapter vt586b_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_B_VIA, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .name = "VIA i2c", .algo_data = &bit_data, diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 9f194d9efd91..02e6f724b05f 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -321,7 +321,6 @@ static const struct i2c_algorithm smbus_algorithm = { static struct i2c_adapter vt596_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_SMBUS_VIA2, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, .algo = &smbus_algorithm, }; diff --git a/drivers/i2c/busses/i2c-voodoo3.c b/drivers/i2c/busses/i2c-voodoo3.c index 1d4ae26ba73d..1a474acc0ddd 100644 --- a/drivers/i2c/busses/i2c-voodoo3.c +++ b/drivers/i2c/busses/i2c-voodoo3.c @@ -163,7 +163,6 @@ static struct i2c_algo_bit_data voo_i2c_bit_data = { static struct i2c_adapter voodoo3_i2c_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_B_VOO, .class = I2C_CLASS_TV_ANALOG, .name = "I2C Voodoo3/Banshee adapter", .algo_data = &voo_i2c_bit_data, @@ -180,7 +179,6 @@ static struct i2c_algo_bit_data voo_ddc_bit_data = { static struct i2c_adapter voodoo3_ddc_adapter = { .owner = THIS_MODULE, - .id = I2C_HW_B_VOO, .class = I2C_CLASS_DDC, .name = "DDC Voodoo3/Banshee adapter", .algo_data = &voo_ddc_bit_data, diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c index ed794b145a11..648ecc6f60e6 100644 --- a/drivers/i2c/busses/scx200_acb.c +++ b/drivers/i2c/busses/scx200_acb.c @@ -440,7 +440,6 @@ static __init struct scx200_acb_iface *scx200_create_iface(const char *text, i2c_set_adapdata(adapter, iface); snprintf(adapter->name, sizeof(adapter->name), "%s ACB%d", text, index); adapter->owner = THIS_MODULE; - adapter->id = I2C_HW_SMBUS_SCX200; adapter->algo = &scx200_acb_algorithm; adapter->class = I2C_CLASS_HWMON | I2C_CLASS_SPD; adapter->dev.parent = dev; diff --git a/drivers/i2c/busses/scx200_i2c.c b/drivers/i2c/busses/scx200_i2c.c index e4c98539c517..162b74a04886 100644 --- a/drivers/i2c/busses/scx200_i2c.c +++ b/drivers/i2c/busses/scx200_i2c.c @@ -82,7 +82,6 @@ static struct i2c_algo_bit_data scx200_i2c_data = { static struct i2c_adapter scx200_i2c_ops = { .owner = THIS_MODULE, .class = I2C_CLASS_HWMON | I2C_CLASS_SPD, - .id = I2C_HW_B_SCX200, .algo_data = &scx200_i2c_data, .name = "NatSemi SCx200 I2C", }; diff --git a/drivers/ieee1394/pcilynx.c b/drivers/ieee1394/pcilynx.c index dc15cadb06ef..38f712036201 100644 --- a/drivers/ieee1394/pcilynx.c +++ b/drivers/ieee1394/pcilynx.c @@ -1419,7 +1419,6 @@ static int __devinit add_card(struct pci_dev *dev, i2c_ad = kzalloc(sizeof(*i2c_ad), GFP_KERNEL); if (!i2c_ad) FAIL("failed to allocate I2C adapter memory"); - i2c_ad->id = I2C_HW_B_PCILYNX; strlcpy(i2c_ad->name, "PCILynx I2C", sizeof(i2c_ad->name)); i2c_adapter_data = bit_data; i2c_ad->algo_data = &i2c_adapter_data; diff --git a/drivers/video/aty/radeon_i2c.c b/drivers/video/aty/radeon_i2c.c index 2c5567175dca..359fc64e761a 100644 --- a/drivers/video/aty/radeon_i2c.c +++ b/drivers/video/aty/radeon_i2c.c @@ -72,7 +72,6 @@ static int radeon_setup_i2c_bus(struct radeon_i2c_chan *chan, const char *name) snprintf(chan->adapter.name, sizeof(chan->adapter.name), "radeonfb %s", name); chan->adapter.owner = THIS_MODULE; - chan->adapter.id = I2C_HW_B_RADEON; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->rinfo->pdev->dev; chan->algo.setsda = radeon_gpio_setsda; diff --git a/drivers/video/i810/i810-i2c.c b/drivers/video/i810/i810-i2c.c index 7787c3322ffb..9dd55e5324a1 100644 --- a/drivers/video/i810/i810-i2c.c +++ b/drivers/video/i810/i810-i2c.c @@ -90,7 +90,6 @@ static int i810_setup_i2c_bus(struct i810fb_i2c_chan *chan, const char *name) chan->adapter.owner = THIS_MODULE; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->par->dev->dev; - chan->adapter.id = I2C_HW_B_I810; chan->algo.setsda = i810i2c_setsda; chan->algo.setscl = i810i2c_setscl; chan->algo.getsda = i810i2c_getsda; diff --git a/drivers/video/intelfb/intelfb_i2c.c b/drivers/video/intelfb/intelfb_i2c.c index 5d896b81f4e0..b3065492bb20 100644 --- a/drivers/video/intelfb/intelfb_i2c.c +++ b/drivers/video/intelfb/intelfb_i2c.c @@ -111,7 +111,6 @@ static int intelfb_setup_i2c_bus(struct intelfb_info *dinfo, "intelfb %s", name); chan->adapter.class = class; chan->adapter.owner = THIS_MODULE; - chan->adapter.id = I2C_HW_B_INTELFB; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->dinfo->pdev->dev; chan->algo.setsda = intelfb_gpio_setsda; diff --git a/drivers/video/nvidia/nv_i2c.c b/drivers/video/nvidia/nv_i2c.c index 6fd7cb8f9b8e..6aaddb4f6788 100644 --- a/drivers/video/nvidia/nv_i2c.c +++ b/drivers/video/nvidia/nv_i2c.c @@ -87,7 +87,6 @@ static int nvidia_setup_i2c_bus(struct nvidia_i2c_chan *chan, const char *name, strcpy(chan->adapter.name, name); chan->adapter.owner = THIS_MODULE; - chan->adapter.id = I2C_HW_B_NVIDIA; chan->adapter.class = i2c_class; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->par->pci_dev->dev; diff --git a/drivers/video/savage/savagefb-i2c.c b/drivers/video/savage/savagefb-i2c.c index 783d4adffb93..574b29e9f8f2 100644 --- a/drivers/video/savage/savagefb-i2c.c +++ b/drivers/video/savage/savagefb-i2c.c @@ -137,7 +137,6 @@ static int savage_setup_i2c_bus(struct savagefb_i2c_chan *chan, if (chan->par) { strcpy(chan->adapter.name, name); chan->adapter.owner = THIS_MODULE; - chan->adapter.id = I2C_HW_B_SAVAGE; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->par->pcidev->dev; chan->algo.udelay = 10; diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 82b7eee22636..1ffc23bc5d1e 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -79,74 +79,23 @@ */ /* --- Bit algorithm adapters */ -#define I2C_HW_B_LP 0x010000 /* Parallel port Philips style */ #define I2C_HW_B_BT848 0x010005 /* BT848 video boards */ -#define I2C_HW_B_VIA 0x010007 /* Via vt82c586b */ -#define I2C_HW_B_HYDRA 0x010008 /* Apple Hydra Mac I/O */ -#define I2C_HW_B_I810 0x01000a /* Intel I810 */ -#define I2C_HW_B_VOO 0x01000b /* 3dfx Voodoo 3 / Banshee */ -#define I2C_HW_B_SCX200 0x01000e /* Nat'l Semi SCx200 I2C */ #define I2C_HW_B_RIVA 0x010010 /* Riva based graphics cards */ -#define I2C_HW_B_IOC 0x010011 /* IOC bit-wiggling */ -#define I2C_HW_B_IXP2000 0x010016 /* GPIO on IXP2000 systems */ #define I2C_HW_B_ZR36067 0x010019 /* Zoran-36057/36067 based boards */ -#define I2C_HW_B_PCILYNX 0x01001a /* TI PCILynx I2C adapter */ #define I2C_HW_B_CX2388x 0x01001b /* connexant 2388x based tv cards */ -#define I2C_HW_B_NVIDIA 0x01001c /* nvidia framebuffer driver */ -#define I2C_HW_B_SAVAGE 0x01001d /* savage framebuffer driver */ -#define I2C_HW_B_RADEON 0x01001e /* radeon framebuffer driver */ #define I2C_HW_B_EM28XX 0x01001f /* em28xx video capture cards */ #define I2C_HW_B_CX2341X 0x010020 /* Conexant CX2341X MPEG encoder cards */ -#define I2C_HW_B_INTELFB 0x010021 /* intel framebuffer driver */ #define I2C_HW_B_CX23885 0x010022 /* conexant 23885 based tv cards (bus1) */ #define I2C_HW_B_AU0828 0x010023 /* auvitek au0828 usb bridge */ -/* --- PCF 8584 based algorithms */ -#define I2C_HW_P_ELEK 0x020002 /* Elektor ISA Bus inteface card */ - -/* --- PCA 9564 based algorithms */ -#define I2C_HW_A_ISA 0x1a0000 /* generic ISA Bus interface card */ - -/* --- PowerPC on-chip adapters */ -#define I2C_HW_OCP 0x120000 /* IBM on-chip I2C adapter */ - -/* --- Broadcom SiByte adapters */ -#define I2C_HW_SIBYTE 0x150000 - /* --- SGI adapters */ #define I2C_HW_SGI_VINO 0x160000 -/* --- XSCALE on-chip adapters */ -#define I2C_HW_IOP3XX 0x140000 - -/* --- Au1550 PSC adapters adapters */ -#define I2C_HW_AU1550_PSC 0x1b0000 - /* --- SMBus only adapters */ -#define I2C_HW_SMBUS_PIIX4 0x040000 -#define I2C_HW_SMBUS_ALI15X3 0x040001 -#define I2C_HW_SMBUS_VIA2 0x040002 -#define I2C_HW_SMBUS_I801 0x040004 -#define I2C_HW_SMBUS_AMD756 0x040005 -#define I2C_HW_SMBUS_SIS5595 0x040006 -#define I2C_HW_SMBUS_ALI1535 0x040007 -#define I2C_HW_SMBUS_SIS630 0x040008 -#define I2C_HW_SMBUS_SIS96X 0x040009 -#define I2C_HW_SMBUS_AMD8111 0x04000a -#define I2C_HW_SMBUS_SCX200 0x04000b -#define I2C_HW_SMBUS_NFORCE2 0x04000c #define I2C_HW_SMBUS_W9968CF 0x04000d #define I2C_HW_SMBUS_OV511 0x04000e /* OV511(+) USB 1.1 webcam ICs */ #define I2C_HW_SMBUS_OV518 0x04000f /* OV518(+) USB 1.1 webcam ICs */ #define I2C_HW_SMBUS_CAFE 0x040012 /* Marvell 88ALP01 "CAFE" cam */ -#define I2C_HW_SMBUS_ALI1563 0x040013 - -/* --- MCP107 adapter */ -#define I2C_HW_MPC107 0x0d0000 - -/* --- Embedded adapters */ -#define I2C_HW_MV64XXX 0x190000 -#define I2C_HW_BLACKFIN 0x190001 /* ADI Blackfin I2C TWI driver */ /* --- Miscellaneous adapters */ #define I2C_HW_SAA7146 0x060000 /* SAA7146 video decoder bus */ -- cgit v1.2.3-71-gd317 From 5fb4523afbffae5a5cec4989ee4c9fbc3dbdef33 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 26 Jan 2009 21:19:57 +0100 Subject: i2c: Warn on deprecated binding model use Let the kernel developers know that i2c_attach_client() and i2c_detach_client() are deprecated and should no longer be used. Drivers using these should be converted to the standard device driver binding model (probe and remove methods.) Signed-off-by: Jean Delvare Acked-by: Ben Dooks --- include/linux/i2c.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 20873d402467..fcfbfea3af72 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -151,7 +151,7 @@ struct i2c_driver { * has been dynamically allocated by the driver in the function above, * it must be freed here. (LEGACY I2C DRIVERS ONLY) */ - int (*detach_client)(struct i2c_client *); + int (*detach_client)(struct i2c_client *) __deprecated; /* Standard driver model interfaces, for "new style" i2c drivers. * With the driver model, device enumeration is NEVER done by drivers; @@ -429,8 +429,10 @@ static inline int i2c_add_driver(struct i2c_driver *driver) return i2c_register_driver(THIS_MODULE, driver); } -extern int i2c_attach_client(struct i2c_client *); -extern int i2c_detach_client(struct i2c_client *); +/* These are deprecated, your driver should use the standard .probe() + * and .remove() methods instead. */ +extern int __deprecated i2c_attach_client(struct i2c_client *); +extern int __deprecated i2c_detach_client(struct i2c_client *); extern struct i2c_client *i2c_use_client(struct i2c_client *client); extern void i2c_release_client(struct i2c_client *client); -- cgit v1.2.3-71-gd317 From 3121a48d87a580f369eeb26aa0a075142274a353 Mon Sep 17 00:00:00 2001 From: Krzysztof HaÅ‚asa Date: Mon, 26 Jan 2009 12:30:12 -0800 Subject: net: Fix linux/if_frad.h's suitability for userspace. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The userspace interfaces are protected by CONFIG_* ifdefs and that of course can't work. Reported by Jaswinder Singh Rajput. Signed-off-by: Krzysztof HaÅ‚asa Signed-off-by: David S. Miller --- include/linux/if_frad.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 5c34240de746..60e16a551dd6 100644 --- a/include/linux/if_frad.h +++ b/include/linux/if_frad.h @@ -26,8 +26,6 @@ #include -#if defined(CONFIG_DLCI) || defined(CONFIG_DLCI_MODULE) - /* Structures and constants associated with the DLCI device driver */ struct dlci_add @@ -127,6 +125,8 @@ struct frad_conf #ifdef __KERNEL__ +#if defined(CONFIG_DLCI) || defined(CONFIG_DLCI_MODULE) + /* these are the fields of an RFC 1490 header */ struct frhdr { @@ -190,12 +190,10 @@ struct frad_local int buffer; /* current buffer for S508 firmware */ }; -#endif /* __KERNEL__ */ - #endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ -#ifdef __KERNEL__ extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); -#endif + +#endif /* __KERNEL__ */ #endif -- cgit v1.2.3-71-gd317 From 5ee810072175042775e39bdd3eaaa68884c27805 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 26 Jan 2009 19:21:26 -0800 Subject: Fix "multiple definition of `debugfs_create_size_t'" Introduced by 8adb711f3668b034e7b956fac951ed08b53e0d55 ("debugfs: introduce stub for debugfs_create_size_t() when DEBUG_FS=n") and due to a simple missing "static inline". Reported-and-tested-by: Jeff Chua Acked-by: Greg KH Signed-off-by: Linus Torvalds --- include/linux/debugfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 0f5c33b0bd3e..af0e01d4c663 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -162,7 +162,7 @@ static inline struct dentry *debugfs_create_x32(const char *name, mode_t mode, return ERR_PTR(-ENODEV); } -struct dentry *debugfs_create_size_t(const char *name, mode_t mode, +static inline struct dentry *debugfs_create_size_t(const char *name, mode_t mode, struct dentry *parent, size_t *value) { -- cgit v1.2.3-71-gd317 From abfe2d7b915c872f3a1fd203267cedebf90daa45 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 19 Jan 2009 20:54:54 +0100 Subject: Hibernation: Introduce system_entering_hibernation Introduce boolean function system_entering_hibernation() returning 'true' during the last phase of hibernation, in which devices are being put into low power states and the sleep state (for example, ACPI S4) is finally entered. Some device drivers need such a function to check if the system is in the final phase of hibernation. In particular, some SATA drivers are going to use it for blacklisting systems in which the disks should not be spun down during the last phase of hibernation (the BIOS will do that anyway). Signed-off-by: Rafael J. Wysocki Signed-off-by: Jeff Garzik --- include/linux/suspend.h | 2 ++ kernel/power/disk.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 2b409c44db83..c7d9bb1832ba 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -237,6 +237,7 @@ extern int hibernate_nvs_alloc(void); extern void hibernate_nvs_free(void); extern void hibernate_nvs_save(void); extern void hibernate_nvs_restore(void); +extern bool system_entering_hibernation(void); #else /* CONFIG_HIBERNATION */ static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} @@ -252,6 +253,7 @@ static inline int hibernate_nvs_alloc(void) { return 0; } static inline void hibernate_nvs_free(void) {} static inline void hibernate_nvs_save(void) {} static inline void hibernate_nvs_restore(void) {} +static inline bool system_entering_hibernation(void) { return false; } #endif /* CONFIG_HIBERNATION */ #ifdef CONFIG_PM_SLEEP diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 45e8541ab7e3..432ee575c9ee 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -71,6 +71,14 @@ void hibernation_set_ops(struct platform_hibernation_ops *ops) mutex_unlock(&pm_mutex); } +static bool entering_platform_hibernation; + +bool system_entering_hibernation(void) +{ + return entering_platform_hibernation; +} +EXPORT_SYMBOL(system_entering_hibernation); + #ifdef CONFIG_PM_DEBUG static void hibernation_debug_sleep(void) { @@ -411,6 +419,7 @@ int hibernation_platform_enter(void) if (error) goto Close; + entering_platform_hibernation = true; suspend_console(); error = device_suspend(PMSG_HIBERNATE); if (error) { @@ -445,6 +454,7 @@ int hibernation_platform_enter(void) Finish: hibernation_ops->finish(); Resume_devices: + entering_platform_hibernation = false; device_resume(PMSG_RESTORE); resume_console(); Close: -- cgit v1.2.3-71-gd317 From d7b1956fed33d30c4815e848fd7a143722916868 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 19 Jan 2009 20:55:50 +0100 Subject: DMI: Introduce dmi_first_match to make the interface more flexible Some notebooks from HP have the problem that their BIOSes attempt to spin down hard drives before entering ACPI system states S4 and S5. This leads to a yo-yo effect during system power-off shutdown and the last phase of hibernation when the disk is first spun down by the kernel and then almost immediately turned on and off by the BIOS. This, in turn, may result in shortening the disk's life times. To prevent this from happening we can blacklist the affected systems using DMI information. However, only the on-board controlles should be blacklisted and their PCI slot numbers can be used for this purpose. Unfortunately the existing interface for checking DMI information of the system is not very convenient for this purpose, because to use it, we would have to define special callback functions or create a separate struct dmi_system_id table for each blacklisted system. To overcome this difficulty introduce a new function dmi_first_match() returning a pointer to the first entry in an array of struct dmi_system_id elements that matches the system DMI information. Then, we can use this pointer to access the entry's .driver_data field containing the additional information, such as the PCI slot number, allowing us to do the desired blacklisting. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jeff Garzik --- drivers/firmware/dmi_scan.c | 74 +++++++++++++++++++++++++++++++++------------ include/linux/dmi.h | 1 + 2 files changed, 56 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index d76adfea5df7..8f0f7c449305 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -414,6 +414,29 @@ void __init dmi_scan_machine(void) dmi_initialized = 1; } +/** + * dmi_matches - check if dmi_system_id structure matches system DMI data + * @dmi: pointer to the dmi_system_id structure to check + */ +static bool dmi_matches(const struct dmi_system_id *dmi) +{ + int i; + + WARN(!dmi_initialized, KERN_ERR "dmi check: not initialized yet.\n"); + + for (i = 0; i < ARRAY_SIZE(dmi->matches); i++) { + int s = dmi->matches[i].slot; + if (s == DMI_NONE) + continue; + if (dmi_ident[s] + && strstr(dmi_ident[s], dmi->matches[i].substr)) + continue; + /* No match */ + return false; + } + return true; +} + /** * dmi_check_system - check system DMI data * @list: array of dmi_system_id structures to match against @@ -429,31 +452,44 @@ void __init dmi_scan_machine(void) */ int dmi_check_system(const struct dmi_system_id *list) { - int i, count = 0; - const struct dmi_system_id *d = list; - - WARN(!dmi_initialized, KERN_ERR "dmi check: not initialized yet.\n"); - - while (d->ident) { - for (i = 0; i < ARRAY_SIZE(d->matches); i++) { - int s = d->matches[i].slot; - if (s == DMI_NONE) - continue; - if (dmi_ident[s] && strstr(dmi_ident[s], d->matches[i].substr)) - continue; - /* No match */ - goto fail; + int count = 0; + const struct dmi_system_id *d; + + for (d = list; d->ident; d++) + if (dmi_matches(d)) { + count++; + if (d->callback && d->callback(d)) + break; } - count++; - if (d->callback && d->callback(d)) - break; -fail: d++; - } return count; } EXPORT_SYMBOL(dmi_check_system); +/** + * dmi_first_match - find dmi_system_id structure matching system DMI data + * @list: array of dmi_system_id structures to match against + * All non-null elements of the list must match + * their slot's (field index's) data (i.e., each + * list string must be a substring of the specified + * DMI slot's string data) to be considered a + * successful match. + * + * Walk the blacklist table until the first match is found. Return the + * pointer to the matching entry or NULL if there's no match. + */ +const struct dmi_system_id *dmi_first_match(const struct dmi_system_id *list) +{ + const struct dmi_system_id *d; + + for (d = list; d->ident; d++) + if (dmi_matches(d)) + return d; + + return NULL; +} +EXPORT_SYMBOL(dmi_first_match); + /** * dmi_get_system_info - return DMI data value * @field: data index (see enum dmi_field) diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 34161907b2f8..aea23105d3ed 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -38,6 +38,7 @@ struct dmi_device { #ifdef CONFIG_DMI extern int dmi_check_system(const struct dmi_system_id *list); +const struct dmi_system_id *dmi_first_match(const struct dmi_system_id *list); extern const char * dmi_get_system_info(int field); extern const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from); -- cgit v1.2.3-71-gd317 From 2a6e58d2731dcc05dafa7f976d935e0f0627fcd7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 19 Jan 2009 20:56:43 +0100 Subject: SATA: Blacklisting of systems that spin off disks during ACPI power off Introduce new libata flags ATA_FLAG_NO_POWEROFF_SPINDOWN and ATA_FLAG_NO_HIBERNATE_SPINDOWN that, if set, will prevent disks from being spun off during system power off and hibernation, respectively (to handle the hibernation case we need the new system state SYSTEM_HIBERNATE_ENTER that can be checked against by libata, in analogy with SYSTEM_POWER_OFF). Signed-off-by: Rafael J. Wysocki Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 20 +++++++++++++++++--- include/linux/libata.h | 2 ++ 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a1a6e6298c33..3c4c5ae277ba 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "libata.h" @@ -1303,6 +1304,17 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) tf->command = ATA_CMD_VERIFY; /* READ VERIFY */ } else { + /* Some odd clown BIOSen issue spindown on power off (ACPI S4 + * or S5) causing some drives to spin up and down again. + */ + if ((qc->ap->flags & ATA_FLAG_NO_POWEROFF_SPINDOWN) && + system_state == SYSTEM_POWER_OFF) + goto skip; + + if ((qc->ap->flags & ATA_FLAG_NO_HIBERNATE_SPINDOWN) && + system_entering_hibernation()) + goto skip; + /* XXX: This is for backward compatibility, will be * removed. Read Documentation/feature-removal-schedule.txt * for more info. @@ -1326,8 +1338,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) scmd->scsi_done = qc->scsidone; qc->scsidone = ata_delayed_done; } - scmd->result = SAM_STAT_GOOD; - return 1; + goto skip; } /* Issue ATA STANDBY IMMEDIATE command */ @@ -1343,10 +1354,13 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) return 0; -invalid_fld: + invalid_fld: ata_scsi_set_sense(scmd, ILLEGAL_REQUEST, 0x24, 0x0); /* "Invalid field in cbd" */ return 1; + skip: + scmd->result = SAM_STAT_GOOD; + return 1; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 2c6bd66209ff..bca3ba25f52a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -187,6 +187,8 @@ enum { ATA_FLAG_PIO_POLLING = (1 << 9), /* use polling PIO if LLD * doesn't handle PIO interrupts */ ATA_FLAG_NCQ = (1 << 10), /* host supports NCQ */ + ATA_FLAG_NO_POWEROFF_SPINDOWN = (1 << 11), /* don't spindown before poweroff */ + ATA_FLAG_NO_HIBERNATE_SPINDOWN = (1 << 12), /* don't spindown before hibernation */ ATA_FLAG_DEBUGMSG = (1 << 13), ATA_FLAG_IGN_SIMPLEX = (1 << 15), /* ignore SIMPLEX */ ATA_FLAG_NO_IORDY = (1 << 16), /* controller lacks iordy */ -- cgit v1.2.3-71-gd317 From 57064d213d2e44654d4f13c66df135b5e7389a26 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Fri, 23 Jan 2009 12:43:38 -0800 Subject: PCI: irq and pci_ids patch for Intel Tigerpoint DeviceIDs This patch adds the Intel Tigerpoint LPC Controller DeviceIDs. Signed-off-by: Seth Heasley Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 1 + include/linux/pci_ids.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 4064345cf144..fecbce6e7d7c 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -572,6 +572,7 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH7_1: case PCI_DEVICE_ID_INTEL_ICH7_30: case PCI_DEVICE_ID_INTEL_ICH7_31: + case PCI_DEVICE_ID_INTEL_TGP_LPC: case PCI_DEVICE_ID_INTEL_ESB2_0: case PCI_DEVICE_ID_INTEL_ICH8_0: case PCI_DEVICE_ID_INTEL_ICH8_1: diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d56ad9c21c09..6f260b50253b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2421,6 +2421,7 @@ #define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8 #define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9 #define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0 +#define PCI_DEVICE_ID_INTEL_TGP_LPC 0x27bc #define PCI_DEVICE_ID_INTEL_ICH7_31 0x27bd #define PCI_DEVICE_ID_INTEL_ICH7_17 0x27da #define PCI_DEVICE_ID_INTEL_ICH7_19 0x27dd -- cgit v1.2.3-71-gd317 From 1cf3eb2ff6b0844c678f2f48d0053b9d12b7da67 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 27 Jan 2009 23:48:59 +0200 Subject: kmalloc: return NULL instead of link failure The SLAB kmalloc with a constant value isn't consistent with the other implementations because it bails out with __you_cannot_kmalloc_that_much rather than returning NULL and properly allowing the caller to fall back to vmalloc or take other action. This doesn't happen with a non-constant value or with SLOB or SLUB. Starting with 2.6.28, I've been seeing build failures on s390x. This is due to init_section_page_cgroup trying to allocate 2.5MB when the max size for a kmalloc on s390x is 2MB. It's failing because the value is constant. The workarounds at the call size are ugly and the caller shouldn't have to change behavior depending on what the backend of the API is. So, this patch eliminates the link failure and returns NULL like the other implementations. Signed-off-by: Jeff Mahoney Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christoph Lameter Cc: Pekka Enberg Cc: Matt Mackall Cc: Nick Piggin Cc: [2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Pekka Enberg --- include/linux/slab_def.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 39c3a5eb8ebe..6ca6a7b66d75 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -43,10 +43,7 @@ static inline void *kmalloc(size_t size, gfp_t flags) i++; #include #undef CACHE - { - extern void __you_cannot_kmalloc_that_much(void); - __you_cannot_kmalloc_that_much(); - } + return NULL; found: #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) @@ -77,10 +74,7 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) i++; #include #undef CACHE - { - extern void __you_cannot_kmalloc_that_much(void); - __you_cannot_kmalloc_that_much(); - } + return NULL; found: #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) -- cgit v1.2.3-71-gd317 From 40413dcb7b273bda681dca38e6ff0bbb3728ef11 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 22 Jan 2009 01:58:36 +0300 Subject: Fix longstanding "error: storage size of '__mod_dmi_device_table' isn't known" gcc 3.4.6 doesn't like MODULE_DEVICE_TABLE(dmi, x) expansion enough to error out. Shut it up in a most simple way. Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- include/linux/mod_devicetable.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 97b91d1abb43..fde86671f48f 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -443,6 +443,13 @@ struct dmi_system_id { struct dmi_strmatch matches[4]; void *driver_data; }; +/* + * struct dmi_device_id appears during expansion of + * "MODULE_DEVICE_TABLE(dmi, x)". Compiler doesn't look inside it + * but this is enough for gcc 3.4.6 to error out: + * error: storage size of '__mod_dmi_device_table' isn't known + */ +#define dmi_device_id dmi_system_id #endif #define DMI_MATCH(a, b) { a, b } -- cgit v1.2.3-71-gd317 From 11e76ae0f3a82bbb6c06df8af2167af8b96a0584 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Thu, 8 Jan 2009 12:52:19 -0800 Subject: USB: add kernel-doc for wusb_dev in struct usb_device Reported by Randy Dunlap from a warning on the v2.6.29 merge window. Signed-off-by: Inaky Perez-Gonzalez Cc: David Vrabel Cc: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 85ee9be9361e..88079fd60235 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -418,6 +418,8 @@ struct usb_tt; * @autosuspend_disabled: autosuspend disabled by the user * @autoresume_disabled: autoresume disabled by the user * @skip_sys_resume: skip the next system resume + * @wusb_dev: if this is a Wireless USB device, link to the WUSB + * specific data for the device. * * Notes: * Usbcore drivers should not set usbdev->state directly. Instead use -- cgit v1.2.3-71-gd317 From d8204ee2ad1c9babd7e33d4c118ec99a78a8442e Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 28 Jan 2009 00:07:20 -0600 Subject: dmi: Fix build breakage Commit d7b1956fed33d30c4815e848fd7a143722916868 ("DMI: Introduce dmi_first_match to make the interface more flexible") introduced compile errors like the following when !CONFIG_DMI drivers/ata/sata_sil.c: In function 'sil_broken_system_poweroff': drivers/ata/sata_sil.c:713: error: implicit declaration of function 'dmi_first_match' drivers/ata/sata_sil.c:713: warning: initialization makes pointer from integer without a cast We just need a dummy version of dmi_first_match() to fix this all up. Signed-off-by: Kumar Gala Signed-off-by: Linus Torvalds --- include/linux/dmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index aea23105d3ed..d741b9ceb0e0 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -65,6 +65,8 @@ static inline int dmi_walk(void (*decode)(const struct dmi_header *)) { return -1; } static inline bool dmi_match(enum dmi_field f, const char *str) { return false; } +static inline const struct dmi_system_id * + dmi_first_match(const struct dmi_system_id *list) { return NULL; } #endif -- cgit v1.2.3-71-gd317 From dc19835df6c47ff676ad6c98722d5e529db5d74c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 29 Jan 2009 14:25:08 -0800 Subject: kprobes: fix module compilation error with CONFIG_KPROBES=n Define kprobes related data structures even if CONFIG_KPROBES is not set. This fixes compilation errors which occur if CONFIG_KPROBES is not set, in kprobe using modules. [akpm@linux-foundation.org: fix build for non-kprobes-supporting architectures] Reviewed-by: Ananth N Mavinakayanahalli Signed-off-by: Masami Hiramatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index d6ea19e314bb..32851eef48f0 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -49,6 +49,13 @@ /* Attach to insert probes on any functions which should be ignored*/ #define __kprobes __attribute__((__section__(".kprobes.text"))) notrace +#else /* CONFIG_KPROBES */ +typedef int kprobe_opcode_t; +struct arch_specific_insn { + int dummy; +}; +#define __kprobes notrace +#endif /* CONFIG_KPROBES */ struct kprobe; struct pt_regs; @@ -131,23 +138,6 @@ struct jprobe { /* For backward compatibility with old code using JPROBE_ENTRY() */ #define JPROBE_ENTRY(handler) (handler) -DECLARE_PER_CPU(struct kprobe *, current_kprobe); -DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); - -#ifdef CONFIG_KRETPROBES -extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs); -extern int arch_trampoline_kprobe(struct kprobe *p); -#else /* CONFIG_KRETPROBES */ -static inline void arch_prepare_kretprobe(struct kretprobe *rp, - struct pt_regs *regs) -{ -} -static inline int arch_trampoline_kprobe(struct kprobe *p) -{ - return 0; -} -#endif /* CONFIG_KRETPROBES */ /* * Function-return probe - * Note: @@ -188,6 +178,25 @@ struct kprobe_blackpoint { unsigned long range; }; +#ifdef CONFIG_KPROBES +DECLARE_PER_CPU(struct kprobe *, current_kprobe); +DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +#ifdef CONFIG_KRETPROBES +extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs); +extern int arch_trampoline_kprobe(struct kprobe *p); +#else /* CONFIG_KRETPROBES */ +static inline void arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) +{ +} +static inline int arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; +} +#endif /* CONFIG_KRETPROBES */ + extern struct kretprobe_blackpoint kretprobe_blacklist[]; static inline void kretprobe_assert(struct kretprobe_instance *ri, @@ -264,10 +273,6 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); #else /* CONFIG_KPROBES */ -#define __kprobes notrace -struct jprobe; -struct kretprobe; - static inline struct kprobe *get_kprobe(void *addr) { return NULL; -- cgit v1.2.3-71-gd317 From e5d9a90c36e05dd080704ea58328c00f64facdc1 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 29 Jan 2009 14:25:18 -0800 Subject: alpha: use syscall wrappers Convert OSF syscalls and add alpha specific SYSCALL_ALIAS() macro. Signed-off-by: Ivan Kokshaysky Cc: Richard Henderson Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/Kconfig | 1 + arch/alpha/kernel/entry.S | 2 +- arch/alpha/kernel/osf_sys.c | 113 ++++++++++++++++++++------------------------ arch/alpha/kernel/signal.c | 18 +++---- arch/alpha/kernel/systbls.S | 52 ++++++++++---------- include/linux/syscalls.h | 5 ++ 6 files changed, 92 insertions(+), 99 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 6110197757a3..9fb8aae5c391 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -8,6 +8,7 @@ config ALPHA select HAVE_AOUT select HAVE_IDE select HAVE_OPROFILE + select HAVE_SYSCALL_WRAPPERS help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index aa2e50cf9857..e4a54b615894 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -933,7 +933,7 @@ sys_execve: osf_sigprocmask: .prologue 0 mov $sp, $18 - jmp $31, do_osf_sigprocmask + jmp $31, sys_osf_sigprocmask .end osf_sigprocmask .align 4 diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 18a3ea1aac51..ae41f097864b 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -54,8 +54,7 @@ extern int do_pipe(int *); * identical to OSF as we don't return 0 on success, but doing otherwise * would require changes to libc. Hopefully this is good enough. */ -asmlinkage unsigned long -osf_brk(unsigned long brk) +SYSCALL_DEFINE1(osf_brk, unsigned long, brk) { unsigned long retval = sys_brk(brk); if (brk && brk != retval) @@ -66,9 +65,9 @@ osf_brk(unsigned long brk) /* * This is pure guess-work.. */ -asmlinkage int -osf_set_program_attributes(unsigned long text_start, unsigned long text_len, - unsigned long bss_start, unsigned long bss_len) +SYSCALL_DEFINE4(osf_set_program_attributes, unsigned long, text_start, + unsigned long, text_len, unsigned long, bss_start, + unsigned long, bss_len) { struct mm_struct *mm; @@ -146,9 +145,9 @@ Efault: return -EFAULT; } -asmlinkage int -osf_getdirentries(unsigned int fd, struct osf_dirent __user *dirent, - unsigned int count, long __user *basep) +SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, + struct osf_dirent __user *, dirent, unsigned int, count, + long __user *, basep) { int error; struct file *file; @@ -177,9 +176,9 @@ osf_getdirentries(unsigned int fd, struct osf_dirent __user *dirent, #undef NAME_OFFSET -asmlinkage unsigned long -osf_mmap(unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long off) +SYSCALL_DEFINE6(osf_mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, unsigned long, fd, + unsigned long, off) { struct file *file = NULL; unsigned long ret = -EBADF; @@ -254,8 +253,8 @@ do_osf_statfs(struct dentry * dentry, struct osf_statfs __user *buffer, return error; } -asmlinkage int -osf_statfs(char __user *pathname, struct osf_statfs __user *buffer, unsigned long bufsiz) +SYSCALL_DEFINE3(osf_statfs, char __user *, pathname, + struct osf_statfs __user *, buffer, unsigned long, bufsiz) { struct path path; int retval; @@ -268,8 +267,8 @@ osf_statfs(char __user *pathname, struct osf_statfs __user *buffer, unsigned lon return retval; } -asmlinkage int -osf_fstatfs(unsigned long fd, struct osf_statfs __user *buffer, unsigned long bufsiz) +SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd, + struct osf_statfs __user *, buffer, unsigned long, bufsiz) { struct file *file; int retval; @@ -368,8 +367,8 @@ osf_procfs_mount(char *dirname, struct procfs_args __user *args, int flags) return do_mount("", dirname, "proc", flags, NULL); } -asmlinkage int -osf_mount(unsigned long typenr, char __user *path, int flag, void __user *data) +SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path, + int, flag, void __user *, data) { int retval = -EINVAL; char *name; @@ -399,8 +398,7 @@ osf_mount(unsigned long typenr, char __user *path, int flag, void __user *data) return retval; } -asmlinkage int -osf_utsname(char __user *name) +SYSCALL_DEFINE1(osf_utsname, char __user *, name) { int error; @@ -423,14 +421,12 @@ osf_utsname(char __user *name) return error; } -asmlinkage unsigned long -sys_getpagesize(void) +SYSCALL_DEFINE0(getpagesize) { return PAGE_SIZE; } -asmlinkage unsigned long -sys_getdtablesize(void) +SYSCALL_DEFINE0(getdtablesize) { return sysctl_nr_open; } @@ -438,8 +434,7 @@ sys_getdtablesize(void) /* * For compatibility with OSF/1 only. Use utsname(2) instead. */ -asmlinkage int -osf_getdomainname(char __user *name, int namelen) +SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen) { unsigned len; int i; @@ -527,8 +522,8 @@ enum pl_code { PL_DEL = 5, PL_FDEL = 6 }; -asmlinkage long -osf_proplist_syscall(enum pl_code code, union pl_args __user *args) +SYSCALL_DEFINE2(osf_proplist_syscall, enum pl_code, code, + union pl_args __user *, args) { long error; int __user *min_buf_size_ptr; @@ -567,8 +562,8 @@ osf_proplist_syscall(enum pl_code code, union pl_args __user *args) return error; } -asmlinkage int -osf_sigstack(struct sigstack __user *uss, struct sigstack __user *uoss) +SYSCALL_DEFINE2(osf_sigstack, struct sigstack __user *, uss, + struct sigstack __user *, uoss) { unsigned long usp = rdusp(); unsigned long oss_sp = current->sas_ss_sp + current->sas_ss_size; @@ -608,8 +603,7 @@ osf_sigstack(struct sigstack __user *uss, struct sigstack __user *uoss) return error; } -asmlinkage long -osf_sysinfo(int command, char __user *buf, long count) +SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) { char *sysinfo_table[] = { utsname()->sysname, @@ -647,9 +641,8 @@ osf_sysinfo(int command, char __user *buf, long count) return err; } -asmlinkage unsigned long -osf_getsysinfo(unsigned long op, void __user *buffer, unsigned long nbytes, - int __user *start, void __user *arg) +SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer, + unsigned long, nbytes, int __user *, start, void __user *, arg) { unsigned long w; struct percpu_struct *cpu; @@ -705,9 +698,8 @@ osf_getsysinfo(unsigned long op, void __user *buffer, unsigned long nbytes, return -EOPNOTSUPP; } -asmlinkage unsigned long -osf_setsysinfo(unsigned long op, void __user *buffer, unsigned long nbytes, - int __user *start, void __user *arg) +SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, + unsigned long, nbytes, int __user *, start, void __user *, arg) { switch (op) { case SSI_IEEE_FP_CONTROL: { @@ -880,8 +872,8 @@ jiffies_to_timeval32(unsigned long jiffies, struct timeval32 *value) value->tv_sec = jiffies / HZ; } -asmlinkage int -osf_gettimeofday(struct timeval32 __user *tv, struct timezone __user *tz) +SYSCALL_DEFINE2(osf_gettimeofday, struct timeval32 __user *, tv, + struct timezone __user *, tz) { if (tv) { struct timeval ktv; @@ -896,8 +888,8 @@ osf_gettimeofday(struct timeval32 __user *tv, struct timezone __user *tz) return 0; } -asmlinkage int -osf_settimeofday(struct timeval32 __user *tv, struct timezone __user *tz) +SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv, + struct timezone __user *, tz) { struct timespec kts; struct timezone ktz; @@ -916,8 +908,7 @@ osf_settimeofday(struct timeval32 __user *tv, struct timezone __user *tz) return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } -asmlinkage int -osf_getitimer(int which, struct itimerval32 __user *it) +SYSCALL_DEFINE2(osf_getitimer, int, which, struct itimerval32 __user *, it) { struct itimerval kit; int error; @@ -929,8 +920,8 @@ osf_getitimer(int which, struct itimerval32 __user *it) return error; } -asmlinkage int -osf_setitimer(int which, struct itimerval32 __user *in, struct itimerval32 __user *out) +SYSCALL_DEFINE3(osf_setitimer, int, which, struct itimerval32 __user *, in, + struct itimerval32 __user *, out) { struct itimerval kin, kout; int error; @@ -952,8 +943,8 @@ osf_setitimer(int which, struct itimerval32 __user *in, struct itimerval32 __use } -asmlinkage int -osf_utimes(char __user *filename, struct timeval32 __user *tvs) +SYSCALL_DEFINE2(osf_utimes, char __user *, filename, + struct timeval32 __user *, tvs) { struct timespec tv[2]; @@ -979,9 +970,8 @@ osf_utimes(char __user *filename, struct timeval32 __user *tvs) #define MAX_SELECT_SECONDS \ ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) -asmlinkage int -osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, - struct timeval32 __user *tvp) +SYSCALL_DEFINE5(osf_select, int, n, fd_set __user *, inp, fd_set __user *, outp, + fd_set __user *, exp, struct timeval32 __user *, tvp) { struct timespec end_time, *to = NULL; if (tvp) { @@ -1026,8 +1016,7 @@ struct rusage32 { long ru_nivcsw; /* involuntary " */ }; -asmlinkage int -osf_getrusage(int who, struct rusage32 __user *ru) +SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) { struct rusage32 r; @@ -1053,9 +1042,8 @@ osf_getrusage(int who, struct rusage32 __user *ru) return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; } -asmlinkage long -osf_wait4(pid_t pid, int __user *ustatus, int options, - struct rusage32 __user *ur) +SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, + struct rusage32 __user *, ur) { struct rusage r; long ret, err; @@ -1101,8 +1089,8 @@ osf_wait4(pid_t pid, int __user *ustatus, int options, * seems to be a timeval pointer, and I suspect the second * one is the time remaining.. Ho humm.. No documentation. */ -asmlinkage int -osf_usleep_thread(struct timeval32 __user *sleep, struct timeval32 __user *remain) +SYSCALL_DEFINE2(osf_usleep_thread, struct timeval32 __user *, sleep, + struct timeval32 __user *, remain) { struct timeval tmp; unsigned long ticks; @@ -1155,8 +1143,7 @@ struct timex32 { int :32; int :32; int :32; int :32; }; -asmlinkage int -sys_old_adjtimex(struct timex32 __user *txc_p) +SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) { struct timex txc; int ret; @@ -1267,8 +1254,8 @@ osf_fix_iov_len(const struct iovec __user *iov, unsigned long count) return 0; } -asmlinkage ssize_t -osf_readv(unsigned long fd, const struct iovec __user * vector, unsigned long count) +SYSCALL_DEFINE3(osf_readv, unsigned long, fd, + const struct iovec __user *, vector, unsigned long, count) { if (unlikely(personality(current->personality) == PER_OSF4)) if (osf_fix_iov_len(vector, count)) @@ -1276,8 +1263,8 @@ osf_readv(unsigned long fd, const struct iovec __user * vector, unsigned long co return sys_readv(fd, vector, count); } -asmlinkage ssize_t -osf_writev(unsigned long fd, const struct iovec __user * vector, unsigned long count) +SYSCALL_DEFINE3(osf_writev, unsigned long, fd, + const struct iovec __user *, vector, unsigned long, count) { if (unlikely(personality(current->personality) == PER_OSF4)) if (osf_fix_iov_len(vector, count)) diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 410af4f3140e..df65eaa84c4c 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -51,8 +52,8 @@ static void do_signal(struct pt_regs *, struct switch_stack *, * Note that we don't need to acquire the kernel lock for SMP * operation, as all of this is local to this thread. */ -asmlinkage unsigned long -do_osf_sigprocmask(int how, unsigned long newmask, struct pt_regs *regs) +SYSCALL_DEFINE3(osf_sigprocmask, int, how, unsigned long, newmask, + struct pt_regs *, regs) { unsigned long oldmask = -EINVAL; @@ -81,9 +82,9 @@ do_osf_sigprocmask(int how, unsigned long newmask, struct pt_regs *regs) return oldmask; } -asmlinkage int -osf_sigaction(int sig, const struct osf_sigaction __user *act, - struct osf_sigaction __user *oact) +SYSCALL_DEFINE3(osf_sigaction, int, sig, + const struct osf_sigaction __user *, act, + struct osf_sigaction __user *, oact) { struct k_sigaction new_ka, old_ka; int ret; @@ -112,10 +113,9 @@ osf_sigaction(int sig, const struct osf_sigaction __user *act, return ret; } -asmlinkage long -sys_rt_sigaction(int sig, const struct sigaction __user *act, - struct sigaction __user *oact, - size_t sigsetsize, void __user *restorer) +SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, + struct sigaction __user *, oact, + size_t, sigsetsize, void __user *, restorer) { struct k_sigaction new_ka, old_ka; int ret; diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 9d9e3a98bb95..95c9aef1c106 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -17,7 +17,7 @@ sys_call_table: .quad sys_write .quad alpha_ni_syscall /* 5 */ .quad sys_close - .quad osf_wait4 + .quad sys_osf_wait4 .quad alpha_ni_syscall .quad sys_link .quad sys_unlink /* 10 */ @@ -27,11 +27,11 @@ sys_call_table: .quad sys_mknod .quad sys_chmod /* 15 */ .quad sys_chown - .quad osf_brk + .quad sys_osf_brk .quad alpha_ni_syscall .quad sys_lseek .quad sys_getxpid /* 20 */ - .quad osf_mount + .quad sys_osf_mount .quad sys_umount .quad sys_setuid .quad sys_getxuid @@ -53,7 +53,7 @@ sys_call_table: .quad alpha_ni_syscall /* 40 */ .quad sys_dup .quad sys_alpha_pipe - .quad osf_set_program_attributes + .quad sys_osf_set_program_attributes .quad alpha_ni_syscall .quad sys_open /* 45 */ .quad alpha_ni_syscall @@ -81,7 +81,7 @@ sys_call_table: .quad sys_newlstat .quad alpha_ni_syscall .quad alpha_ni_syscall /* 70 */ - .quad osf_mmap + .quad sys_osf_mmap .quad alpha_ni_syscall .quad sys_munmap .quad sys_mprotect @@ -94,17 +94,17 @@ sys_call_table: .quad sys_setgroups /* 80 */ .quad alpha_ni_syscall .quad sys_setpgid - .quad osf_setitimer + .quad sys_osf_setitimer .quad alpha_ni_syscall .quad alpha_ni_syscall /* 85 */ - .quad osf_getitimer + .quad sys_osf_getitimer .quad sys_gethostname .quad sys_sethostname .quad sys_getdtablesize .quad sys_dup2 /* 90 */ .quad sys_newfstat .quad sys_fcntl - .quad osf_select + .quad sys_osf_select .quad sys_poll .quad sys_fsync /* 95 */ .quad sys_setpriority @@ -123,22 +123,22 @@ sys_call_table: .quad alpha_ni_syscall .quad alpha_ni_syscall /* 110 */ .quad sys_sigsuspend - .quad osf_sigstack + .quad sys_osf_sigstack .quad sys_recvmsg .quad sys_sendmsg .quad alpha_ni_syscall /* 115 */ - .quad osf_gettimeofday - .quad osf_getrusage + .quad sys_osf_gettimeofday + .quad sys_osf_getrusage .quad sys_getsockopt .quad alpha_ni_syscall #ifdef CONFIG_OSF4_COMPAT - .quad osf_readv /* 120 */ - .quad osf_writev + .quad sys_osf_readv /* 120 */ + .quad sys_osf_writev #else .quad sys_readv /* 120 */ .quad sys_writev #endif - .quad osf_settimeofday + .quad sys_osf_settimeofday .quad sys_fchown .quad sys_fchmod .quad sys_recvfrom /* 125 */ @@ -154,7 +154,7 @@ sys_call_table: .quad sys_socketpair /* 135 */ .quad sys_mkdir .quad sys_rmdir - .quad osf_utimes + .quad sys_osf_utimes .quad alpha_ni_syscall .quad alpha_ni_syscall /* 140 */ .quad sys_getpeername @@ -172,16 +172,16 @@ sys_call_table: .quad alpha_ni_syscall .quad alpha_ni_syscall .quad alpha_ni_syscall /* 155 */ - .quad osf_sigaction + .quad sys_osf_sigaction .quad alpha_ni_syscall .quad alpha_ni_syscall - .quad osf_getdirentries - .quad osf_statfs /* 160 */ - .quad osf_fstatfs + .quad sys_osf_getdirentries + .quad sys_osf_statfs /* 160 */ + .quad sys_osf_fstatfs .quad alpha_ni_syscall .quad alpha_ni_syscall .quad alpha_ni_syscall - .quad osf_getdomainname /* 165 */ + .quad sys_osf_getdomainname /* 165 */ .quad sys_setdomainname .quad alpha_ni_syscall .quad alpha_ni_syscall @@ -224,7 +224,7 @@ sys_call_table: .quad sys_semctl .quad sys_semget /* 205 */ .quad sys_semop - .quad osf_utsname + .quad sys_osf_utsname .quad sys_lchown .quad sys_shmat .quad sys_shmctl /* 210 */ @@ -258,23 +258,23 @@ sys_call_table: .quad alpha_ni_syscall .quad alpha_ni_syscall .quad alpha_ni_syscall /* 240 */ - .quad osf_sysinfo + .quad sys_osf_sysinfo .quad alpha_ni_syscall .quad alpha_ni_syscall - .quad osf_proplist_syscall + .quad sys_osf_proplist_syscall .quad alpha_ni_syscall /* 245 */ .quad alpha_ni_syscall .quad alpha_ni_syscall .quad alpha_ni_syscall .quad alpha_ni_syscall .quad alpha_ni_syscall /* 250 */ - .quad osf_usleep_thread + .quad sys_osf_usleep_thread .quad alpha_ni_syscall .quad alpha_ni_syscall .quad sys_sysfs .quad alpha_ni_syscall /* 255 */ - .quad osf_getsysinfo - .quad osf_setsysinfo + .quad sys_osf_getsysinfo + .quad sys_osf_setsysinfo .quad alpha_ni_syscall .quad alpha_ni_syscall .quad alpha_ni_syscall /* 260 */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 16875f89e6a7..0eda02ff2414 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -108,9 +108,14 @@ struct old_linux_dirent; asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ "\t.globl ." #alias "\n\t.set ." #alias ", ." #name) #else +#ifdef CONFIG_ALPHA +#define SYSCALL_ALIAS(alias, name) \ + asm ( #alias " = " #name "\n\t.globl " #alias) +#else #define SYSCALL_ALIAS(alias, name) \ asm ("\t.globl " #alias "\n\t.set " #alias ", " #name) #endif +#endif #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -- cgit v1.2.3-71-gd317 From 804b3c28a4e4fa1c224571bf76edb534b9c4b1ed Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Thu, 29 Jan 2009 14:25:21 -0800 Subject: cgroups: add cpu_relax() calls in css_tryget() and cgroup_clear_css_refs() css_tryget() and cgroup_clear_css_refs() contain polling loops; these loops should have cpu_relax calls in them to reduce cross-cache traffic. Signed-off-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 1 + kernel/cgroup.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e267e62827bb..e4e8e117d27d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -99,6 +99,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) while (!atomic_inc_not_zero(&css->refcnt)) { if (test_bit(CSS_REMOVED, &css->flags)) return false; + cpu_relax(); } return true; } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0066092de19a..492215d67fa5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2509,7 +2509,7 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) for_each_subsys(cgrp->root, ss) { struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; int refcnt; - do { + while (1) { /* We can only remove a CSS with a refcnt==1 */ refcnt = atomic_read(&css->refcnt); if (refcnt > 1) { @@ -2523,7 +2523,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) * css_tryget() to spin until we set the * CSS_REMOVED bits or abort */ - } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt); + if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt) + break; + cpu_relax(); + } } done: for_each_subsys(cgrp->root, ss) { -- cgit v1.2.3-71-gd317 From 9df04e1f25effde823a600e755b51475d438f56b Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Thu, 29 Jan 2009 14:25:26 -0800 Subject: epoll: drop max_user_instances and rely only on max_user_watches Linus suggested to put limits where the money is, and max_user_watches already does that w/out the need of max_user_instances. That has the advantage to mitigate the potential DoS while allowing pretty generous default behavior. Allowing top 4% of low memory (per user) to be allocated in epoll watches, we have: LOMEM MAX_WATCHES (per user) 512MB ~178000 1GB ~356000 2GB ~712000 A box with 512MB of lomem, will meet some challenge in hitting 180K watches, socket buffers math teaches us. No more max_user_instances limits then. Signed-off-by: Davide Libenzi Cc: Willy Tarreau Cc: Michael Kerrisk Cc: Bron Gondwana Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 22 ++++------------------ include/linux/sched.h | 1 - 2 files changed, 4 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ba2f9ec71192..011b9b8c90c6 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -234,8 +234,6 @@ struct ep_pqueue { /* * Configuration options available inside /proc/sys/fs/epoll/ */ -/* Maximum number of epoll devices, per user */ -static int max_user_instances __read_mostly; /* Maximum number of epoll watched descriptors, per user */ static int max_user_watches __read_mostly; @@ -260,14 +258,6 @@ static struct kmem_cache *pwq_cache __read_mostly; static int zero; ctl_table epoll_table[] = { - { - .procname = "max_user_instances", - .data = &max_user_instances, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .extra1 = &zero, - }, { .procname = "max_user_watches", .data = &max_user_watches, @@ -491,7 +481,6 @@ static void ep_free(struct eventpoll *ep) mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); - atomic_dec(&ep->user->epoll_devs); free_uid(ep->user); kfree(ep); } @@ -581,10 +570,6 @@ static int ep_alloc(struct eventpoll **pep) struct eventpoll *ep; user = get_current_user(); - error = -EMFILE; - if (unlikely(atomic_read(&user->epoll_devs) >= - max_user_instances)) - goto free_uid; error = -ENOMEM; ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (unlikely(!ep)) @@ -1141,7 +1126,6 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) flags & O_CLOEXEC); if (fd < 0) ep_free(ep); - atomic_inc(&ep->user->epoll_devs); error_return: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", @@ -1366,8 +1350,10 @@ static int __init eventpoll_init(void) struct sysinfo si; si_meminfo(&si); - max_user_instances = 128; - max_user_watches = (((si.totalram - si.totalhigh) / 32) << PAGE_SHIFT) / + /* + * Allows top 4% of lomem to be allocated for epoll watches (per user). + */ + max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) / EP_ITEM_COST; /* Initialize the structure used to perform safe poll wait head wake ups */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 02e16d207304..5a7c76388731 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -630,7 +630,6 @@ struct user_struct { atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ #endif #ifdef CONFIG_EPOLL - atomic_t epoll_devs; /* The number of epoll descriptors currently open */ atomic_t epoll_watches; /* The number of file descriptors currently watched */ #endif #ifdef CONFIG_POSIX_MQUEUE -- cgit v1.2.3-71-gd317 From 7b24fc4d7eb611da367dea3aad45473050aacd6c Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Sun, 4 Jan 2009 02:43:38 -0500 Subject: block: Don't verify integrity metadata on read error If we get an I/O error on a read request there is no point in doing a verify pass on the integrity buffer. Adjust the completion path accordingly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 25 +++++++++++++++---------- include/linux/bio.h | 1 - 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 77ebc3c263d6..8396d741f804 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -465,7 +465,7 @@ static int bio_integrity_verify(struct bio *bio) if (ret) { kunmap_atomic(kaddr, KM_USER0); - break; + return ret; } sectors = bv->bv_len / bi->sector_size; @@ -493,18 +493,13 @@ static void bio_integrity_verify_fn(struct work_struct *work) struct bio_integrity_payload *bip = container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; - int error = bip->bip_error; + int error; - if (bio_integrity_verify(bio)) { - clear_bit(BIO_UPTODATE, &bio->bi_flags); - error = -EIO; - } + error = bio_integrity_verify(bio); /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; - - if (bio->bi_end_io) - bio->bi_end_io(bio, error); + bio_endio(bio, error); } /** @@ -525,7 +520,17 @@ void bio_integrity_endio(struct bio *bio, int error) BUG_ON(bip->bip_bio != bio); - bip->bip_error = error; + /* In case of an I/O error there is no point in verifying the + * integrity metadata. Restore original bio end_io handler + * and run it. + */ + if (error) { + bio->bi_end_io = bip->bip_end_io; + bio_endio(bio, error); + + return; + } + INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); queue_work(kintegrityd_wq, &bip->bip_work); } diff --git a/include/linux/bio.h b/include/linux/bio.h index 18462c5b8fff..18fc4a281a7b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -312,7 +312,6 @@ struct bio_integrity_payload { void *bip_buf; /* generated integrity data */ bio_end_io_t *bip_end_io; /* saved I/O completion fn */ - int bip_error; /* saved I/O error */ unsigned int bip_size; unsigned short bip_pool; /* pool the ivec came from */ -- cgit v1.2.3-71-gd317 From 16642eb68216d8e0e136a99e514e9166e7125838 Mon Sep 17 00:00:00 2001 From: Alberto Bertogli Date: Mon, 5 Jan 2009 10:18:53 +0100 Subject: Fix small typo in bio.h's documentation Signed-off-by: Alberto Bertogli Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 18fc4a281a7b..5175aa3103c6 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -144,7 +144,7 @@ struct bio { * bit 1 -- rw-ahead when set * bit 2 -- barrier * Insert a serialization point in the IO queue, forcing previously - * submitted IO to be completed before this oen is issued. + * submitted IO to be completed before this one is issued. * bit 3 -- synchronous I/O hint: the block layer will unplug immediately * Note that this does NOT indicate that the IO itself is sync, just * that the block layer will not postpone issue of this IO by plugging. -- cgit v1.2.3-71-gd317 From 213d9417fec62ef4c3675621b9364a667954d4dd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Jan 2009 09:16:05 +0100 Subject: block: seperate bio/request unplug and sync bits Signed-off-by: Jens Axboe --- block/blk-core.c | 5 ++++- include/linux/bio.h | 18 +++++++++++------- include/linux/blkdev.h | 2 ++ 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index a824e49c0d0a..9e2e86fb78b8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1125,6 +1125,8 @@ void init_request_from_bio(struct request *req, struct bio *bio) if (bio_sync(bio)) req->cmd_flags |= REQ_RW_SYNC; + if (bio_unplug(bio)) + req->cmd_flags |= REQ_UNPLUG; if (bio_rw_meta(bio)) req->cmd_flags |= REQ_RW_META; @@ -1141,6 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) int el_ret, nr_sectors; const unsigned short prio = bio_prio(bio); const int sync = bio_sync(bio); + const int unplug = bio_unplug(bio); int rw_flags; nr_sectors = bio_sectors(bio); @@ -1244,7 +1247,7 @@ get_rq: blk_plug_device(q); add_request(q, req); out: - if (sync || blk_queue_nonrot(q)) + if (unplug || blk_queue_nonrot(q)) __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); return 0; diff --git a/include/linux/bio.h b/include/linux/bio.h index 5175aa3103c6..f53568c5852e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -163,12 +163,15 @@ struct bio { #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ #define BIO_RW_BARRIER 2 -#define BIO_RW_SYNC 3 -#define BIO_RW_META 4 -#define BIO_RW_DISCARD 5 -#define BIO_RW_FAILFAST_DEV 6 -#define BIO_RW_FAILFAST_TRANSPORT 7 -#define BIO_RW_FAILFAST_DRIVER 8 +#define BIO_RW_SYNCIO 3 +#define BIO_RW_UNPLUG 4 +#define BIO_RW_META 5 +#define BIO_RW_DISCARD 6 +#define BIO_RW_FAILFAST_DEV 7 +#define BIO_RW_FAILFAST_TRANSPORT 8 +#define BIO_RW_FAILFAST_DRIVER 9 + +#define BIO_RW_SYNC (BIO_RW_SYNCIO | BIO_RW_UNPLUG) /* * upper 16 bits of bi_rw define the io priority of this bio @@ -194,7 +197,8 @@ struct bio { #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) -#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) +#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNCIO)) +#define bio_unplug(bio) ((bio)->bi_rw & (1 << BIO_RW_UNPLUG)) #define bio_failfast_dev(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DEV)) #define bio_failfast_transport(bio) \ ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_TRANSPORT)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 044467ef7b11..75426e4b8cdf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -108,6 +108,7 @@ enum rq_flag_bits { __REQ_RW_META, /* metadata io request */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ + __REQ_UNPLUG, /* unplug queue on submission */ __REQ_NR_BITS, /* stops here */ }; @@ -134,6 +135,7 @@ enum rq_flag_bits { #define REQ_RW_META (1 << __REQ_RW_META) #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) +#define REQ_UNPLUG (1 << __REQ_UNPLUG) #define BLK_MAX_CDB 16 -- cgit v1.2.3-71-gd317 From 1dfa17f4ab8543d82caf4d36636b93916a18f456 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Jan 2009 09:21:49 +0100 Subject: block: add bio_rw_flagged() for testing bio->bi_rw The existing functions for checking bio->bi_rw are badly named. So lets mirror what we do for bio->bi_flags testing, use a properly named function so that it's immediately obvious what is being tested. Maintain compatability names for the old macros, eventually we'll get rid of these. Signed-off-by: Jens Axboe --- include/linux/bio.h | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index f53568c5852e..0942765cf8c0 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -173,6 +173,24 @@ struct bio { #define BIO_RW_SYNC (BIO_RW_SYNCIO | BIO_RW_UNPLUG) +#define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) + +/* + * Old defines, these should eventually be replaced by direct usage of + * bio_rw_flagged() + */ +#define bio_barrier(bio) bio_rw_flagged(bio, BIO_RW_BARRIER) +#define bio_sync(bio) bio_rw_flagged(bio, BIO_RW_SYNCIO) +#define bio_unplug(bio) bio_rw_flagged(bio, BIO_RW_UNPLUG) +#define bio_failfast_dev(bio) bio_rw_flagged(bio, BIO_RW_FAILFAST_DEV) +#define bio_failfast_transport(bio) \ + bio_rw_flagged(bio, BIO_RW_FAILFAST_TRANSPORT) +#define bio_failfast_driver(bio) \ + bio_rw_flagged(bio, BIO_RW_FAILFAST_DRIVER) +#define bio_rw_ahead(bio) bio_rw_flagged(bio, BIO_RW_AHEAD) +#define bio_rw_meta(bio) bio_rw_flagged(bio, BIO_RW_META) +#define bio_discard(bio) bio_rw_flagged(bio, BIO_RW_DISCARD) + /* * upper 16 bits of bi_rw define the io priority of this bio */ @@ -196,16 +214,6 @@ struct bio { #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) -#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) -#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNCIO)) -#define bio_unplug(bio) ((bio)->bi_rw & (1 << BIO_RW_UNPLUG)) -#define bio_failfast_dev(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DEV)) -#define bio_failfast_transport(bio) \ - ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_TRANSPORT)) -#define bio_failfast_driver(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DRIVER)) -#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) -#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) -#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) static inline unsigned int bio_cur_sectors(struct bio *bio) -- cgit v1.2.3-71-gd317 From a229fc61ef0ee3c30fd193beee0eeb87410227f1 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 19 Jan 2009 10:37:38 +0100 Subject: include/linux: Add bsg.h to the Kernel exported headers bsg.h in current form is perfectly suitable for user-mode consumption. It is needed together with scsi/sg.h for applications that want to interface with the bsg driver. Currently the few projects that use it would copy it over into the projects. But that is not acceptable for projects that need to provide source and devel packages for distros. This should also be submitted to stable 2.6.28 and 2.6.27 since bsg had a stable API since these Kernels and distro users will need the header for these kernels a swell Signed-off-by: Boaz Harrosh Acked-by: FUJITA Tomonori CC: stable@kernel.org Signed-off-by: Jens Axboe --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 12e9a2957caf..2124c063a7ef 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -41,6 +41,7 @@ header-y += baycom.h header-y += bfs_fs.h header-y += blkpg.h header-y += bpqether.h +header-y += bsg.h header-y += can.h header-y += cdk.h header-y += chio.h -- cgit v1.2.3-71-gd317 From bc58ba9468d94d62c56ab9b47173583ec140b165 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Jan 2009 10:54:44 +0100 Subject: block: add sysfs file for controlling io stats accounting This allows us to turn off disk stat accounting completely, for the cases where the 0.5-1% reduction in system time is important. Signed-off-by: Jens Axboe --- block/blk-core.c | 90 ++++++++++++++++++++++++++++++-------------------- block/blk-sysfs.c | 28 ++++++++++++++++ include/linux/blkdev.h | 6 ++++ 3 files changed, 88 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index ae75c047f45d..ca69f3d94100 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -64,11 +64,12 @@ static struct workqueue_struct *kblockd_workqueue; static void drive_stat_acct(struct request *rq, int new_io) { + struct gendisk *disk = rq->rq_disk; struct hd_struct *part; int rw = rq_data_dir(rq); int cpu; - if (!blk_fs_request(rq) || !rq->rq_disk) + if (!blk_fs_request(rq) || !disk || !blk_queue_io_stat(disk->queue)) return; cpu = part_stat_lock(); @@ -599,8 +600,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) q->request_fn = rfn; q->prep_rq_fn = NULL; q->unplug_fn = generic_unplug_device; - q->queue_flags = (1 << QUEUE_FLAG_CLUSTER | - 1 << QUEUE_FLAG_STACKABLE); + q->queue_flags = QUEUE_FLAG_DEFAULT; q->queue_lock = lock; blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK); @@ -1663,6 +1663,55 @@ void blkdev_dequeue_request(struct request *req) } EXPORT_SYMBOL(blkdev_dequeue_request); +static void blk_account_io_completion(struct request *req, unsigned int bytes) +{ + struct gendisk *disk = req->rq_disk; + + if (!disk || !blk_queue_io_stat(disk->queue)) + return; + + if (blk_fs_request(req)) { + const int rw = rq_data_dir(req); + struct hd_struct *part; + int cpu; + + cpu = part_stat_lock(); + part = disk_map_sector_rcu(req->rq_disk, req->sector); + part_stat_add(cpu, part, sectors[rw], bytes >> 9); + part_stat_unlock(); + } +} + +static void blk_account_io_done(struct request *req) +{ + struct gendisk *disk = req->rq_disk; + + if (!disk || !blk_queue_io_stat(disk->queue)) + return; + + /* + * Account IO completion. bar_rq isn't accounted as a normal + * IO on queueing nor completion. Accounting the containing + * request is enough. + */ + if (blk_fs_request(req) && req != &req->q->bar_rq) { + unsigned long duration = jiffies - req->start_time; + const int rw = rq_data_dir(req); + struct hd_struct *part; + int cpu; + + cpu = part_stat_lock(); + part = disk_map_sector_rcu(disk, req->sector); + + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_round_stats(cpu, part); + part_dec_in_flight(part); + + part_stat_unlock(); + } +} + /** * __end_that_request_first - end I/O on a request * @req: the request being processed @@ -1698,16 +1747,7 @@ static int __end_that_request_first(struct request *req, int error, (unsigned long long)req->sector); } - if (blk_fs_request(req) && req->rq_disk) { - const int rw = rq_data_dir(req); - struct hd_struct *part; - int cpu; - - cpu = part_stat_lock(); - part = disk_map_sector_rcu(req->rq_disk, req->sector); - part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); - part_stat_unlock(); - } + blk_account_io_completion(req, nr_bytes); total_bytes = bio_nbytes = 0; while ((bio = req->bio) != NULL) { @@ -1787,8 +1827,6 @@ static int __end_that_request_first(struct request *req, int error, */ static void end_that_request_last(struct request *req, int error) { - struct gendisk *disk = req->rq_disk; - if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); @@ -1800,27 +1838,7 @@ static void end_that_request_last(struct request *req, int error) blk_delete_timer(req); - /* - * Account IO completion. bar_rq isn't accounted as a normal - * IO on queueing nor completion. Accounting the containing - * request is enough. - */ - if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { - unsigned long duration = jiffies - req->start_time; - const int rw = rq_data_dir(req); - struct hd_struct *part; - int cpu; - - cpu = part_stat_lock(); - part = disk_map_sector_rcu(disk, req->sector); - - part_stat_inc(cpu, part, ios[rw]); - part_stat_add(cpu, part, ticks[rw], duration); - part_round_stats(cpu, part); - part_dec_in_flight(part); - - part_stat_unlock(); - } + blk_account_io_done(req); if (req->end_io) req->end_io(req, error); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b538ab8dbbd1..e29ddfc73cf4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -197,6 +197,27 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) return ret; } +static ssize_t queue_iostats_show(struct request_queue *q, char *page) +{ + return queue_var_show(blk_queue_io_stat(q), page); +} + +static ssize_t queue_iostats_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long stats; + ssize_t ret = queue_var_store(&stats, page, count); + + spin_lock_irq(q->queue_lock); + if (stats) + queue_flag_set(QUEUE_FLAG_IO_STAT, q); + else + queue_flag_clear(QUEUE_FLAG_IO_STAT, q); + spin_unlock_irq(q->queue_lock); + + return ret; +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -249,6 +270,12 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = { .store = queue_rq_affinity_store, }; +static struct queue_sysfs_entry queue_iostats_entry = { + .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR }, + .show = queue_iostats_show, + .store = queue_iostats_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -259,6 +286,7 @@ static struct attribute *default_attrs[] = { &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, + &queue_iostats_entry.attr, NULL, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 75426e4b8cdf..d08c4b8219a6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -451,6 +451,11 @@ struct request_queue #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ +#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ + +#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ + (1 << QUEUE_FLAG_CLUSTER) | \ + 1 << QUEUE_FLAG_STACKABLE) static inline int queue_is_locked(struct request_queue *q) { @@ -567,6 +572,7 @@ enum { #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) +#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) -- cgit v1.2.3-71-gd317 From 9d6aa4c7ece26652fcbfe37bd45679eac5f69347 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 19:50:25 +0530 Subject: headers_check fix: can/bcm.h fix the following 'make headers_check' warning: usr/include/linux/can/bcm.h:29: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/can/bcm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/bcm.h b/include/linux/can/bcm.h index 7f293273c444..1432b278c52d 100644 --- a/include/linux/can/bcm.h +++ b/include/linux/can/bcm.h @@ -14,6 +14,8 @@ #ifndef CAN_BCM_H #define CAN_BCM_H +#include + /** * struct bcm_msg_head - head of messages to/from the broadcast manager * @opcode: opcode, see enum below. -- cgit v1.2.3-71-gd317 From 15cf98ad2965aaefaa2f85332535ff39e48f9f4e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 19:53:38 +0530 Subject: headers_check fix: dvb/audio.h fix the following 'make headers_check' warning: usr/include/linux/dvb/audio.h:133: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/dvb/audio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dvb/audio.h b/include/linux/dvb/audio.h index 89412e18f571..bb0df2aaebfa 100644 --- a/include/linux/dvb/audio.h +++ b/include/linux/dvb/audio.h @@ -24,12 +24,7 @@ #ifndef _DVBAUDIO_H_ #define _DVBAUDIO_H_ -#ifdef __KERNEL__ #include -#else -#include -#endif - typedef enum { AUDIO_SOURCE_DEMUX, /* Select the demux as the main source */ -- cgit v1.2.3-71-gd317 From c86629c855800071314810881d1d2fb226ca9ec9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 19:55:32 +0530 Subject: headers_check fix: dvb/dmx.h fix the following 'make headers_check' warnings: usr/include/linux/dvb/dmx.h:27: include of is preferred over usr/include/linux/dvb/dmx.h:90: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/dvb/dmx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dvb/dmx.h b/include/linux/dvb/dmx.h index 402fb7a8d922..fef943738a24 100644 --- a/include/linux/dvb/dmx.h +++ b/include/linux/dvb/dmx.h @@ -24,7 +24,7 @@ #ifndef _DVBDMX_H_ #define _DVBDMX_H_ -#include +#include #ifdef __KERNEL__ #include #else -- cgit v1.2.3-71-gd317 From de189f078ee4ae74944e6827dff184a3ef1fc89b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 19:56:41 +0530 Subject: headers_check fix: dvb/frontend.h fix the following 'make headers_check' warnings: usr/include/linux/dvb/frontend.h:29: include of is preferred over usr/include/linux/dvb/frontend.h:76: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/dvb/frontend.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h index 55026b1a40bd..51c8d2d49e42 100644 --- a/include/linux/dvb/frontend.h +++ b/include/linux/dvb/frontend.h @@ -26,8 +26,7 @@ #ifndef _DVBFRONTEND_H_ #define _DVBFRONTEND_H_ -#include - +#include typedef enum fe_type { FE_QPSK, -- cgit v1.2.3-71-gd317 From 8996be9de98a9362a3192b866dd8ab9930e28ad9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 19:58:00 +0530 Subject: headers_check fix: dvb/net.h fix the following 'make headers_check' warnings: usr/include/linux/dvb/net.h:27: include of is preferred over usr/include/linux/dvb/net.h:31: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/dvb/net.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dvb/net.h b/include/linux/dvb/net.h index 5be474bf0d2b..f451e7eb0b0b 100644 --- a/include/linux/dvb/net.h +++ b/include/linux/dvb/net.h @@ -24,8 +24,7 @@ #ifndef _DVBNET_H_ #define _DVBNET_H_ -#include - +#include struct dvb_net_if { __u16 pid; -- cgit v1.2.3-71-gd317 From b852d36b86902abb272b0f2dd7a56dd2d17ea88c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 19:59:53 +0530 Subject: headers_check fix: dvb/video.h fix the following 'make headers_check' warnings: usr/include/linux/dvb/video.h:29: include of is preferred over usr/include/linux/dvb/video.h:102: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/dvb/video.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dvb/video.h b/include/linux/dvb/video.h index 50839fe9e39e..bd49c3ebf916 100644 --- a/include/linux/dvb/video.h +++ b/include/linux/dvb/video.h @@ -24,17 +24,14 @@ #ifndef _DVBVIDEO_H_ #define _DVBVIDEO_H_ -#include - -#ifdef __KERNEL__ #include +#ifdef __KERNEL__ +#include #else -#include #include #include #endif - typedef enum { VIDEO_FORMAT_4_3, /* Select 4:3 format */ VIDEO_FORMAT_16_9, /* Select 16:9 format. */ -- cgit v1.2.3-71-gd317 From 9df27bab62e60d1f786abd0599af4a5e3192a784 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:00:47 +0530 Subject: headers_check fix: netfilter/xt_conntrack.h fix the following 'make headers_check' warning: usr/include/linux/netfilter/xt_conntrack.h:40: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/netfilter/xt_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index f3fd83e46bab..8f5345275393 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h @@ -5,6 +5,7 @@ #ifndef _XT_CONNTRACK_H #define _XT_CONNTRACK_H +#include #include #define XT_CONNTRACK_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1)) -- cgit v1.2.3-71-gd317 From 3187cedf158687432cdf152eeee205f7b149f4ef Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:03:06 +0530 Subject: headers_check fix: nfsd/export.h fix the following 'make headers_check' warning: usr/include/linux/nfsd/export.h:13: include of is preferred over Signed-off-by: Jaswinder Singh Rajput --- include/linux/nfsd/export.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 5431512b2757..bcd0201589f8 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -10,9 +10,8 @@ #ifndef NFSD_EXPORT_H #define NFSD_EXPORT_H -#include -#ifdef __KERNEL__ # include +#ifdef __KERNEL__ # include #endif -- cgit v1.2.3-71-gd317 From 9e87b1e53f3c72c1196dc22cb359b5d6188a3729 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:04:03 +0530 Subject: headers_check fix: nfsd/nfsfh.h fix the following 'make headers_check' warnings: usr/include/linux/nfsd/nfsfh.h:17: include of is preferred over usr/include/linux/nfsd/nfsfh.h:28: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/nfsd/nfsfh.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index b2e093870bc6..fa317f6c154b 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -14,9 +14,8 @@ #ifndef _LINUX_NFSD_FH_H #define _LINUX_NFSD_FH_H -#include -#ifdef __KERNEL__ # include +#ifdef __KERNEL__ # include # include #endif -- cgit v1.2.3-71-gd317 From 03cf1e0c3b4ee4ef51dc7eb197a4d098ad4873af Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:05:27 +0530 Subject: headers_check fix: nfsd/syscall.h fix the following 'make headers_check' warnings: usr/include/linux/nfsd/syscall.h:12: include of is preferred over usr/include/linux/nfsd/syscall.h:104: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/nfsd/syscall.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/syscall.h b/include/linux/nfsd/syscall.h index 4e439765b705..7a3b565b898f 100644 --- a/include/linux/nfsd/syscall.h +++ b/include/linux/nfsd/syscall.h @@ -9,9 +9,8 @@ #ifndef NFSD_SYSCALL_H #define NFSD_SYSCALL_H -#include -#ifdef __KERNEL__ # include +#ifdef __KERNEL__ # include #endif #include -- cgit v1.2.3-71-gd317 From bcf74582af3feca80ec96cc21d0a26c938d1863e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:06:44 +0530 Subject: headers_check fix: raid/md_p.h fix the following 'make headers_check' warning: usr/include/linux/raid/md_p.h:85: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/raid/md_p.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 9491026afe66..6ba830fa8538 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -15,6 +15,8 @@ #ifndef _MD_P_H #define _MD_P_H +#include + /* * RAID superblock. * -- cgit v1.2.3-71-gd317 From 550e978aa52e2ac3c493e8a0b36b368ade6dd2b4 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:07:35 +0530 Subject: headers_check fix: spi/spidev.h fix the following 'make headers_check' warning: usr/include/linux/spi/spidev.h:83: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/spi/spidev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/spidev.h b/include/linux/spi/spidev.h index c93ef9d42a01..95251ccd5a07 100644 --- a/include/linux/spi/spidev.h +++ b/include/linux/spi/spidev.h @@ -22,6 +22,7 @@ #ifndef SPIDEV_H #define SPIDEV_H +#include /* User space versions of kernel symbols for SPI clocking modes, * matching -- cgit v1.2.3-71-gd317 From 2d594c0c8aa46beb21be1c5c2b7141f89d206313 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:10:05 +0530 Subject: headers_check fix: tc_act/tc_gact.h fix the following 'make headers_check' warning: usr/include/linux/tc_act/tc_gact.h:19: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/tc_act/tc_gact.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tc_act/tc_gact.h b/include/linux/tc_act/tc_gact.h index 23a03eb630db..e895c0a39629 100644 --- a/include/linux/tc_act/tc_gact.h +++ b/include/linux/tc_act/tc_gact.h @@ -1,6 +1,7 @@ #ifndef __LINUX_TC_GACT_H #define __LINUX_TC_GACT_H +#include #include #define TCA_ACT_GACT 5 -- cgit v1.2.3-71-gd317 From 9c536d275823b8a6281894f4f8c2687f60578253 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:10:44 +0530 Subject: headers_check fix: tc_act/tc_mirred.h fix the following 'make headers_check' warning: usr/include/linux/tc_act/tc_mirred.h:16: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/tc_act/tc_mirred.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tc_act/tc_mirred.h b/include/linux/tc_act/tc_mirred.h index 71d63409d568..0a99ab60d610 100644 --- a/include/linux/tc_act/tc_mirred.h +++ b/include/linux/tc_act/tc_mirred.h @@ -1,6 +1,7 @@ #ifndef __LINUX_TC_MIR_H #define __LINUX_TC_MIR_H +#include #include #define TCA_ACT_MIRRED 8 -- cgit v1.2.3-71-gd317 From 5dbbf3bcae2f6b5dee1c33b3eeced00bcb6c4f71 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:11:22 +0530 Subject: headers_check fix: tc_act/tc_pedit.h fix the following 'make headers_check' warning: usr/include/linux/tc_act/tc_pedit.h:19: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/tc_act/tc_pedit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tc_act/tc_pedit.h b/include/linux/tc_act/tc_pedit.h index 83e56e32e8e0..54ce9064115a 100644 --- a/include/linux/tc_act/tc_pedit.h +++ b/include/linux/tc_act/tc_pedit.h @@ -1,6 +1,7 @@ #ifndef __LINUX_TC_PED_H #define __LINUX_TC_PED_H +#include #include #define TCA_ACT_PEDIT 7 -- cgit v1.2.3-71-gd317 From ba3a51e3b899c1bd34c18f84a1c6f7e5f99be850 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:12:01 +0530 Subject: headers_check fix: tc_ematch/tc_em_cmp.h fix the following 'make headers_check' warning: usr/include/linux/tc_ematch/tc_em_cmp.h:8: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/tc_ematch/tc_em_cmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tc_ematch/tc_em_cmp.h b/include/linux/tc_ematch/tc_em_cmp.h index c7f4d43618fd..38e7f7b25ec2 100644 --- a/include/linux/tc_ematch/tc_em_cmp.h +++ b/include/linux/tc_ematch/tc_em_cmp.h @@ -1,6 +1,7 @@ #ifndef __LINUX_TC_EM_CMP_H #define __LINUX_TC_EM_CMP_H +#include #include struct tcf_em_cmp -- cgit v1.2.3-71-gd317 From 9976007a13e67b973f94c8d472ed615ac4cf8078 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:12:59 +0530 Subject: headers_check fix: tc_ematch/tc_em_meta.h fix the following 'make headers_check' warning: usr/include/linux/tc_ematch/tc_em_meta.h:18: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/tc_ematch/tc_em_meta.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index c50d2ba5caf0..dcfb733fa1f6 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -1,6 +1,7 @@ #ifndef __LINUX_TC_EM_META_H #define __LINUX_TC_EM_META_H +#include #include enum -- cgit v1.2.3-71-gd317 From ac836c6f1b17f674e35a7e2784541bb8ab0bce38 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:13:41 +0530 Subject: headers_check fix: tc_ematch/tc_em_nbyte.h fix the following 'make headers_check' warning: usr/include/linux/tc_ematch/tc_em_nbyte.h:8: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/tc_ematch/tc_em_nbyte.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tc_ematch/tc_em_nbyte.h b/include/linux/tc_ematch/tc_em_nbyte.h index f19d1f58ec9d..9ed8c2e58488 100644 --- a/include/linux/tc_ematch/tc_em_nbyte.h +++ b/include/linux/tc_ematch/tc_em_nbyte.h @@ -1,6 +1,7 @@ #ifndef __LINUX_TC_EM_NBYTE_H #define __LINUX_TC_EM_NBYTE_H +#include #include struct tcf_em_nbyte -- cgit v1.2.3-71-gd317 From 30f410a6d372f067df3d02e3db328720bf421c81 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:14:36 +0530 Subject: headers_check fix: tc_ematch/tc_em_text.h fix the following 'make headers_check' warning: usr/include/linux/tc_ematch/tc_em_text.h:11: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/tc_ematch/tc_em_text.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tc_ematch/tc_em_text.h b/include/linux/tc_ematch/tc_em_text.h index 7cd43e99c7f5..d12a73a225fc 100644 --- a/include/linux/tc_ematch/tc_em_text.h +++ b/include/linux/tc_ematch/tc_em_text.h @@ -1,6 +1,7 @@ #ifndef __LINUX_TC_EM_TEXT_H #define __LINUX_TC_EM_TEXT_H +#include #include #define TC_EM_TEXT_ALGOSIZ 16 -- cgit v1.2.3-71-gd317 From d8151585690d824ac5b60a94ef86f8bfd61478fa Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:15:47 +0530 Subject: headers_check fix: usb/cdc.h fix the following 'make headers_check' warning: usr/include/linux/usb/cdc.h:50: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/usb/cdc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index 18a729343ffa..3c86ed25a04c 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -9,6 +9,8 @@ #ifndef __LINUX_USB_CDC_H #define __LINUX_USB_CDC_H +#include + #define USB_CDC_SUBCLASS_ACM 0x02 #define USB_CDC_SUBCLASS_ETHERNET 0x06 #define USB_CDC_SUBCLASS_WHCM 0x08 -- cgit v1.2.3-71-gd317 From 4c866d444078d931579c50c9ce3133709390287b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:16:33 +0530 Subject: headers_check fix: usb/gadgetfs.h fix the following 'make headers_check' warning: usr/include/linux/usb/gadgetfs.h:21: include of is preferred over Signed-off-by: Jaswinder Singh Rajput --- include/linux/usb/gadgetfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadgetfs.h b/include/linux/usb/gadgetfs.h index ea45f265ec05..612102e4d75e 100644 --- a/include/linux/usb/gadgetfs.h +++ b/include/linux/usb/gadgetfs.h @@ -18,7 +18,7 @@ #ifndef __LINUX_USB_GADGETFS_H #define __LINUX_USB_GADGETFS_H -#include +#include #include #include -- cgit v1.2.3-71-gd317 From bd247b348aaa9f28a53a64df06c69d6f40ff2280 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:20:10 +0530 Subject: headers_check fix: linux/aio_abi.h fix the following 'make headers_check' warning: usr/include/linux/aio_abi.h:58: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/aio_abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/aio_abi.h b/include/linux/aio_abi.h index 9e0172931315..2c8731664180 100644 --- a/include/linux/aio_abi.h +++ b/include/linux/aio_abi.h @@ -27,6 +27,7 @@ #ifndef __LINUX__AIO_ABI_H #define __LINUX__AIO_ABI_H +#include #include typedef unsigned long aio_context_t; -- cgit v1.2.3-71-gd317 From 85c09569e563cbb9376f10da20ada42107dfef98 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:26:25 +0530 Subject: headers_check fix: linux/atalk.h fix the following 'make headers_check' warning: usr/include/linux/atalk.h:15: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/atalk.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atalk.h b/include/linux/atalk.h index e9ebac2e2ecc..d34c187432ed 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -1,6 +1,7 @@ #ifndef __LINUX_ATALK_H__ #define __LINUX_ATALK_H__ +#include #include /* -- cgit v1.2.3-71-gd317 From f757f603f7d52254120cbfcd967f67f663264c64 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:29:11 +0530 Subject: headers_check fix: linux/atmbr2684.h fix the following 'make headers_check' warning: usr/include/linux/atmbr2684.h:88: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/atmbr2684.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atmbr2684.h b/include/linux/atmbr2684.h index 52bf72affbba..fdb2629b6189 100644 --- a/include/linux/atmbr2684.h +++ b/include/linux/atmbr2684.h @@ -1,6 +1,7 @@ #ifndef _LINUX_ATMBR2684_H #define _LINUX_ATMBR2684_H +#include #include #include /* For IFNAMSIZ */ -- cgit v1.2.3-71-gd317 From 5d461bfebe4be9ae8d25d4570d4eaa415ca76f0f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:31:19 +0530 Subject: headers_check fix: linux/auto_fs4.h fix the following 'make headers_check' warning: usr/include/linux/auto_fs4.h:132: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/auto_fs4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index 55fa478bd639..8b49ac48a5b7 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -12,6 +12,7 @@ #define _LINUX_AUTO_FS4_H /* Include common v3 definitions */ +#include #include /* autofs v4 definitions */ -- cgit v1.2.3-71-gd317 From 1da9ebd5abb2e960c4ca4d49f7587e6c76b16ac0 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:34:15 +0530 Subject: headers_check fix: linux/bfs_fs.h fix the following 'make headers_check' warning: usr/include/linux/bfs_fs.h:24: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/bfs_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bfs_fs.h b/include/linux/bfs_fs.h index 8ed6dfdcd783..1c0b355aa515 100644 --- a/include/linux/bfs_fs.h +++ b/include/linux/bfs_fs.h @@ -6,6 +6,8 @@ #ifndef _LINUX_BFS_FS_H #define _LINUX_BFS_FS_H +#include + #define BFS_BSIZE_BITS 9 #define BFS_BSIZE (1< Date: Fri, 30 Jan 2009 20:36:52 +0530 Subject: headers_check fix: linux/blktrace_api.h fix the following 'make headers_check' warning: usr/include/linux/blktrace_api.h:96: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/blktrace_api.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 1dba3493d520..25379cba2370 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -1,6 +1,7 @@ #ifndef BLKTRACE_H #define BLKTRACE_H +#include #ifdef __KERNEL__ #include #include -- cgit v1.2.3-71-gd317 From 9fa91d99bfdd9582e43b6b9ab97678c51373c4ae Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:39:30 +0530 Subject: headers_check fix: linux/capability.h fix the following 'make headers_check' warning: usr/include/linux/capability.h:73: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- include/linux/capability.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 02bdb768d43b..1b9872556131 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -69,10 +69,6 @@ typedef struct __user_cap_data_struct { #define VFS_CAP_U32 VFS_CAP_U32_2 #define VFS_CAP_REVISION VFS_CAP_REVISION_2 -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES -extern int file_caps_enabled; -#endif - struct vfs_cap_data { __le32 magic_etc; /* Little endian */ struct { @@ -96,6 +92,10 @@ struct vfs_cap_data { #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES +extern int file_caps_enabled; +#endif + typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; } kernel_cap_t; -- cgit v1.2.3-71-gd317 From 960066a919f1db57817df6d02e72b01542f1deed Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:41:51 +0530 Subject: headers_check fix: linux/cdrom.h fix the following 'make headers_check' warning: usr/include/linux/cdrom.h:155: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/cdrom.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 0b49e08d3cb0..78e904796622 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -11,6 +11,7 @@ #ifndef _LINUX_CDROM_H #define _LINUX_CDROM_H +#include #include /******************************************************* -- cgit v1.2.3-71-gd317 From 59e4cf19ede2d2725c1b336707c1077afdd3cf85 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:43:31 +0530 Subject: headers_check fix: linux/cgroupstats.h fix the following 'make headers_check' warning: usr/include/linux/cgroupstats.h:31: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/cgroupstats.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cgroupstats.h b/include/linux/cgroupstats.h index 4f53abf6855d..3753c33160d1 100644 --- a/include/linux/cgroupstats.h +++ b/include/linux/cgroupstats.h @@ -15,6 +15,7 @@ #ifndef _LINUX_CGROUPSTATS_H #define _LINUX_CGROUPSTATS_H +#include #include /* -- cgit v1.2.3-71-gd317 From 37eb1f4c3320ed505fbe59a916635b2342c740e4 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:51:30 +0530 Subject: headers_check fix: linux/dlm_plock.h fix the following 'make headers_check' warning: usr/include/linux/dlm_plock.h:25: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/dlm_plock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dlm_plock.h b/include/linux/dlm_plock.h index 18d5fdbceb74..2dd21243104f 100644 --- a/include/linux/dlm_plock.h +++ b/include/linux/dlm_plock.h @@ -9,6 +9,8 @@ #ifndef __DLM_PLOCK_DOT_H__ #define __DLM_PLOCK_DOT_H__ +#include + #define DLM_PLOCK_MISC_NAME "dlm_plock" #define DLM_PLOCK_VERSION_MAJOR 1 -- cgit v1.2.3-71-gd317 From 57d1780fab89d3736de0d24189129c17178448f0 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:52:54 +0530 Subject: headers_check fix: linux/dn.h fix the following 'make headers_check' warning: usr/include/linux/dn.h:75: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/dn.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dn.h b/include/linux/dn.h index 02bba040fcfb..fe9990823193 100644 --- a/include/linux/dn.h +++ b/include/linux/dn.h @@ -1,6 +1,8 @@ #ifndef _LINUX_DN_H #define _LINUX_DN_H +#include + /* DECnet Data Structures and Constants -- cgit v1.2.3-71-gd317 From 4144147081b9d08e69055a780888fcbb7cfcbb8e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 20:54:27 +0530 Subject: headers_check fix: linux/edd.h fix the following 'make headers_check' warning: usr/include/linux/edd.h:70: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/edd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edd.h b/include/linux/edd.h index 5d747c5cd0fe..4cbd0fe9df08 100644 --- a/include/linux/edd.h +++ b/include/linux/edd.h @@ -30,6 +30,8 @@ #ifndef _LINUX_EDD_H #define _LINUX_EDD_H +#include + #define EDDNR 0x1e9 /* addr of number of edd_info structs at EDDBUF in boot_params - treat this as 1 byte */ #define EDDBUF 0xd00 /* addr of edd_info structs in boot_params */ -- cgit v1.2.3-71-gd317 From bd71b5f734c66ad0134e308036b13d122907b8c6 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:01:11 +0530 Subject: headers_check fix: linux/efs_fs_sb.h fix the following 'make headers_check' warning: usr/include/linux/efs_fs_sb.h:49: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/efs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efs_fs_sb.h b/include/linux/efs_fs_sb.h index ff1945e37790..a01be90c58cc 100644 --- a/include/linux/efs_fs_sb.h +++ b/include/linux/efs_fs_sb.h @@ -9,6 +9,7 @@ #ifndef __EFS_FS_SB_H__ #define __EFS_FS_SB_H__ +#include #include /* EFS superblock magic numbers */ -- cgit v1.2.3-71-gd317 From 177a858ff8d71a8e7f8b0ef53ff49441e29c8fb1 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:04:44 +0530 Subject: headers_check fix: linux/elf-fdpic.h fix the following 'make headers_check' warning: usr/include/linux/elf-fdpic.h:62: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- include/linux/elf-fdpic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elf-fdpic.h b/include/linux/elf-fdpic.h index 9f5b7456bff3..7cd2e80cebc8 100644 --- a/include/linux/elf-fdpic.h +++ b/include/linux/elf-fdpic.h @@ -58,11 +58,13 @@ struct elf_fdpic_params { #define ELF_FDPIC_FLAG_PRESENT 0x80000000 /* T if this object is present */ }; +#ifdef __KERNEL__ #ifdef CONFIG_MMU extern void elf_fdpic_arch_lay_out_mm(struct elf_fdpic_params *exec_params, struct elf_fdpic_params *interp_params, unsigned long *start_stack, unsigned long *start_brk); #endif +#endif /* __KERNEL__ */ #endif /* _LINUX_ELF_FDPIC_H */ -- cgit v1.2.3-71-gd317 From f4aa1c30255278b7b50a1cd273c7b4a46f099a90 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:05:50 +0530 Subject: headers_check fix: linux/elf.h fix the following 'make headers_check' warnings: usr/include/linux/elf.h:379: extern's make no sense in userspace usr/include/linux/elf.h:387: extern's make no sense in userspace usr/include/linux/elf.h:401: extern's make no sense in userspace usr/include/linux/elf.h:402: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- include/linux/elf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 0b61ca41a044..45a937be6d38 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -377,6 +377,7 @@ typedef struct elf64_note { Elf64_Word n_type; /* Content type */ } Elf64_Nhdr; +#ifdef __KERNEL__ #if ELF_CLASS == ELFCLASS32 extern Elf32_Dyn _DYNAMIC []; @@ -404,5 +405,5 @@ static inline int elf_coredump_extra_notes_write(struct file *file, extern int elf_coredump_extra_notes_size(void); extern int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset); #endif - +#endif /* __KERNEL__ */ #endif /* _LINUX_ELF_H */ -- cgit v1.2.3-71-gd317 From 93c1c0e310b56acbd366a43b15260a1775481f24 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:09:18 +0530 Subject: headers_check fix: linux/errqueue.h fix the following 'make headers_check' warning: usr/include/linux/errqueue.h:6: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/errqueue.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index 92f8d4fab32b..ceb1454b6977 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -1,6 +1,8 @@ #ifndef _LINUX_ERRQUEUE_H #define _LINUX_ERRQUEUE_H 1 +#include + struct sock_extended_err { __u32 ee_errno; -- cgit v1.2.3-71-gd317 From 985f302cb42e18912c88a3d2f9d9008844045ee3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:10:52 +0530 Subject: headers_check fix: linux/genetlink.h fix the following 'make headers_check' warning: usr/include/linux/genetlink.h:12: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/genetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 7da02c93002b..b834ef6d59fa 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -1,6 +1,7 @@ #ifndef __LINUX_GENERIC_NETLINK_H #define __LINUX_GENERIC_NETLINK_H +#include #include #define GENL_NAMSIZ 16 /* length of family name */ -- cgit v1.2.3-71-gd317 From 237416fe05067237f0bcc6370d84c09b52fb776a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:12:38 +0530 Subject: headers_check fix: linux/gfs2_ondisk.h fix the following 'make headers_check' warning: usr/include/linux/gfs2_ondisk.h:109: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/gfs2_ondisk.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 14d0df0b5749..c56b4bce56d0 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -10,6 +10,8 @@ #ifndef __GFS2_ONDISK_DOT_H__ #define __GFS2_ONDISK_DOT_H__ +#include + #define GFS2_MAGIC 0x01161970 #define GFS2_BASIC_BLOCK 512 #define GFS2_BASIC_BLOCK_SHIFT 9 -- cgit v1.2.3-71-gd317 From b08ead0527bcfdcab39a347b531701289485b484 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:15:34 +0530 Subject: headers_check fix: linux/hid.h fix the following 'make headers_check' warnings: usr/include/linux/hid.h:69: extern's make no sense in userspace usr/include/linux/hid.h:76: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 81aa84d60c6b..fa8ee9cef7be 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -791,6 +791,7 @@ dbg_hid(const char *fmt, ...) __FILE__ , ## arg) #endif /* HID_FF */ +#ifdef __KERNEL__ #ifdef CONFIG_HID_COMPAT #define HID_COMPAT_LOAD_DRIVER(name) \ /* prototype to avoid sparse warning */ \ @@ -804,6 +805,7 @@ EXPORT_SYMBOL(hid_compat_##name) extern void hid_compat_##name(void); \ hid_compat_##name(); \ } while (0) +#endif /* __KERNEL__ */ #endif -- cgit v1.2.3-71-gd317 From c244ae5b16dc31b5bea67e6d6e9d6ff654aee781 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:17:06 +0530 Subject: headers_check fix: linux/hiddev.h fix the following 'make headers_check' warning: usr/include/linux/hiddev.h:40: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/hiddev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hiddev.h b/include/linux/hiddev.h index c760ae0eb6a1..bb6f58baf319 100644 --- a/include/linux/hiddev.h +++ b/include/linux/hiddev.h @@ -27,6 +27,8 @@ * Vojtech Pavlik, Ucitelska 1576, Prague 8, 182 00 Czech Republic */ +#include + /* * The event structure itself */ -- cgit v1.2.3-71-gd317 From 1cc49ae2e6d241e5cfc2c52e3329f5ef8dd42f18 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:18:37 +0530 Subject: headers_check fix: linux/icmpv6.h fix the following 'make headers_check' warning: usr/include/linux/icmpv6.h:8: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/icmpv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index a93a8dd33118..10d701eec484 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -1,6 +1,7 @@ #ifndef _LINUX_ICMPV6_H #define _LINUX_ICMPV6_H +#include #include struct icmp6hdr { -- cgit v1.2.3-71-gd317 From 680ee0bd2a9625965812c1209476168fd0704a00 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:21:01 +0530 Subject: headers_check fix: linux/if_addr.h fix the following 'make headers_check' warning: usr/include/linux/if_addr.h:8: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/if_addr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index 43f3bedaafd3..a60c821be44c 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -1,6 +1,7 @@ #ifndef __LINUX_IF_ADDR_H #define __LINUX_IF_ADDR_H +#include #include struct ifaddrmsg -- cgit v1.2.3-71-gd317 From 1759cb994c3ff51e69268379da1cdd96048a8268 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:25:00 +0530 Subject: headers_check fix: linux/if_addrlabel.h fix the following 'make headers_check' warning: usr/include/linux/if_addrlabel.h:15: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/if_addrlabel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_addrlabel.h b/include/linux/if_addrlabel.h index 9fe79c95dd28..89571f65d6de 100644 --- a/include/linux/if_addrlabel.h +++ b/include/linux/if_addrlabel.h @@ -10,6 +10,8 @@ #ifndef __LINUX_IF_ADDRLABEL_H #define __LINUX_IF_ADDRLABEL_H +#include + struct ifaddrlblmsg { __u8 ifal_family; /* Address family */ -- cgit v1.2.3-71-gd317 From ba7161387e82fbbdc4b49533aa1345bb7befda13 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:26:35 +0530 Subject: headers_check fix: linux/if_fc.h fix the following 'make headers_check' warning: usr/include/linux/if_fc.h:37: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/if_fc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_fc.h b/include/linux/if_fc.h index 376a34ea4723..6ed7f1bf35c8 100644 --- a/include/linux/if_fc.h +++ b/include/linux/if_fc.h @@ -20,6 +20,7 @@ #ifndef _LINUX_IF_FC_H #define _LINUX_IF_FC_H +#include #define FC_ALEN 6 /* Octets in one ethernet addr */ #define FC_HLEN (sizeof(struct fch_hdr)+sizeof(struct fcllc)) -- cgit v1.2.3-71-gd317 From b06e936939931c5acb1ca5dfe1d02b4d2f7cb11f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:27:59 +0530 Subject: headers_check fix: linux/if_hippi.h fix the following 'make headers_check' warning: usr/include/linux/if_hippi.h:82: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/if_hippi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h index f0f23516bb59..4a7c9940b080 100644 --- a/include/linux/if_hippi.h +++ b/include/linux/if_hippi.h @@ -22,6 +22,7 @@ #ifndef _LINUX_IF_HIPPI_H #define _LINUX_IF_HIPPI_H +#include #include /* -- cgit v1.2.3-71-gd317 From 85db53102dbf0816e9c5426c9322a64759e7166b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:29:20 +0530 Subject: headers_check fix: linux/if_link.h fix the following 'make headers_check' warning: usr/include/linux/if_link.h:9: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index f9032c88716a..176c5182c515 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -1,6 +1,7 @@ #ifndef _LINUX_IF_LINK_H #define _LINUX_IF_LINK_H +#include #include /* The struct should be in sync with struct net_device_stats */ -- cgit v1.2.3-71-gd317 From 0fe5a8fe0c145a6ff8f3daacd32f1824d0c021a9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:31:29 +0530 Subject: headers_check fix: linux/if_ppp.h fix the following 'make headers_check' warning: usr/include/linux/if_ppp.h:96: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/if_ppp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_ppp.h b/include/linux/if_ppp.h index c3b1f8562709..fcef103aa3f6 100644 --- a/include/linux/if_ppp.h +++ b/include/linux/if_ppp.h @@ -33,6 +33,7 @@ #ifndef _IF_PPP_H_ #define _IF_PPP_H_ +#include #include /* -- cgit v1.2.3-71-gd317 From 84ad40ebbaeb22fc665b1f307d32128c46e8d42d Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:32:52 +0530 Subject: headers_check fix: linux/if_strip.h fix the following 'make headers_check' warning: usr/include/linux/if_strip.h:22: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/if_strip.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_strip.h b/include/linux/if_strip.h index fb5c5c98442f..6526a6235832 100644 --- a/include/linux/if_strip.h +++ b/include/linux/if_strip.h @@ -18,6 +18,8 @@ #ifndef __LINUX_STRIP_H #define __LINUX_STRIP_H +#include + typedef struct { __u8 c[6]; } MetricomAddress; -- cgit v1.2.3-71-gd317 From 65863dbc0833e06b905679f61450f05a68bae4c2 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:34:36 +0530 Subject: headers_check fix: linux/if_tr.h fix the following 'make headers_check' warning: usr/include/linux/if_tr.h:37: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/if_tr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h index 5bcec8b2c5e2..fc23aeb0f201 100644 --- a/include/linux/if_tr.h +++ b/include/linux/if_tr.h @@ -19,6 +19,7 @@ #ifndef _LINUX_IF_TR_H #define _LINUX_IF_TR_H +#include #include /* For __be16 */ /* IEEE 802.5 Token-Ring magic constants. The frame sizes omit the preamble -- cgit v1.2.3-71-gd317 From de8b0bcafabfb4400aa028282293ce7d52307433 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:36:04 +0530 Subject: headers_check fix: linux/igmp.h fix the following 'make headers_check' warning: usr/include/linux/igmp.h:31: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/igmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index f734a0ba0698..92fbd8cbd68f 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -16,6 +16,7 @@ #ifndef _LINUX_IGMP_H #define _LINUX_IGMP_H +#include #include /* -- cgit v1.2.3-71-gd317 From 2df005b75ab910f789f099f81bb70b3aa37203a7 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:42:47 +0530 Subject: headers_check fix: linux/inet_diag.h fix the following 'make headers_check' warning: usr/include/linux/inet_diag.h:16: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/inet_diag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 6e8bc548635a..bc8c49022084 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -1,6 +1,8 @@ #ifndef _INET_DIAG_H_ #define _INET_DIAG_H_ 1 +#include + /* Just some random number */ #define TCPDIAG_GETSOCK 18 #define DCCPDIAG_GETSOCK 19 -- cgit v1.2.3-71-gd317 From 217a2291570b1e4c28cb6e4cd099707e456a09b8 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:44:19 +0530 Subject: headers_check fix: linux/ip6_tunnel.h fix the following 'make headers_check' warning: include/linux/ip6_tunnel.h:21: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/ip6_tunnel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ip6_tunnel.h b/include/linux/ip6_tunnel.h index 1e7cc4af40de..acb9ad684d63 100644 --- a/include/linux/ip6_tunnel.h +++ b/include/linux/ip6_tunnel.h @@ -1,6 +1,8 @@ #ifndef _IP6_TUNNEL_H #define _IP6_TUNNEL_H +#include + #define IPV6_TLV_TNL_ENCAP_LIMIT 4 #define IPV6_DEFAULT_TNL_ENCAP_LIMIT 4 -- cgit v1.2.3-71-gd317 From 5c6aa2badf1b97ead5ffec8094f0c6236e0c07c5 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:46:43 +0530 Subject: headers_check fix: linux/ipv6.h fix the following 'make headers_check' warning: usr/include/linux/ipv6.h:26: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0b816cae533e..476d9464ac82 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -1,6 +1,7 @@ #ifndef _IPV6_H #define _IPV6_H +#include #include #include -- cgit v1.2.3-71-gd317 From e5144de521417b0f0eea74ece89acd437ecd32c9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:47:52 +0530 Subject: headers_check fix: linux/ipv6_route.h fix the following 'make headers_check' warning: usr/include/linux/ipv6_route.h:42: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/ipv6_route.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipv6_route.h b/include/linux/ipv6_route.h index b323ff577967..1e7d8af2defe 100644 --- a/include/linux/ipv6_route.h +++ b/include/linux/ipv6_route.h @@ -13,6 +13,8 @@ #ifndef _LINUX_IPV6_ROUTE_H #define _LINUX_IPV6_ROUTE_H +#include + #define RTF_DEFAULT 0x00010000 /* default - learned via ND */ #define RTF_ALLONLINK 0x00020000 /* (deprecated and will be removed) fallback, no routers on link */ -- cgit v1.2.3-71-gd317 From d6d20f54847e27ed886e8285c208368ef3d42abb Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:49:07 +0530 Subject: headers_check fix: linux/ipx.h fix the following 'make headers_check' warning: usr/include/linux/ipx.h:13: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/ipx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipx.h b/include/linux/ipx.h index eb19b4ea84f4..aabb1d294025 100644 --- a/include/linux/ipx.h +++ b/include/linux/ipx.h @@ -1,5 +1,6 @@ #ifndef _IPX_H_ #define _IPX_H_ +#include #include #include #define IPX_NODE_LEN 6 -- cgit v1.2.3-71-gd317 From df9c04ed3ff455aa5cb7c4bcddf4544fe54cfa33 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:50:39 +0530 Subject: headers_check fix: linux/irda.h fix the following 'make headers_check' warning: usr/include/linux/irda.h:127: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/irda.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irda.h b/include/linux/irda.h index 28f88ecba344..00bdad0e8515 100644 --- a/include/linux/irda.h +++ b/include/linux/irda.h @@ -25,6 +25,8 @@ #ifndef KERNEL_IRDA_H #define KERNEL_IRDA_H +#include + /* Please do *not* add any #include in this file, this file is * included as-is in user space. * Please fix the calling file to properly included needed files before -- cgit v1.2.3-71-gd317 From 4b7ae34277608a30346d076beb44cbc466aa73e5 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:54:23 +0530 Subject: headers_check fix: linux/minix_fs.h fix the following 'make headers_check' warning: usr/include/linux/minix_fs.h:34: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/minix_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/minix_fs.h b/include/linux/minix_fs.h index 0e39745f5111..13fe09e0576a 100644 --- a/include/linux/minix_fs.h +++ b/include/linux/minix_fs.h @@ -1,6 +1,7 @@ #ifndef _LINUX_MINIX_FS_H #define _LINUX_MINIX_FS_H +#include #include /* -- cgit v1.2.3-71-gd317 From 8ef342021a55e4237e593c7f6304d0caa7bf1232 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:56:32 +0530 Subject: headers_check fix: linux/msdos_fs.h fix the following 'make headers_check' warning: usr/include/linux/msdos_fs.h:100: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/msdos_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index e0a9b207920d..ce38f1caa5e1 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -1,6 +1,7 @@ #ifndef _LINUX_MSDOS_FS_H #define _LINUX_MSDOS_FS_H +#include #include #include -- cgit v1.2.3-71-gd317 From ee79a6415f911801eb7804704ac130088281b2d8 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:58:19 +0530 Subject: headers_check fix: linux/neighbour.h fix the following 'make headers_check' warning: usr/include/linux/neighbour.h:8: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index bd3bbf668cdb..8730d5dae1bc 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -1,6 +1,7 @@ #ifndef __LINUX_NEIGHBOUR_H #define __LINUX_NEIGHBOUR_H +#include #include struct ndmsg -- cgit v1.2.3-71-gd317 From a81184c1f8cf8589a00894c20422982defc3f056 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 21:59:48 +0530 Subject: headers_check fix: linux/nfs_idmap.h fix the following 'make headers_check' warning: usr/include/linux/nfs_idmap.h:55: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/nfs_idmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 15a9f3b7289a..91a1c24e0cbf 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -37,6 +37,8 @@ #ifndef NFS_IDMAP_H #define NFS_IDMAP_H +#include + /* XXX from bits/utmp.h */ #define IDMAP_NAMESZ 128 -- cgit v1.2.3-71-gd317 From 06f43adba62f99de101616ffc5d0564aab237111 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:03:25 +0530 Subject: headers_check fix: linux/phonet.h fix the following 'make headers_check' warning: usr/include/linux/phonet.h:50: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/phonet.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 4157faa857b6..ee5e3c9e2bca 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -23,6 +23,8 @@ #ifndef LINUX_PHONET_H #define LINUX_PHONET_H +#include + /* Automatic protocol selection */ #define PN_PROTO_TRANSPORT 0 /* Phonet datagram socket */ -- cgit v1.2.3-71-gd317 From ed307444d8f276d7052400c47d9f4c5393997c42 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:05:32 +0530 Subject: headers_check fix: linux/pkt_cls.h fix the following 'make headers_check' warning: linux/pkt_cls.h:122: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/pkt_cls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index e6aa8482ad7a..3c842edff388 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -1,6 +1,7 @@ #ifndef __LINUX_PKT_CLS_H #define __LINUX_PKT_CLS_H +#include #include /* I think i could have done better macros ; for now this is stolen from -- cgit v1.2.3-71-gd317 From b8adfd3c753b47c47f626e032da7999530c316f0 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:07:05 +0530 Subject: headers_check fix: linux/pkt_sched.h fix the following 'make headers_check' warning: usr/include/linux/pkt_sched.h:32: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/pkt_sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index e3f133adba78..b2648e8e4987 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -1,6 +1,8 @@ #ifndef __LINUX_PKT_SCHED_H #define __LINUX_PKT_SCHED_H +#include + /* Logical priority bands not depending on specific packet scheduler. Every scheduler will map them to real traffic classes, if it has no more precise mechanism to classify packets. -- cgit v1.2.3-71-gd317 From 7260a91666a3149181e7b78bbf73beebbb04f8fa Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:09:17 +0530 Subject: headers_check fix: linux/ppp_defs.h fix the following 'make headers_check' warning: usr/include/linux/ppp_defs.h:50: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/ppp_defs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h index 6e8adc77522c..1c866bda2018 100644 --- a/include/linux/ppp_defs.h +++ b/include/linux/ppp_defs.h @@ -25,6 +25,8 @@ * OR MODIFICATIONS. */ +#include + /* * ==FILEVERSION 20000114== * -- cgit v1.2.3-71-gd317 From 68622c61dc7971382f5d69cd5d881e618ea30414 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:11:32 +0530 Subject: headers_check fix: linux/random.h fix the following 'make headers_check' warning: usr/include/linux/random.h:39: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/random.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 407ea3646f8f..25d02fe5c9b5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -7,6 +7,7 @@ #ifndef _LINUX_RANDOM_H #define _LINUX_RANDOM_H +#include #include #include -- cgit v1.2.3-71-gd317 From a788fd53aec9a439f6b8bf57888c30aea1176e1b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:14:02 +0530 Subject: headers_check fix: linux/signalfd.h fix the following 'make headers_check' warning: usr/include/linux/signalfd.h:19: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/signalfd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index bef0c46d4713..b363b916c909 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -8,6 +8,7 @@ #ifndef _LINUX_SIGNALFD_H #define _LINUX_SIGNALFD_H +#include /* For O_CLOEXEC and O_NONBLOCK */ #include -- cgit v1.2.3-71-gd317 From e6faa002be269233bf1e8961e7e0a79ca3f2db8b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:17:53 +0530 Subject: headers_check fix: linux/sound.h fix the following 'make headers_check' warnings: usr/include/linux/sound.h:33: extern's make no sense in userspace usr/include/linux/sound.h:34: extern's make no sense in userspace usr/include/linux/sound.h:35: extern's make no sense in userspace usr/include/linux/sound.h:36: extern's make no sense in userspace usr/include/linux/sound.h:37: extern's make no sense in userspace usr/include/linux/sound.h:39: extern's make no sense in userspace usr/include/linux/sound.h:40: extern's make no sense in userspace usr/include/linux/sound.h:41: extern's make no sense in userspace usr/include/linux/sound.h:42: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- include/linux/sound.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sound.h b/include/linux/sound.h index 9e2a94feed6b..44dcf0570432 100644 --- a/include/linux/sound.h +++ b/include/linux/sound.h @@ -25,6 +25,7 @@ #define SND_DEV_AMIDI 13 /* Like /dev/midi (obsolete) */ #define SND_DEV_ADMMIDI 14 /* Like /dev/dmmidi (onsolete) */ +#ifdef __KERNEL__ /* * Sound core interface functions */ @@ -40,3 +41,4 @@ extern void unregister_sound_special(int unit); extern void unregister_sound_mixer(int unit); extern void unregister_sound_midi(int unit); extern void unregister_sound_dsp(int unit); +#endif /* __KERNEL__ */ -- cgit v1.2.3-71-gd317 From 6b6bcd0ed953ae0ed73af4759788fb8384bbaeed Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:20:04 +0530 Subject: headers_check fix: linux/synclink.h fix the following 'make headers_check' warning: usr/include/linux/synclink.h:209: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/synclink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/synclink.h b/include/linux/synclink.h index c844a229acc9..99b8bdb17b2b 100644 --- a/include/linux/synclink.h +++ b/include/linux/synclink.h @@ -13,6 +13,8 @@ #define _SYNCLINK_H_ #define SYNCLINK_H_VERSION 3.6 +#include + #define BIT0 0x0001 #define BIT1 0x0002 #define BIT2 0x0004 -- cgit v1.2.3-71-gd317 From 448314fc968252b0b95f74bbe63fdcaf41e6413d Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:21:19 +0530 Subject: headers_check fix: linux/taskstats.h fix the following 'make headers_check' warning: usr/include/linux/taskstats.h:44: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/taskstats.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 18269e956a71..341dddb55090 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -16,6 +16,8 @@ #ifndef _LINUX_TASKSTATS_H #define _LINUX_TASKSTATS_H +#include + /* Format for per-task data returned to userland when * - a task exits * - listener requests stats for a task -- cgit v1.2.3-71-gd317 From 8b1e3a2f7f84484a8c208671adac39eb148c7d61 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:22:51 +0530 Subject: headers_check fix: linux/video_decoder.h fix the following 'make headers_check' warning: usr/include/linux/video_decoder.h:7: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/video_decoder.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/video_decoder.h b/include/linux/video_decoder.h index 121e26da2c18..e26c0c86a6ea 100644 --- a/include/linux/video_decoder.h +++ b/include/linux/video_decoder.h @@ -1,6 +1,8 @@ #ifndef _LINUX_VIDEO_DECODER_H #define _LINUX_VIDEO_DECODER_H +#include + #define HAVE_VIDEO_DECODER 1 struct video_decoder_capability { /* this name is too long */ -- cgit v1.2.3-71-gd317 From a4c1d7c8c61969667a853d08b039507669463807 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:24:09 +0530 Subject: headers_check fix: linux/video_encoder.h fix the following 'make headers_check' warning: usr/include/linux/video_encoder.h:5: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/video_encoder.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/video_encoder.h b/include/linux/video_encoder.h index 4b0e6907a7b4..b7b6423bbb8a 100644 --- a/include/linux/video_encoder.h +++ b/include/linux/video_encoder.h @@ -1,6 +1,8 @@ #ifndef _LINUX_VIDEO_ENCODER_H #define _LINUX_VIDEO_ENCODER_H +#include + struct video_encoder_capability { /* this name is too long */ __u32 flags; #define VIDEO_ENCODER_PAL 1 /* can encode PAL signal */ -- cgit v1.2.3-71-gd317 From 98be96b85398499212bc77ae3076a69e20368428 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:26:01 +0530 Subject: headers_check fix: linux/videodev.h fix the following 'make headers_check' warning: usr/include/linux/videodev.h:53: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/videodev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/videodev.h b/include/linux/videodev.h index 15a653d41132..837f392fbe97 100644 --- a/include/linux/videodev.h +++ b/include/linux/videodev.h @@ -12,6 +12,7 @@ #ifndef __LINUX_VIDEODEV_H #define __LINUX_VIDEODEV_H +#include #include #include -- cgit v1.2.3-71-gd317 From 982f8184f9a9251ba4e5c6d79ec32d25c0ad3cc8 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:27:58 +0530 Subject: headers_check fix: linux/virtio_blk.h fix the following 'make headers_check' warning: usr/include/linux/virtio_blk.h:21: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/virtio_blk.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index c1aef85243bf..94c56d29869d 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -2,6 +2,7 @@ #define _LINUX_VIRTIO_BLK_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ +#include #include /* The ID for virtio_block */ -- cgit v1.2.3-71-gd317 From 8697325408d9be18fa24346c346b23fa56c3b190 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:29:33 +0530 Subject: headers_check fix: linux/virtio_console.h fix the following 'make headers_check' warning: usr/include/linux/virtio_console.h:15: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/virtio_console.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index 7615ffcdd555..dc161115ae35 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -1,5 +1,6 @@ #ifndef _LINUX_VIRTIO_CONSOLE_H #define _LINUX_VIRTIO_CONSOLE_H +#include #include /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so * anyone can use the definitions to implement compatible drivers/servers. */ -- cgit v1.2.3-71-gd317 From 9a0e0ac21ca2af4715808b97bd600f0aecd87240 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:31:05 +0530 Subject: headers_check fix: linux/virtio_net.h fix the following 'make headers_check' warning: usr/include/linux/virtio_net.h:28: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/virtio_net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 5cdd0aa8bde9..3efa86c3ecb3 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -2,6 +2,7 @@ #define _LINUX_VIRTIO_NET_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ +#include #include /* The ID for virtio_net */ -- cgit v1.2.3-71-gd317 From d5c72d7842c71403bc3d57ca05a8a1f96d81e262 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:39:17 +0530 Subject: headers_check fix: linux/nubus.h fix the following 'make headers_check' warning: usr/include/linux/nubus.h:232: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/nubus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nubus.h b/include/linux/nubus.h index c4355076d1a5..7382af374731 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -12,6 +12,7 @@ #ifndef LINUX_NUBUS_H #define LINUX_NUBUS_H +#include #ifdef __KERNEL__ #include #endif -- cgit v1.2.3-71-gd317 From 541c94f1d5ac2665fd15f1b827416f8c0b2f55cb Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:40:32 +0530 Subject: headers_check fix: linux/rtnetlink.h fix the following 'make headers_check' warning: usr/include/linux/rtnetlink.h:328: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index e88f7058b3a1..1e5f6730ff31 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -1,6 +1,7 @@ #ifndef __LINUX_RTNETLINK_H #define __LINUX_RTNETLINK_H +#include #include #include #include -- cgit v1.2.3-71-gd317 From 94df7de0289bc2df3d6e85cd2ece52bf42682f45 Mon Sep 17 00:00:00 2001 From: Sebastien Dugue Date: Mon, 1 Dec 2008 14:09:07 +0100 Subject: hrtimers: allow the hot-unplugging of all cpus Impact: fix CPU hotplug hang on Power6 testbox On architectures that support offlining all cpus (at least powerpc/pseries), hot-unpluging the tick_do_timer_cpu can result in a system hang. This comes from the fact that if the cpu going down happens to be the cpu doing the tick, then as the tick_do_timer_cpu handover happens after the cpu is dead (via the CPU_DEAD notification), we're left without ticks, jiffies are frozen and any task relying on timers (msleep, ...) is stuck. That's particularly the case for the cpu looping in __cpu_die() waiting for the dying cpu to be dead. This patch addresses this by having the tick_do_timer_cpu handover happen earlier during the CPU_DYING notification. For this, a new clockevent notification type is introduced (CLOCK_EVT_NOTIFY_CPU_DYING) which is triggered in hrtimer_cpu_notify(). Signed-off-by: Sebastien Dugue Cc: Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 1 + kernel/hrtimer.c | 4 ++++ kernel/time/tick-common.c | 26 +++++++++++++++++++------- 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index cea153697ec7..3a1dbba4d3ae 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -36,6 +36,7 @@ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_BROADCAST_EXIT, CLOCK_EVT_NOTIFY_SUSPEND, CLOCK_EVT_NOTIFY_RESUME, + CLOCK_EVT_NOTIFY_CPU_DYING, CLOCK_EVT_NOTIFY_CPU_DEAD, }; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 8fea312ca36c..647a40e2fea1 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1608,6 +1608,10 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_DYING: + case CPU_DYING_FROZEN: + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); + break; case CPU_DEAD: case CPU_DEAD_FROZEN: { diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 63e05d423a09..21a5ca849514 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -273,6 +273,21 @@ out_bc: return ret; } +/* + * Transfer the do_timer job away from a dying cpu. + * + * Called with interrupts disabled. + */ +static void tick_handover_do_timer(int *cpup) +{ + if (*cpup == tick_do_timer_cpu) { + int cpu = cpumask_first(cpu_online_mask); + + tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : + TICK_DO_TIMER_NONE; + } +} + /* * Shutdown an event device on a given cpu: * @@ -297,13 +312,6 @@ static void tick_shutdown(unsigned int *cpup) clockevents_exchange_device(dev, NULL); td->evtdev = NULL; } - /* Transfer the do_timer job away from this cpu */ - if (*cpup == tick_do_timer_cpu) { - int cpu = cpumask_first(cpu_online_mask); - - tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : - TICK_DO_TIMER_NONE; - } spin_unlock_irqrestore(&tick_device_lock, flags); } @@ -357,6 +365,10 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, tick_broadcast_oneshot_control(reason); break; + case CLOCK_EVT_NOTIFY_CPU_DYING: + tick_handover_do_timer(dev); + break; + case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(dev); tick_shutdown_broadcast(dev); -- cgit v1.2.3-71-gd317 From 0648e10d71c8e510d80772c4cb4220c97e9c34c7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Feb 2009 08:43:48 +0100 Subject: block: fix inconsistent parenthesisation of QUEUE_FLAG_DEFAULT Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d08c4b8219a6..dcaa0fd84b02 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -455,7 +455,7 @@ struct request_queue #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ - 1 << QUEUE_FLAG_STACKABLE) + (1 << QUEUE_FLAG_STACKABLE)) static inline int queue_is_locked(struct request_queue *q) { -- cgit v1.2.3-71-gd317 From 20b636bf7c946da260391cd4570b16506f140a2c Mon Sep 17 00:00:00 2001 From: Alberto Bertogli Date: Mon, 2 Feb 2009 12:41:07 +0100 Subject: Fix misleading comment in bio.h The comment says "remember to add offset!", but the function already adds it. Signed-off-by: Alberto Bertogli Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 0942765cf8c0..99728320cc05 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -451,8 +451,8 @@ extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly; #ifdef CONFIG_HIGHMEM /* - * remember to add offset! and never ever reenable interrupts between a - * bvec_kmap_irq and bvec_kunmap_irq!! + * remember never ever reenable interrupts between a bvec_kmap_irq and + * bvec_kunmap_irq! * * This function MUST be inlined - it plays with the CPU interrupt flags. */ -- cgit v1.2.3-71-gd317 From c52440a69df22dca69794936a91e2fb529a707fb Mon Sep 17 00:00:00 2001 From: Alberto Bertogli Date: Mon, 2 Feb 2009 12:41:07 +0100 Subject: bio.h: If they MUST be inlined, then use __always_inline bvec_kmap_irq() and bvec_kunmap_irq() comments say they MUST be inlined, so mark them as __always_inline. Signed-off-by: Alberto Bertogli Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 99728320cc05..2aa283ab062b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -456,7 +456,8 @@ extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly; * * This function MUST be inlined - it plays with the CPU interrupt flags. */ -static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) +static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec, + unsigned long *flags) { unsigned long addr; @@ -472,7 +473,8 @@ static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) return (char *) addr + bvec->bv_offset; } -static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) +static __always_inline void bvec_kunmap_irq(char *buffer, + unsigned long *flags) { unsigned long ptr = (unsigned long) buffer & PAGE_MASK; -- cgit v1.2.3-71-gd317 From 720eba31f47aeade8ec130ca7f4353223c49170f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Feb 2009 13:31:36 +1030 Subject: modules: Use a better scheme for refcounting Current refcounting for modules (done if CONFIG_MODULE_UNLOAD=y) is using a lot of memory. Each 'struct module' contains an [NR_CPUS] array of full cache lines. This patch uses existing infrastructure (percpu_modalloc() & percpu_modfree()) to allocate percpu space for the refcount storage. Instead of wasting NR_CPUS*128 bytes (on i386), we now use nr_cpu_ids*sizeof(local_t) bytes. On a typical distro, where NR_CPUS=8, shiping 2000 modules, we reduce size of module files by about 2 Mbytes. (1Kb per module) Instead of having all refcounters in the same memory node - with TLB misses because of vmalloc() - this new implementation permits to have better NUMA properties, since each CPU will use storage on its preferred node, thanks to percpu storage. Signed-off-by: Eric Dumazet Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- include/linux/module.h | 25 ++++++++++++++++--------- kernel/module.c | 35 +++++++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 4f7ea12463d3..f3b8329eb5b8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -219,11 +219,6 @@ void *__symbol_get_gpl(const char *symbol); #endif -struct module_ref -{ - local_t count; -} ____cacheline_aligned; - enum module_state { MODULE_STATE_LIVE, @@ -344,8 +339,11 @@ struct module /* Destruction function. */ void (*exit)(void); - /* Reference counts */ - struct module_ref ref[NR_CPUS]; +#ifdef CONFIG_SMP + char *refptr; +#else + local_t ref; +#endif #endif }; #ifndef MODULE_ARCH_INIT @@ -395,13 +393,22 @@ void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) void symbol_put_addr(void *addr); +static inline local_t *__module_ref_addr(struct module *mod, int cpu) +{ +#ifdef CONFIG_SMP + return (local_t *) (mod->refptr + per_cpu_offset(cpu)); +#else + return &mod->ref; +#endif +} + /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ static inline void __module_get(struct module *module) { if (module) { BUG_ON(module_refcount(module) == 0); - local_inc(&module->ref[get_cpu()].count); + local_inc(__module_ref_addr(module, get_cpu())); put_cpu(); } } @@ -413,7 +420,7 @@ static inline int try_module_get(struct module *module) if (module) { unsigned int cpu = get_cpu(); if (likely(module_is_live(module))) - local_inc(&module->ref[cpu].count); + local_inc(__module_ref_addr(module, cpu)); else ret = 0; put_cpu(); diff --git a/kernel/module.c b/kernel/module.c index e8b51d41dd72..ba22484a987e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -573,13 +573,13 @@ static char last_unloaded_module[MODULE_NAME_LEN+1]; /* Init the unload section of the module. */ static void module_unload_init(struct module *mod) { - unsigned int i; + int cpu; INIT_LIST_HEAD(&mod->modules_which_use_me); - for (i = 0; i < NR_CPUS; i++) - local_set(&mod->ref[i].count, 0); + for_each_possible_cpu(cpu) + local_set(__module_ref_addr(mod, cpu), 0); /* Hold reference count during initialization. */ - local_set(&mod->ref[raw_smp_processor_id()].count, 1); + local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -717,10 +717,11 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned int module_refcount(struct module *mod) { - unsigned int i, total = 0; + unsigned int total = 0; + int cpu; - for (i = 0; i < NR_CPUS; i++) - total += local_read(&mod->ref[i].count); + for_each_possible_cpu(cpu) + total += local_read(__module_ref_addr(mod, cpu)); return total; } EXPORT_SYMBOL(module_refcount); @@ -894,7 +895,7 @@ void module_put(struct module *module) { if (module) { unsigned int cpu = get_cpu(); - local_dec(&module->ref[cpu].count); + local_dec(__module_ref_addr(module, cpu)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); @@ -1464,7 +1465,10 @@ static void free_module(struct module *mod) kfree(mod->args); if (mod->percpu) percpu_modfree(mod->percpu); - +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + if (mod->refptr) + percpu_modfree(mod->refptr); +#endif /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); @@ -2011,6 +2015,14 @@ static noinline struct module *load_module(void __user *umod, if (err < 0) goto free_mod; +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), + mod->name); + if (!mod->refptr) { + err = -ENOMEM; + goto free_mod; + } +#endif if (pcpuindex) { /* We have a special allocation for this section. */ percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, @@ -2018,7 +2030,7 @@ static noinline struct module *load_module(void __user *umod, mod->name); if (!percpu) { err = -ENOMEM; - goto free_mod; + goto free_percpu; } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; mod->percpu = percpu; @@ -2282,6 +2294,9 @@ static noinline struct module *load_module(void __user *umod, free_percpu: if (percpu) percpu_modfree(percpu); +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + percpu_modfree(mod->refptr); +#endif free_mod: kfree(args); free_hdr: -- cgit v1.2.3-71-gd317