From 8d82ffd15e59febf2c597067a777526958b7f769 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Tue, 7 Apr 2009 10:20:56 +0200 Subject: powerpc: Document new FSL I2C bindings and cleanup This patch documents the new bindings for the MPC I2C bus driver. Furthermore, it removes obsolete FSL device related definitions for I2C. Signed-off-by: Wolfgang Grandegger Signed-off-by: Kumar Gala --- include/linux/fsl_devices.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index f2a78b5e8b55..43fc95d822d5 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -43,10 +43,6 @@ * */ -/* Flags related to I2C device features */ -#define FSL_I2C_DEV_SEPARATE_DFSRR 0x00000001 -#define FSL_I2C_DEV_CLOCK_5200 0x00000002 - enum fsl_usb2_operating_modes { FSL_USB2_MPH_HOST, FSL_USB2_DR_HOST, -- cgit v1.2.3-71-gd317 From fd746d540abf8c686f5f868ae62112692e684088 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Sat, 11 Apr 2009 16:54:59 -0700 Subject: Input: ads7846 - introduce platform specific way to synchronize sampling Noises can be introduced when LCD signals are being driven, some platforms provide a signal to assist the synchronization of this sampling procedure. Signed-off-by: Eric Miao Signed-off-by: Andrew Morton Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 10 ++++++++++ include/linux/spi/ads7846.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 7c27c8b9b6d0..cf7e69766b2b 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -127,6 +127,8 @@ struct ads7846 { void (*filter_cleanup)(void *data); int (*get_pendown_state)(void); int gpio_pendown; + + void (*wait_for_sync)(void); }; /* leave chip selected when we're done, for quicker re-select? 
*/ @@ -511,6 +513,10 @@ static int get_pendown_state(struct ads7846 *ts) return !gpio_get_value(ts->gpio_pendown); } +static void null_wait_for_sync(void) +{ +} + /* * PENIRQ only kicks the timer. The timer only reissues the SPI transfer, * to retrieve touchscreen status. @@ -686,6 +692,7 @@ static void ads7846_rx_val(void *ads) default: BUG(); } + ts->wait_for_sync(); status = spi_async(ts->spi, m); if (status) dev_err(&ts->spi->dev, "spi_async --> %d\n", @@ -723,6 +730,7 @@ static enum hrtimer_restart ads7846_timer(struct hrtimer *handle) } else { /* pen is still down, continue with the measurement */ ts->msg_idx = 0; + ts->wait_for_sync(); status = spi_async(ts->spi, &ts->msg[0]); if (status) dev_err(&ts->spi->dev, "spi_async --> %d\n", status); @@ -947,6 +955,8 @@ static int __devinit ads7846_probe(struct spi_device *spi) ts->penirq_recheck_delay_usecs = pdata->penirq_recheck_delay_usecs; + ts->wait_for_sync = pdata->wait_for_sync ? : null_wait_for_sync; + snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(&spi->dev)); input_dev->name = "ADS784x Touchscreen"; diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index 05eab2f11e63..2ea20320c093 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -51,5 +51,6 @@ struct ads7846_platform_data { void **filter_data); int (*filter) (void *filter_data, int data_idx, int *val); void (*filter_cleanup)(void *filter_data); + void (*wait_for_sync)(void); }; -- cgit v1.2.3-71-gd317 From ebde441177da3bad156701d351509f34295282ab Mon Sep 17 00:00:00 2001 From: Michal Januszewski Date: Mon, 13 Apr 2009 14:39:41 -0700 Subject: fbdev: fix color component field length documentation The documentation about the meaning of the color component bitfield lengths in pseudocolor modes is inconsistent. Fix it, so that it indicates the correct interpretation everywhere, i.e. that 1 << length is the number of palette entries. 
Signed-off-by: Michal Januszewski Acked-by: Krzysztof Helt Cc: Acked-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/skeletonfb.c | 8 +++++--- drivers/video/vfb.c | 11 +++++++---- include/linux/fb.h | 8 ++++++-- 3 files changed, 18 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/skeletonfb.c b/drivers/video/skeletonfb.c index a439159204a8..89158bc71da2 100644 --- a/drivers/video/skeletonfb.c +++ b/drivers/video/skeletonfb.c @@ -308,9 +308,11 @@ static int xxxfb_setcolreg(unsigned regno, unsigned red, unsigned green, * color depth = SUM(var->{color}.length) * * Pseudocolor: - * var->{color}.offset is 0 - * var->{color}.length contains width of DAC or the number of unique - * colors available (color depth) + * var->{color}.offset is 0 unless the palette index takes less than + * bits_per_pixel bits and is stored in the upper + * bits of the pixel value + * var->{color}.length is set so that 1 << length is the number of + * available palette entries * pseudo_palette is not used * RAMDAC[X] is programmed to (red, green, blue) * color depth = var->{color}.length diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c index cc919ae46571..050d432c7d95 100644 --- a/drivers/video/vfb.c +++ b/drivers/video/vfb.c @@ -318,13 +318,16 @@ static int vfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, * {hardwarespecific} contains width of RAMDAC * cmap[X] is programmed to (X << red.offset) | (X << green.offset) | (X << blue.offset) * RAMDAC[X] is programmed to (red, green, blue) - * + * * Pseudocolor: - * uses offset = 0 && length = RAMDAC register width. 
- * var->{color}.offset is 0 - * var->{color}.length contains widht of DAC + * var->{color}.offset is 0 unless the palette index takes less than + * bits_per_pixel bits and is stored in the upper + * bits of the pixel value + * var->{color}.length is set so that 1 << length is the number of available + * palette entries * cmap is not used * RAMDAC[X] is programmed to (red, green, blue) + * * Truecolor: * does not use DAC. Usually 3 are present. * var->{color}.offset contains start of bitfield diff --git a/include/linux/fb.h b/include/linux/fb.h index f563c5013932..330c4b1bfcaa 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -173,8 +173,12 @@ struct fb_fix_screeninfo { /* Interpretation of offset for color fields: All offsets are from the right, * inside a "pixel" value, which is exactly 'bits_per_pixel' wide (means: you * can use the offset as right argument to <<). A pixel afterwards is a bit - * stream and is written to video memory as that unmodified. This implies - * big-endian byte order if bits_per_pixel is greater than 8. + * stream and is written to video memory as that unmodified. + * + * For pseudocolor: offset and length should be the same for all color + * components. Offset specifies the position of the least significant bit + * of the pallette index in a pixel value. Length indicates the number + * of available palette entries (i.e. # of entries = 1 << length). */ struct fb_bitfield { __u32 offset; /* beginning of bitfield */ -- cgit v1.2.3-71-gd317 From 251eb40f5ccd07a905633a816fbf8f2b6b25cced Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 13 Apr 2009 14:39:45 -0700 Subject: hwmon: sht15 humidity sensor driver Data sheet at: http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf These sensors communicate over a 2 wire bus running a device specific protocol. 
The complexity of the driver is mainly due to handling the substantial delays between requesting a reading and the device pulling the data line low to indicate that the data is available. This is handled by an interrupt that is disabled under all other conditions. I wasn't terribly clear on the best way to handle this, so comments on that aspect would be particularly welcome! Interpretation of the temperature depends on knowing the supply voltage. If configured in a board config as a regulator consumer this is obtained from the regulator subsystem. If not, it should be provided in the platform data. I've placed this driver in the hwmon subsystem as it is definitely a device that may be used for hardware monitoring and with its relatively slow response times (up to 120 millisecs to get a reading) a caching strategy certainly seems to make sense! Signed-off-by: Jonathan Cameron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/Kconfig | 10 + drivers/hwmon/Makefile | 1 + drivers/hwmon/sht15.c | 692 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sht15.h | 24 ++ 4 files changed, 727 insertions(+) create mode 100644 drivers/hwmon/sht15.c create mode 100644 include/linux/sht15.h (limited to 'include/linux') diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 0e8a9185f676..d73f5f473e38 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -692,6 +692,16 @@ config SENSORS_PCF8591 These devices are hard to detect and rarely found on mainstream hardware. If unsure, say N. +config SENSORS_SHT15 + tristate "Sensiron humidity and temperature sensors. SHT15 and compat." + depends on GENERIC_GPIO + help + If you say yes here you get support for the Sensiron SHT10, SHT11, + SHT15, SHT71, SHT75 humidity and temperature sensors. + + This driver can also be built as a module. If so, the module + will be called sht15. + config SENSORS_SIS5595 tristate "Silicon Integrated Systems Corp. 
SiS5595" depends on PCI diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 1d3757837b4f..0ae26984ba45 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -76,6 +76,7 @@ obj-$(CONFIG_SENSORS_MAX6650) += max6650.o obj-$(CONFIG_SENSORS_PC87360) += pc87360.o obj-$(CONFIG_SENSORS_PC87427) += pc87427.o obj-$(CONFIG_SENSORS_PCF8591) += pcf8591.o +obj-$(CONFIG_SENSORS_SHT15) += sht15.o obj-$(CONFIG_SENSORS_SIS5595) += sis5595.o obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c new file mode 100644 index 000000000000..6cbdc2fea734 --- /dev/null +++ b/drivers/hwmon/sht15.c @@ -0,0 +1,692 @@ +/* + * sht15.c - support for the SHT15 Temperature and Humidity Sensor + * + * Copyright (c) 2009 Jonathan Cameron + * + * Copyright (c) 2007 Wouter Horre + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Currently ignoring checksum on readings. + * Default resolution only (14bit temp, 12bit humidity) + * Ignoring battery status. + * Heater not enabled. + * Timings are all conservative. 
+ * + * Data sheet available (1/2009) at + * http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf + * + * Regulator supply name = vcc + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SHT15_MEASURE_TEMP 3 +#define SHT15_MEASURE_RH 5 + +#define SHT15_READING_NOTHING 0 +#define SHT15_READING_TEMP 1 +#define SHT15_READING_HUMID 2 + +/* Min timings in nsecs */ +#define SHT15_TSCKL 100 /* clock low */ +#define SHT15_TSCKH 100 /* clock high */ +#define SHT15_TSU 150 /* data setup time */ + +/** + * struct sht15_temppair - elements of voltage dependant temp calc + * @vdd: supply voltage in microvolts + * @d1: see data sheet + */ +struct sht15_temppair { + int vdd; /* microvolts */ + int d1; +}; + +/* Table 9 from data sheet - relates temperature calculation + * to supply voltage. + */ +static const struct sht15_temppair temppoints[] = { + { 2500000, -39400 }, + { 3000000, -39600 }, + { 3500000, -39700 }, + { 4000000, -39800 }, + { 5000000, -40100 }, +}; + +/** + * struct sht15_data - device instance specific data + * @pdata: platform data (gpio's etc) + * @read_work: bh of interrupt handler + * @wait_queue: wait queue for getting values from device + * @val_temp: last temperature value read from device + * @val_humid: last humidity value read from device + * @flag: status flag used to identify what the last request was + * @valid: are the current stored values valid (start condition) + * @last_updat: time of last update + * @read_lock: mutex to ensure only one read in progress + * at a time. 
+ * @dev: associate device structure + * @hwmon_dev: device associated with hwmon subsystem + * @reg: associated regulator (if specified) + * @nb: notifier block to handle notifications of voltage changes + * @supply_uV: local copy of supply voltage used to allow + * use of regulator consumer if available + * @supply_uV_valid: indicates that an updated value has not yet + * been obtained from the regulator and so any calculations + * based upon it will be invalid. + * @update_supply_work: work struct that is used to update the supply_uV + * @interrupt_handled: flag used to indicate a hander has been scheduled + */ +struct sht15_data { + struct sht15_platform_data *pdata; + struct work_struct read_work; + wait_queue_head_t wait_queue; + uint16_t val_temp; + uint16_t val_humid; + u8 flag; + u8 valid; + unsigned long last_updat; + struct mutex read_lock; + struct device *dev; + struct device *hwmon_dev; + struct regulator *reg; + struct notifier_block nb; + int supply_uV; + int supply_uV_valid; + struct work_struct update_supply_work; + atomic_t interrupt_handled; +}; + +/** + * sht15_connection_reset() - reset the comms interface + * @data: sht15 specific data + * + * This implements section 3.4 of the data sheet + */ +static void sht15_connection_reset(struct sht15_data *data) +{ + int i; + gpio_direction_output(data->pdata->gpio_data, 1); + ndelay(SHT15_TSCKL); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + for (i = 0; i < 9; ++i) { + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + } +} +/** + * sht15_send_bit() - send an individual bit to the device + * @data: device state data + * @val: value of bit to be sent + **/ +static inline void sht15_send_bit(struct sht15_data *data, int val) +{ + + gpio_set_value(data->pdata->gpio_data, val); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + 
gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); /* clock low time */ +} + +/** + * sht15_transmission_start() - specific sequence for new transmission + * + * @data: device state data + * Timings for this are not documented on the data sheet, so very + * conservative ones used in implementation. This implements + * figure 12 on the data sheet. + **/ +static void sht15_transmission_start(struct sht15_data *data) +{ + /* ensure data is high and output */ + gpio_direction_output(data->pdata->gpio_data, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_data, 0); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_data, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); +} +/** + * sht15_send_byte() - send a single byte to the device + * @data: device state + * @byte: value to be sent + **/ +static void sht15_send_byte(struct sht15_data *data, u8 byte) +{ + int i; + for (i = 0; i < 8; i++) { + sht15_send_bit(data, !!(byte & 0x80)); + byte <<= 1; + } +} +/** + * sht15_wait_for_response() - checks for ack from device + * @data: device state + **/ +static int sht15_wait_for_response(struct sht15_data *data) +{ + gpio_direction_input(data->pdata->gpio_data); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + if (gpio_get_value(data->pdata->gpio_data)) { + gpio_set_value(data->pdata->gpio_sck, 0); + dev_err(data->dev, "Command not acknowledged\n"); + sht15_connection_reset(data); + return -EIO; + } + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + return 0; +} + +/** + * sht15_send_cmd() - Sends a command to the device. 
+ * @data: device state + * @cmd: command byte to be sent + * + * On entry, sck is output low, data is output pull high + * and the interrupt disabled. + **/ +static int sht15_send_cmd(struct sht15_data *data, u8 cmd) +{ + int ret = 0; + sht15_transmission_start(data); + sht15_send_byte(data, cmd); + ret = sht15_wait_for_response(data); + return ret; +} +/** + * sht15_update_single_val() - get a new value from device + * @data: device instance specific data + * @command: command sent to request value + * @timeout_msecs: timeout after which comms are assumed + * to have failed are reset. + **/ +static inline int sht15_update_single_val(struct sht15_data *data, + int command, + int timeout_msecs) +{ + int ret; + ret = sht15_send_cmd(data, command); + if (ret) + return ret; + + gpio_direction_input(data->pdata->gpio_data); + atomic_set(&data->interrupt_handled, 0); + + enable_irq(gpio_to_irq(data->pdata->gpio_data)); + if (gpio_get_value(data->pdata->gpio_data) == 0) { + disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); + /* Only relevant if the interrupt hasn't occured. 
*/ + if (!atomic_read(&data->interrupt_handled)) + schedule_work(&data->read_work); + } + ret = wait_event_timeout(data->wait_queue, + (data->flag == SHT15_READING_NOTHING), + msecs_to_jiffies(timeout_msecs)); + if (ret == 0) {/* timeout occurred */ + disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data));; + sht15_connection_reset(data); + return -ETIME; + } + return 0; +} + +/** + * sht15_update_vals() - get updated readings from device if too old + * @data: device state + **/ +static int sht15_update_vals(struct sht15_data *data) +{ + int ret = 0; + int timeout = HZ; + + mutex_lock(&data->read_lock); + if (time_after(jiffies, data->last_updat + timeout) + || !data->valid) { + data->flag = SHT15_READING_HUMID; + ret = sht15_update_single_val(data, SHT15_MEASURE_RH, 160); + if (ret) + goto error_ret; + data->flag = SHT15_READING_TEMP; + ret = sht15_update_single_val(data, SHT15_MEASURE_TEMP, 400); + if (ret) + goto error_ret; + data->valid = 1; + data->last_updat = jiffies; + } +error_ret: + mutex_unlock(&data->read_lock); + + return ret; +} + +/** + * sht15_calc_temp() - convert the raw reading to a temperature + * @data: device state + * + * As per section 4.3 of the data sheet. + **/ +static inline int sht15_calc_temp(struct sht15_data *data) +{ + int d1 = 0; + int i; + + for (i = 1; i < ARRAY_SIZE(temppoints) - 1; i++) + /* Find pointer to interpolate */ + if (data->supply_uV > temppoints[i - 1].vdd) { + d1 = (data->supply_uV/1000 - temppoints[i - 1].vdd) + * (temppoints[i].d1 - temppoints[i - 1].d1) + / (temppoints[i].vdd - temppoints[i - 1].vdd) + + temppoints[i - 1].d1; + break; + } + + return data->val_temp*10 + d1; +} + +/** + * sht15_calc_humid() - using last temperature convert raw to humid + * @data: device state + * + * This is the temperature compensated version as per section 4.2 of + * the data sheet. 
+ **/ +static inline int sht15_calc_humid(struct sht15_data *data) +{ + int RHlinear; /* milli percent */ + int temp = sht15_calc_temp(data); + + const int c1 = -4; + const int c2 = 40500; /* x 10 ^ -6 */ + const int c3 = 2800; /* x10 ^ -9 */ + + RHlinear = c1*1000 + + c2 * data->val_humid/1000 + + (data->val_humid * data->val_humid * c3)/1000000; + return (temp - 25000) * (10000 + 800 * data->val_humid) + / 1000000 + RHlinear; +} + +static ssize_t sht15_show_temp(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + struct sht15_data *data = dev_get_drvdata(dev); + + /* Technically no need to read humidity as well */ + ret = sht15_update_vals(data); + + return ret ? ret : sprintf(buf, "%d\n", + sht15_calc_temp(data)); +} + +static ssize_t sht15_show_humidity(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + struct sht15_data *data = dev_get_drvdata(dev); + + ret = sht15_update_vals(data); + + return ret ? ret : sprintf(buf, "%d\n", sht15_calc_humid(data)); + +}; +static ssize_t show_name(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + return sprintf(buf, "%s\n", pdev->name); +} + +static SENSOR_DEVICE_ATTR(temp1_input, + S_IRUGO, sht15_show_temp, + NULL, 0); +static SENSOR_DEVICE_ATTR(humidity1_input, + S_IRUGO, sht15_show_humidity, + NULL, 0); +static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); +static struct attribute *sht15_attrs[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_humidity1_input.dev_attr.attr, + &dev_attr_name.attr, + NULL, +}; + +static const struct attribute_group sht15_attr_group = { + .attrs = sht15_attrs, +}; + +static irqreturn_t sht15_interrupt_fired(int irq, void *d) +{ + struct sht15_data *data = d; + /* First disable the interrupt */ + disable_irq_nosync(irq); + atomic_inc(&data->interrupt_handled); + /* Then schedule a reading work struct */ + if (data->flag != 
SHT15_READING_NOTHING) + schedule_work(&data->read_work); + return IRQ_HANDLED; +} + +/* Each byte of data is acknowledged by pulling the data line + * low for one clock pulse. + */ +static void sht15_ack(struct sht15_data *data) +{ + gpio_direction_output(data->pdata->gpio_data, 0); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_data, 1); + + gpio_direction_input(data->pdata->gpio_data); +} +/** + * sht15_end_transmission() - notify device of end of transmission + * @data: device state + * + * This is basically a NAK. (single clock pulse, data high) + **/ +static void sht15_end_transmission(struct sht15_data *data) +{ + gpio_direction_output(data->pdata->gpio_data, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); +} + +static void sht15_bh_read_data(struct work_struct *work_s) +{ + int i; + uint16_t val = 0; + struct sht15_data *data + = container_of(work_s, struct sht15_data, + read_work); + /* Firstly, verify the line is low */ + if (gpio_get_value(data->pdata->gpio_data)) { + /* If not, then start the interrupt again - care + here as could have gone low in meantime so verify + it hasn't! 
+ */ + atomic_set(&data->interrupt_handled, 0); + enable_irq(gpio_to_irq(data->pdata->gpio_data)); + /* If still not occured or another handler has been scheduled */ + if (gpio_get_value(data->pdata->gpio_data) + || atomic_read(&data->interrupt_handled)) + return; + } + /* Read the data back from the device */ + for (i = 0; i < 16; ++i) { + val <<= 1; + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + val |= !!gpio_get_value(data->pdata->gpio_data); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + if (i == 7) + sht15_ack(data); + } + /* Tell the device we are done */ + sht15_end_transmission(data); + + switch (data->flag) { + case SHT15_READING_TEMP: + data->val_temp = val; + break; + case SHT15_READING_HUMID: + data->val_humid = val; + break; + } + + data->flag = SHT15_READING_NOTHING; + wake_up(&data->wait_queue); +} + +static void sht15_update_voltage(struct work_struct *work_s) +{ + struct sht15_data *data + = container_of(work_s, struct sht15_data, + update_supply_work); + data->supply_uV = regulator_get_voltage(data->reg); +} + +/** + * sht15_invalidate_voltage() - mark supply voltage invalid when notified by reg + * @nb: associated notification structure + * @event: voltage regulator state change event code + * @ignored: function parameter - ignored here + * + * Note that as the notification code holds the regulator lock, we have + * to schedule an update of the supply voltage rather than getting it directly. 
+ **/ +static int sht15_invalidate_voltage(struct notifier_block *nb, + unsigned long event, + void *ignored) +{ + struct sht15_data *data = container_of(nb, struct sht15_data, nb); + + if (event == REGULATOR_EVENT_VOLTAGE_CHANGE) + data->supply_uV_valid = false; + schedule_work(&data->update_supply_work); + + return NOTIFY_OK; +} + +static int __devinit sht15_probe(struct platform_device *pdev) +{ + int ret = 0; + struct sht15_data *data = kzalloc(sizeof(*data), GFP_KERNEL); + + if (!data) { + ret = -ENOMEM; + dev_err(&pdev->dev, "kzalloc failed"); + goto error_ret; + } + + INIT_WORK(&data->read_work, sht15_bh_read_data); + INIT_WORK(&data->update_supply_work, sht15_update_voltage); + platform_set_drvdata(pdev, data); + mutex_init(&data->read_lock); + data->dev = &pdev->dev; + init_waitqueue_head(&data->wait_queue); + + if (pdev->dev.platform_data == NULL) { + dev_err(&pdev->dev, "no platform data supplied"); + goto err_free_data; + } + data->pdata = pdev->dev.platform_data; + data->supply_uV = data->pdata->supply_mv*1000; + +/* If a regulator is available, query what the supply voltage actually is!*/ + data->reg = regulator_get(data->dev, "vcc"); + if (!IS_ERR(data->reg)) { + data->supply_uV = regulator_get_voltage(data->reg); + regulator_enable(data->reg); + /* setup a notifier block to update this if another device + * causes the voltage to change */ + data->nb.notifier_call = &sht15_invalidate_voltage; + ret = regulator_register_notifier(data->reg, &data->nb); + } +/* Try requesting the GPIOs */ + ret = gpio_request(data->pdata->gpio_sck, "SHT15 sck"); + if (ret) { + dev_err(&pdev->dev, "gpio request failed"); + goto err_free_data; + } + gpio_direction_output(data->pdata->gpio_sck, 0); + ret = gpio_request(data->pdata->gpio_data, "SHT15 data"); + if (ret) { + dev_err(&pdev->dev, "gpio request failed"); + goto err_release_gpio_sck; + } + ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group); + if (ret) { + dev_err(&pdev->dev, "sysfs create failed"); + 
goto err_free_data; + } + + ret = request_irq(gpio_to_irq(data->pdata->gpio_data), + sht15_interrupt_fired, + IRQF_TRIGGER_FALLING, + "sht15 data", + data); + if (ret) { + dev_err(&pdev->dev, "failed to get irq for data line"); + goto err_release_gpio_data; + } + disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); + sht15_connection_reset(data); + sht15_send_cmd(data, 0x1E); + + data->hwmon_dev = hwmon_device_register(data->dev); + if (IS_ERR(data->hwmon_dev)) { + ret = PTR_ERR(data->hwmon_dev); + goto err_release_gpio_data; + } + return 0; + +err_release_gpio_data: + gpio_free(data->pdata->gpio_data); +err_release_gpio_sck: + gpio_free(data->pdata->gpio_sck); +err_free_data: + kfree(data); +error_ret: + + return ret; +} + +static int __devexit sht15_remove(struct platform_device *pdev) +{ + struct sht15_data *data = platform_get_drvdata(pdev); + + /* Make sure any reads from the device are done and + * prevent new ones beginnning */ + mutex_lock(&data->read_lock); + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group); + if (!IS_ERR(data->reg)) { + regulator_unregister_notifier(data->reg, &data->nb); + regulator_disable(data->reg); + regulator_put(data->reg); + } + + free_irq(gpio_to_irq(data->pdata->gpio_data), data); + gpio_free(data->pdata->gpio_data); + gpio_free(data->pdata->gpio_sck); + mutex_unlock(&data->read_lock); + kfree(data); + return 0; +} + + +static struct platform_driver sht_drivers[] = { + { + .driver = { + .name = "sht10", + .owner = THIS_MODULE, + }, + .probe = sht15_probe, + .remove = sht15_remove, + }, { + .driver = { + .name = "sht11", + .owner = THIS_MODULE, + }, + .probe = sht15_probe, + .remove = sht15_remove, + }, { + .driver = { + .name = "sht15", + .owner = THIS_MODULE, + }, + .probe = sht15_probe, + .remove = sht15_remove, + }, { + .driver = { + .name = "sht71", + .owner = THIS_MODULE, + }, + .probe = sht15_probe, + .remove = sht15_remove, + }, { + .driver = { + .name = "sht75", + 
.owner = THIS_MODULE, + }, + .probe = sht15_probe, + .remove = sht15_remove, + }, +}; + + +static int __init sht15_init(void) +{ + int ret; + int i; + + for (i = 0; i < ARRAY_SIZE(sht_drivers); i++) { + ret = platform_driver_register(&sht_drivers[i]); + if (ret) + goto error_unreg; + } + + return 0; + +error_unreg: + while (--i >= 0) + platform_driver_unregister(&sht_drivers[i]); + + return ret; +} +module_init(sht15_init); + +static void __exit sht15_exit(void) +{ + int i; + for (i = ARRAY_SIZE(sht_drivers) - 1; i >= 0; i--) + platform_driver_unregister(&sht_drivers[i]); +} +module_exit(sht15_exit); + +MODULE_LICENSE("GPL"); diff --git a/include/linux/sht15.h b/include/linux/sht15.h new file mode 100644 index 000000000000..046bce05ecab --- /dev/null +++ b/include/linux/sht15.h @@ -0,0 +1,24 @@ +/* + * sht15.h - support for the SHT15 Temperature and Humidity Sensor + * + * Copyright (c) 2009 Jonathan Cameron + * + * Copyright (c) 2007 Wouter Horre + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/** + * struct sht15_platform_data - sht15 connectivity info + * @gpio_data: no. of gpio to which bidirectional data line is connected + * @gpio_sck: no. of gpio to which the data clock is connected. + * @supply_mv: supply voltage in mv. Overridden by regulator if available. + **/ +struct sht15_platform_data { + int gpio_data; + int gpio_sck; + int supply_mv; +}; + -- cgit v1.2.3-71-gd317 From 17a5138d204014b00cb9c1d6e8ff311993041b5c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 13 Apr 2009 14:39:47 -0700 Subject: aio: remove INIT_KIOCTX Unused after 20dcae32439384b6863c626bb3b2a09bed65b33e aka "[PATCH] aio: remove kioctx from mm_struct". 
Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dcfb93337e9a..d87247d2641f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,19 +15,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; -#define INIT_KIOCTX(name, which_mm) \ -{ \ - .users = ATOMIC_INIT(1), \ - .dead = 0, \ - .mm = &which_mm, \ - .user_id = 0, \ - .next = NULL, \ - .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \ - .ctx_lock = __SPIN_LOCK_UNLOCKED(name.ctx_lock), \ - .reqs_active = 0U, \ - .max_reqs = ~0U, \ -} - #define INIT_MM(name) \ { \ .mm_rb = RB_ROOT, \ -- cgit v1.2.3-71-gd317 From 5dec8bfbdd4921522565a7b0e0c8760ae042ef6d Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 13 Apr 2009 14:39:54 -0700 Subject: include/linux/fiemap.h: include types.h now that it's exported Include <linux/types.h> in fiemap.h. Sam Ravnborg pointed out that this was missing in this newly-exported header which uses the __u32 and __u64 types. Signed-off-by: Eric Sandeen Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fiemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h index 671decbd2aeb..934e22d65801 100644 --- a/include/linux/fiemap.h +++ b/include/linux/fiemap.h @@ -11,6 +11,8 @@ #ifndef _LINUX_FIEMAP_H #define _LINUX_FIEMAP_H +#include <linux/types.h> + struct fiemap_extent { __u64 fe_logical; /* logical offset in bytes for the start of * the extent from the beginning of the file */ -- cgit v1.2.3-71-gd317 From 347486bb108fa6e0fd2753c1be3519d6be2516ed Mon Sep 17 00:00:00 2001 From: Stefan Husemann Date: Mon, 13 Apr 2009 14:40:10 -0700 Subject: intelfb: support i854 Support the Intel 854 Chipset in fbdev. 
We test and use the patch on a Thomson IP1101 IPTV-Box. On the VGA-Port we get a normal signal. Here is the link to the Mambux-Project: http://www.mambux.de Cc: Keith Packard Cc: Dave Airlie Cc: Krzysztof Helt Signed-off-by: Stefan Husemann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/agp/intel-agp.c | 3 +++ drivers/video/intelfb/intelfb.h | 2 ++ drivers/video/intelfb/intelfb_i2c.c | 1 + drivers/video/intelfb/intelfbdrv.c | 1 + drivers/video/intelfb/intelfbhw.c | 5 +++++ include/drm/drm_pciids.h | 2 ++ include/linux/pci_ids.h | 2 ++ 7 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 9d9490e22e07..3686912427ba 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -2131,6 +2131,8 @@ static const struct intel_driver_description { { PCI_DEVICE_ID_INTEL_82845G_HB, PCI_DEVICE_ID_INTEL_82845G_IG, 0, "830M", &intel_845_driver, &intel_830_driver }, { PCI_DEVICE_ID_INTEL_82850_HB, 0, 0, "i850", &intel_850_driver, NULL }, + { PCI_DEVICE_ID_INTEL_82854_HB, PCI_DEVICE_ID_INTEL_82854_IG, 0, "854", + &intel_845_driver, &intel_830_driver }, { PCI_DEVICE_ID_INTEL_82855PM_HB, 0, 0, "855PM", &intel_845_driver, NULL }, { PCI_DEVICE_ID_INTEL_82855GM_HB, PCI_DEVICE_ID_INTEL_82855GM_IG, 0, "855GM", &intel_845_driver, &intel_830_driver }, @@ -2355,6 +2357,7 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_82845_HB), ID(PCI_DEVICE_ID_INTEL_82845G_HB), ID(PCI_DEVICE_ID_INTEL_82850_HB), + ID(PCI_DEVICE_ID_INTEL_82854_HB), ID(PCI_DEVICE_ID_INTEL_82855PM_HB), ID(PCI_DEVICE_ID_INTEL_82855GM_HB), ID(PCI_DEVICE_ID_INTEL_82860_HB), diff --git a/drivers/video/intelfb/intelfb.h b/drivers/video/intelfb/intelfb.h index a50bea614804..40984551c927 100644 --- a/drivers/video/intelfb/intelfb.h +++ b/drivers/video/intelfb/intelfb.h @@ -53,6 +53,7 @@ #define PCI_DEVICE_ID_INTEL_830M 0x3577 #define PCI_DEVICE_ID_INTEL_845G 0x2562 
#define PCI_DEVICE_ID_INTEL_85XGM 0x3582 +#define PCI_DEVICE_ID_INTEL_854 0x358E #define PCI_DEVICE_ID_INTEL_865G 0x2572 #define PCI_DEVICE_ID_INTEL_915G 0x2582 #define PCI_DEVICE_ID_INTEL_915GM 0x2592 @@ -154,6 +155,7 @@ enum intel_chips { INTEL_85XGM, INTEL_852GM, INTEL_852GME, + INTEL_854, INTEL_855GM, INTEL_855GME, INTEL_865G, diff --git a/drivers/video/intelfb/intelfb_i2c.c b/drivers/video/intelfb/intelfb_i2c.c index b3065492bb20..487f2be47460 100644 --- a/drivers/video/intelfb/intelfb_i2c.c +++ b/drivers/video/intelfb/intelfb_i2c.c @@ -156,6 +156,7 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo) switch(dinfo->chipset) { case INTEL_830M: case INTEL_845G: + case INTEL_854: case INTEL_855GM: case INTEL_865G: dinfo->output[i].type = INTELFB_OUTPUT_DVO; diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c index 6d8e5415c809..ace14fe02fc4 100644 --- a/drivers/video/intelfb/intelfbdrv.c +++ b/drivers/video/intelfb/intelfbdrv.c @@ -182,6 +182,7 @@ static struct pci_device_id intelfb_pci_table[] __devinitdata = { { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_845G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_845G }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_85XGM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_85XGM }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_865G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_865G }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_854, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_854 }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915G }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915GM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915GM }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_945G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 
INTELFB_CLASS_MASK, INTEL_945G }, diff --git a/drivers/video/intelfb/intelfbhw.c b/drivers/video/intelfb/intelfbhw.c index 8b26b27c2db6..0689f97c5238 100644 --- a/drivers/video/intelfb/intelfbhw.c +++ b/drivers/video/intelfb/intelfbhw.c @@ -84,6 +84,11 @@ int intelfbhw_get_chipset(struct pci_dev *pdev, struct intelfb_info *dinfo) dinfo->mobile = 0; dinfo->pll_index = PLLS_I8xx; return 0; + case PCI_DEVICE_ID_INTEL_854: + dinfo->mobile = 1; + dinfo->name = "Intel(R) 854"; + dinfo->chipset = INTEL_854; + return 0; case PCI_DEVICE_ID_INTEL_85XGM: tmp = 0; dinfo->mobile = 1; diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 2df74eb09563..9477af01a639 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -472,6 +472,7 @@ {0x8086, 0x2562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x3582, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x2572, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x8086, 0x358e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0, 0, 0} #define gamma_PCI_IDS \ @@ -533,4 +534,5 @@ {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0, 0, 0} diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ee98cd570885..06ba90c211a5 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2514,6 +2514,8 @@ #define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433 #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 +#define PCI_DEVICE_ID_INTEL_82854_HB 0x358c +#define PCI_DEVICE_ID_INTEL_82854_IG 0x358e #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 #define PCI_DEVICE_ID_INTEL_82855GM_IG 0x3582 #define PCI_DEVICE_ID_INTEL_E7520_MCH 0x3590 -- cgit v1.2.3-71-gd317 From 
27b19565fe4ca5b0e9d2ae98ce4b81ca728bf445 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 14 Apr 2009 11:03:12 +0200 Subject: lockdep: warn about lockdep disabling after kernel taint, fix Impact: build fix for Sparc and s390 Stephen Rothwell reported that the Sparc build broke: In file included from kernel/panic.c:12: include/linux/debug_locks.h: In function '__debug_locks_off': include/linux/debug_locks.h:15: error: implicit declaration of function 'xchg' due to: 9eeba61: lockdep: warn about lockdep disabling after kernel taint There is some inconsistency between architectures about where exactly xchg() is defined. The traditional place is in system.h but the more logical point for it is in atomic.h - where most architectures (especially new ones) have it defined. These architectures also still offer it via system.h. Some, such as Sparc or s390 only have it in asm/system.h and not available via asm/atomic.h at all. Use the widest set of headers in debug_locks.h and also include asm/system.h. Reported-by: Stephen Rothwell Cc: Frederic Weisbecker Cc: "David S. Miller" Cc: Linus Torvalds LKML-Reference: <20090414144317.026498df.sfr@canb.auug.org.au> Signed-off-by: Ingo Molnar --- include/linux/debug_locks.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 493dedb7a67b..29b3ce3f2a1d 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -3,6 +3,7 @@ #include #include +#include <asm/system.h> struct task_struct; -- cgit v1.2.3-71-gd317 From ef631b0ca01655d24e9ca7e199262c4a46416a26 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Apr 2009 21:31:16 -0700 Subject: rcu: Make hierarchical RCU less IPI-happy This patch fixes a hierarchical-RCU performance bug located by Anton Blanchard. The problem stems from a misguided attempt to provide a work-around for jiffies-counter failure.
This work-around uses a per-CPU n_rcu_pending counter, which is incremented on each call to rcu_pending(), which in turn is called from each scheduling-clock interrupt. Each CPU then treats this counter as a surrogate for the jiffies counter, so that if the jiffies counter fails to advance, the per-CPU n_rcu_pending counter will cause RCU to invoke force_quiescent_state(), which in turn will (among other things) send resched IPIs to CPUs that have thus far failed to pass through an RCU quiescent state. Unfortunately, each CPU resets only its own counter after sending a batch of IPIs. This means that the other CPUs will also (needlessly) send -another- round of IPIs, for a full N-squared set of IPIs in the worst case every three scheduler-clock ticks until the grace period finally ends. It is not reasonable for a given CPU to reset each and every n_rcu_pending for all the other CPUs, so this patch instead simply disables the jiffies-counter "training wheels", thus eliminating the excessive IPIs. Note that the jiffies-counter IPIs do not have this problem due to the fact that the jiffies counter is global, so that the CPU sending the IPIs can easily reset things, thus preventing the other CPUs from sending redundant IPIs. Note also that the n_rcu_pending counter remains, as it will continue to be used for tracing. It may also see use to update the jiffies counter, should an appropriate kick-the-jiffies-counter API appear. Located-by: Anton Blanchard Tested-by: Anton Blanchard Signed-off-by: Paul E. McKenney Cc: anton@samba.org Cc: akpm@linux-foundation.org Cc: dipankar@in.ibm.com Cc: manfred@colorfullife.com Cc: cl@linux-foundation.org Cc: josht@linux.vnet.ibm.com Cc: schamp@sgi.com Cc: niv@us.ibm.com Cc: dvhltc@us.ibm.com Cc: ego@in.ibm.com Cc: laijs@cn.fujitsu.com Cc: rostedt@goodmis.org Cc: peterz@infradead.org Cc: penberg@cs.helsinki.fi Cc: andi@firstfloor.org Cc: "Paul E. 
McKenney" LKML-Reference: <12396834793575-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutree.h | 3 +-- kernel/rcutree.c | 19 ++++--------------- kernel/rcutree_trace.c | 14 +++++--------- 3 files changed, 10 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 0cdda00f2b2a..58b2aa5312b9 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -161,9 +161,8 @@ struct rcu_data { unsigned long offline_fqs; /* Kicked due to being offline. */ unsigned long resched_ipi; /* Sent a resched IPI. */ - /* 5) state to allow this CPU to force_quiescent_state on others */ + /* 5) For future __rcu_pending statistics. */ long n_rcu_pending; /* rcu_pending() calls since boot. */ - long n_rcu_pending_force_qs; /* when to force quiescent states. */ int cpu; }; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 7f3266922572..d2a372fb0b9b 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -530,8 +530,6 @@ static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) rdp->qs_pending = 1; rdp->passed_quiesc = 0; rdp->gpnum = rsp->gpnum; - rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + - RCU_JIFFIES_TILL_FORCE_QS; } /* @@ -578,8 +576,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rsp->gpnum++; rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. 
*/ rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; - rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + - RCU_JIFFIES_TILL_FORCE_QS; record_gp_stall_check_time(rsp); dyntick_record_completed(rsp, rsp->completed - 1); note_new_gpnum(rsp, rdp); @@ -1055,7 +1051,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) { unsigned long flags; long lastcomp; - struct rcu_data *rdp = rsp->rda[smp_processor_id()]; struct rcu_node *rnp = rcu_get_root(rsp); u8 signaled; @@ -1066,16 +1061,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) return; /* Someone else is already on the job. */ } if (relaxed && - (long)(rsp->jiffies_force_qs - jiffies) >= 0 && - (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0) + (long)(rsp->jiffies_force_qs - jiffies) >= 0) goto unlock_ret; /* no emergency and done recently. */ rsp->n_force_qs++; spin_lock(&rnp->lock); lastcomp = rsp->completed; signaled = rsp->signaled; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; - rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + - RCU_JIFFIES_TILL_FORCE_QS; if (lastcomp == rsp->gpnum) { rsp->n_force_qs_ngp++; spin_unlock(&rnp->lock); @@ -1144,8 +1136,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) * If an RCU GP has gone long enough, go check for dyntick * idle CPUs and, if needed, send resched IPIs. 
*/ - if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || - (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0) + if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) force_quiescent_state(rsp, 1); /* @@ -1230,8 +1221,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), if (unlikely(++rdp->qlen > qhimark)) { rdp->blimit = LONG_MAX; force_quiescent_state(rsp, 0); - } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || - (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0) + } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) force_quiescent_state(rsp, 1); local_irq_restore(flags); } @@ -1290,8 +1280,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Has an RCU GP gone long enough to send resched IPIs &c? */ if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && - ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || - (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)) + ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) return 1; /* nothing to do */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 4ee954f6a8d5..4b1875ba9404 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -49,14 +49,12 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) return; - seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x", + seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' 
: ' ', rdp->completed, rdp->gpnum, rdp->passed_quiesc, rdp->passed_quiesc_completed, - rdp->qs_pending, - rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending, - (int)(rdp->n_rcu_pending & 0xffff)); + rdp->qs_pending); #ifdef CONFIG_NO_HZ seq_printf(m, " dt=%d/%d dn=%d df=%lu", rdp->dynticks->dynticks, @@ -102,14 +100,12 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) return; - seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld", + seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"", rdp->completed, rdp->gpnum, rdp->passed_quiesc, rdp->passed_quiesc_completed, - rdp->qs_pending, - rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending, - rdp->n_rcu_pending); + rdp->qs_pending); #ifdef CONFIG_NO_HZ seq_printf(m, ",%d,%d,%d,%lu", rdp->dynticks->dynticks, @@ -123,7 +119,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) static int show_rcudata_csv(struct seq_file *m, void *unused) { - seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\","); + seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); #ifdef CONFIG_NO_HZ seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ -- cgit v1.2.3-71-gd317 From 67c457a8c378a006a34d92f9bd3078a80a92f250 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 14 Apr 2009 07:50:56 -0400 Subject: jbd2: use SWRITE_SYNC_PLUG when writing synchronous revoke records The revoke records must be written using the same way as the rest of the blocks during the commit process; that is, either marked as synchronous writes or as asynchronous writes.
Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 3 ++- fs/jbd2/revoke.c | 21 ++++++++++++--------- include/linux/jbd2.h | 3 ++- 3 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 073c8c3df7cd..0b7d3b8226fd 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -506,7 +506,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) if (err) jbd2_journal_abort(journal, err); - jbd2_journal_write_revoke_records(journal, commit_transaction); + jbd2_journal_write_revoke_records(journal, commit_transaction, + write_op); jbd_debug(3, "JBD: commit phase 2\n"); diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index bbe6d592d8b3..a360b06af2e3 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -86,6 +86,7 @@ #include #include #include +#include #endif #include @@ -118,8 +119,8 @@ struct jbd2_revoke_table_s #ifdef __KERNEL__ static void write_one_revoke_record(journal_t *, transaction_t *, struct journal_head **, int *, - struct jbd2_revoke_record_s *); -static void flush_descriptor(journal_t *, struct journal_head *, int); + struct jbd2_revoke_record_s *, int); +static void flush_descriptor(journal_t *, struct journal_head *, int, int); #endif /* Utility functions to maintain the revoke table */ @@ -499,7 +500,8 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) * revoke hash, deleting the entries as we go. 
*/ void jbd2_journal_write_revoke_records(journal_t *journal, - transaction_t *transaction) + transaction_t *transaction, + int write_op) { struct journal_head *descriptor; struct jbd2_revoke_record_s *record; @@ -523,14 +525,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal, hash_list->next; write_one_revoke_record(journal, transaction, &descriptor, &offset, - record); + record, write_op); count++; list_del(&record->hash); kmem_cache_free(jbd2_revoke_record_cache, record); } } if (descriptor) - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); jbd_debug(1, "Wrote %d revoke records\n", count); } @@ -543,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, struct journal_head **descriptorp, int *offsetp, - struct jbd2_revoke_record_s *record) + struct jbd2_revoke_record_s *record, + int write_op) { struct journal_head *descriptor; int offset; @@ -562,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal, /* Make sure we have a descriptor with space left for the record */ if (descriptor) { if (offset == journal->j_blocksize) { - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); descriptor = NULL; } } @@ -607,7 +610,7 @@ static void write_one_revoke_record(journal_t *journal, static void flush_descriptor(journal_t *journal, struct journal_head *descriptor, - int offset) + int offset, int write_op) { jbd2_journal_revoke_header_t *header; struct buffer_head *bh = jh2bh(descriptor); @@ -622,7 +625,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block(SWRITE, 1, &bh); + ll_rw_block((write_op == WRITE) ? 
SWRITE : SWRITE_SYNC_PLUG, 1, &bh); } #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8815a3456b3b..cc02393bfce8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1193,7 +1193,8 @@ extern int jbd2_journal_init_revoke_caches(void); extern void jbd2_journal_destroy_revoke(journal_t *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); -extern void jbd2_journal_write_revoke_records(journal_t *, transaction_t *); +extern void jbd2_journal_write_revoke_records(journal_t *, + transaction_t *, int); /* Recovery revoke support */ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); -- cgit v1.2.3-71-gd317 From 38d726d153cfe5efe5fe22d28d36ab382dda3a5c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 14 Apr 2009 10:10:47 -0400 Subject: jbd: use SWRITE_SYNC_PLUG when writing synchronous revoke records The revoke records must be written using the same way as the rest of the blocks during the commit process; that is, either marked as synchronous writes or as asynchronous writes.
Signed-off-by: "Theodore Ts'o" --- fs/jbd/commit.c | 2 +- fs/jbd/revoke.c | 20 +++++++++++--------- include/linux/jbd.h | 3 ++- 3 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index a8e8513a78a9..06560c520f49 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -502,7 +502,7 @@ void journal_commit_transaction(journal_t *journal) err = 0; } - journal_write_revoke_records(journal, commit_transaction); + journal_write_revoke_records(journal, commit_transaction, write_op); /* * If we found any dirty or locked buffers, then we should have diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index c7bd649bbbdc..1b1a06e1c836 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -67,6 +67,7 @@ #include #include #include +#include #endif #include @@ -99,8 +100,8 @@ struct jbd_revoke_table_s #ifdef __KERNEL__ static void write_one_revoke_record(journal_t *, transaction_t *, struct journal_head **, int *, - struct jbd_revoke_record_s *); -static void flush_descriptor(journal_t *, struct journal_head *, int); + struct jbd_revoke_record_s *, int); +static void flush_descriptor(journal_t *, struct journal_head *, int, int); #endif /* Utility functions to maintain the revoke table */ @@ -486,7 +487,7 @@ void journal_switch_revoke_table(journal_t *journal) */ void journal_write_revoke_records(journal_t *journal, - transaction_t *transaction) + transaction_t *transaction, int write_op) { struct journal_head *descriptor; struct jbd_revoke_record_s *record; @@ -510,14 +511,14 @@ void journal_write_revoke_records(journal_t *journal, hash_list->next; write_one_revoke_record(journal, transaction, &descriptor, &offset, - record); + record, write_op); count++; list_del(&record->hash); kmem_cache_free(revoke_record_cache, record); } } if (descriptor) - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); jbd_debug(1, "Wrote %d revoke records\n", count); } @@ -530,7 
+531,8 @@ static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, struct journal_head **descriptorp, int *offsetp, - struct jbd_revoke_record_s *record) + struct jbd_revoke_record_s *record, + int write_op) { struct journal_head *descriptor; int offset; @@ -549,7 +551,7 @@ static void write_one_revoke_record(journal_t *journal, /* Make sure we have a descriptor with space left for the record */ if (descriptor) { if (offset == journal->j_blocksize) { - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); descriptor = NULL; } } @@ -586,7 +588,7 @@ static void write_one_revoke_record(journal_t *journal, static void flush_descriptor(journal_t *journal, struct journal_head *descriptor, - int offset) + int offset, int write_op) { journal_revoke_header_t *header; struct buffer_head *bh = jh2bh(descriptor); @@ -601,7 +603,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block(SWRITE, 1, &bh); + ll_rw_block((write_op == WRITE) ? 
SWRITE : SWRITE_SYNC_PLUG, 1, &bh); } #endif diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 53ae4399da2d..c2049a04fa0b 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -978,7 +978,8 @@ extern void journal_destroy_revoke(journal_t *); extern int journal_revoke (handle_t *, unsigned long, struct buffer_head *); extern int journal_cancel_revoke(handle_t *, struct journal_head *); -extern void journal_write_revoke_records(journal_t *, transaction_t *); +extern void journal_write_revoke_records(journal_t *, + transaction_t *, int); /* Recovery revoke support */ extern int journal_set_revoke(journal_t *, unsigned long, tid_t); -- cgit v1.2.3-71-gd317 From 78c5b82ee68207a176ad5ca5eabdb2dbe5cfbfd3 Mon Sep 17 00:00:00 2001 From: Leandro Dorileo Date: Tue, 14 Apr 2009 14:59:51 +0100 Subject: tty: Update some of the USB kernel doc Updates some usb_serial_port members documentation. Signed-off-by: Leandro Dorileo Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- include/linux/usb/serial.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index b95842542590..625e9e4639c6 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -29,7 +29,7 @@ /** * usb_serial_port: structure for the specific ports of a device. * @serial: pointer back to the struct usb_serial owner of this port. - * @tty: pointer to the corresponding tty for this port. + * @port: pointer to the corresponding tty_port for this port. * @lock: spinlock to grab when updating portions of this structure. * @mutex: mutex used to synchronize serial_open() and serial_close() * access for this port. @@ -44,19 +44,22 @@ * @interrupt_out_endpointAddress: endpoint address for the interrupt out pipe * for this port. * @bulk_in_buffer: pointer to the bulk in buffer for this port. + * @bulk_in_size: the size of the bulk_in_buffer, in bytes. 
* @read_urb: pointer to the bulk in struct urb for this port. * @bulk_in_endpointAddress: endpoint address for the bulk in pipe for this * port. * @bulk_out_buffer: pointer to the bulk out buffer for this port. * @bulk_out_size: the size of the bulk_out_buffer, in bytes. * @write_urb: pointer to the bulk out struct urb for this port. + * @write_urb_busy: port's writing status * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this * port. * @write_wait: a wait_queue_head_t used by the port. * @work: work queue entry for the line discipline waking up. - * @open_count: number of times this port has been opened. * @throttled: nonzero if the read urb is inactive to throttle the device * @throttle_req: nonzero if the tty wants to throttle us + * @console: attached usb serial console + * @dev: pointer to the serial device * * This structure is used by the usb-serial core and drivers for the specific * ports of a device. -- cgit v1.2.3-71-gd317 From 8f3d8ba20e67991b531e9c0227dcd1f99271a32c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Apr 2009 19:55:13 +0200 Subject: block: move bio list helpers into bio.h It's used by DM and MD and generally useful, so move the bio list helpers into bio.h.
Signed-off-by: Christoph Hellwig Acked-by: Alasdair G Kergon Signed-off-by: Jens Axboe --- drivers/md/dm-bio-list.h | 117 -------------------------------------------- drivers/md/dm-delay.c | 2 - drivers/md/dm-mpath.c | 1 - drivers/md/dm-raid1.c | 1 - drivers/md/dm-region-hash.c | 1 - drivers/md/dm-snap.c | 1 - drivers/md/dm.c | 1 - drivers/md/raid1.c | 1 - drivers/md/raid10.c | 1 - include/linux/bio.h | 109 +++++++++++++++++++++++++++++++++++++++++ 10 files changed, 109 insertions(+), 126 deletions(-) delete mode 100644 drivers/md/dm-bio-list.h (limited to 'include/linux') diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h deleted file mode 100644 index 345098b4ca77..000000000000 --- a/drivers/md/dm-bio-list.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (C) 2004 Red Hat UK Ltd. - * - * This file is released under the GPL. - */ - -#ifndef DM_BIO_LIST_H -#define DM_BIO_LIST_H - -#include - -#ifdef CONFIG_BLOCK - -struct bio_list { - struct bio *head; - struct bio *tail; -}; - -static inline int bio_list_empty(const struct bio_list *bl) -{ - return bl->head == NULL; -} - -static inline void bio_list_init(struct bio_list *bl) -{ - bl->head = bl->tail = NULL; -} - -#define bio_list_for_each(bio, bl) \ - for (bio = (bl)->head; bio; bio = bio->bi_next) - -static inline unsigned bio_list_size(const struct bio_list *bl) -{ - unsigned sz = 0; - struct bio *bio; - - bio_list_for_each(bio, bl) - sz++; - - return sz; -} - -static inline void bio_list_add(struct bio_list *bl, struct bio *bio) -{ - bio->bi_next = NULL; - - if (bl->tail) - bl->tail->bi_next = bio; - else - bl->head = bio; - - bl->tail = bio; -} - -static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) -{ - bio->bi_next = bl->head; - - bl->head = bio; - - if (!bl->tail) - bl->tail = bio; -} - -static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) -{ - if (!bl2->head) - return; - - if (bl->tail) - bl->tail->bi_next = bl2->head; - else - bl->head = 
bl2->head; - - bl->tail = bl2->tail; -} - -static inline void bio_list_merge_head(struct bio_list *bl, - struct bio_list *bl2) -{ - if (!bl2->head) - return; - - if (bl->head) - bl2->tail->bi_next = bl->head; - else - bl->tail = bl2->tail; - - bl->head = bl2->head; -} - -static inline struct bio *bio_list_pop(struct bio_list *bl) -{ - struct bio *bio = bl->head; - - if (bio) { - bl->head = bl->head->bi_next; - if (!bl->head) - bl->tail = NULL; - - bio->bi_next = NULL; - } - - return bio; -} - -static inline struct bio *bio_list_get(struct bio_list *bl) -{ - struct bio *bio = bl->head; - - bl->head = bl->tail = NULL; - - return bio; -} - -#endif /* CONFIG_BLOCK */ -#endif diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 59ee1b015d2d..559dbb52bc85 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -15,8 +15,6 @@ #include -#include "dm-bio-list.h" - #define DM_MSG_PREFIX "delay" struct delay_c { diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 095f77bf9681..6a386ab4f7eb 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -8,7 +8,6 @@ #include #include "dm-path-selector.h" -#include "dm-bio-list.h" #include "dm-bio-record.h" #include "dm-uevent.h" diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 536ef0bef154..076fbb4e967a 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -5,7 +5,6 @@ * This file is released under the GPL. 
*/ -#include "dm-bio-list.h" #include "dm-bio-record.h" #include diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 59f8d9df9e1a..7b899be0b087 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -14,7 +14,6 @@ #include #include "dm.h" -#include "dm-bio-list.h" #define DM_MSG_PREFIX "region hash" diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 981a0413068f..d73f17fc7778 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -22,7 +22,6 @@ #include #include "dm-exception-store.h" -#include "dm-bio-list.h" #define DM_MSG_PREFIX "snapshots" diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8a994be035ba..424f7b048c30 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -6,7 +6,6 @@ */ #include "dm.h" -#include "dm-bio-list.h" #include "dm-uevent.h" #include diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 274b491a11c1..36df9109cde1 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -35,7 +35,6 @@ #include #include #include "md.h" -#include "dm-bio-list.h" #include "raid1.h" #include "bitmap.h" diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e293d92641ac..81a54f17417e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -22,7 +22,6 @@ #include #include #include "md.h" -#include "dm-bio-list.h" #include "raid10.h" #include "bitmap.h" diff --git a/include/linux/bio.h b/include/linux/bio.h index b900d2c67d29..b89cf2d82898 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -504,6 +504,115 @@ static inline int bio_has_data(struct bio *bio) return bio && bio->bi_io_vec != NULL; } +/* + * BIO list management for use by remapping drivers (e.g. DM or MD). + * + * A bio_list anchors a singly-linked list of bios chained through the bi_next + * member of the bio. The bio_list also caches the last list member to allow + * fast access to the tail.
+ */ +struct bio_list { + struct bio *head; + struct bio *tail; +}; + +static inline int bio_list_empty(const struct bio_list *bl) +{ + return bl->head == NULL; +} + +static inline void bio_list_init(struct bio_list *bl) +{ + bl->head = bl->tail = NULL; +} + +#define bio_list_for_each(bio, bl) \ + for (bio = (bl)->head; bio; bio = bio->bi_next) + +static inline unsigned bio_list_size(const struct bio_list *bl) +{ + unsigned sz = 0; + struct bio *bio; + + bio_list_for_each(bio, bl) + sz++; + + return sz; +} + +static inline void bio_list_add(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = NULL; + + if (bl->tail) + bl->tail->bi_next = bio; + else + bl->head = bio; + + bl->tail = bio; +} + +static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = bl->head; + + bl->head = bio; + + if (!bl->tail) + bl->tail = bio; +} + +static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->tail) + bl->tail->bi_next = bl2->head; + else + bl->head = bl2->head; + + bl->tail = bl2->tail; +} + +static inline void bio_list_merge_head(struct bio_list *bl, + struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->head) + bl2->tail->bi_next = bl->head; + else + bl->tail = bl2->tail; + + bl->head = bl2->head; +} + +static inline struct bio *bio_list_pop(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + if (bio) { + bl->head = bl->head->bi_next; + if (!bl->head) + bl->tail = NULL; + + bio->bi_next = NULL; + } + + return bio; +} + +static inline struct bio *bio_list_get(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + bl->head = bl->tail = NULL; + + return bio; +} + #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) -- cgit v1.2.3-71-gd317 From 48e70bc18ac81881dedd3aa327c55b924fc41ecf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Apr 2009 08:19:27 +0200 Subject: Document and move the various READ/WRITE types 
It's a somewhat twisty maze of hints and behavioural modifiers, try and clear it up a bit with some documentation. Signed-off-by: Jens Axboe --- include/linux/fs.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 562d2855cf30..b535aec4406b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,6 +87,60 @@ struct inodes_stat_t { */ #define FMODE_NOCMTIME ((__force fmode_t)2048) +/* + * The below are the various read and write types that we support. Some of + * them include behavioral modifiers that send information down to the + * block layer and IO scheduler. Terminology: + * + * The block layer uses device plugging to defer IO a little bit, in + * the hope that we will see more IO very shortly. This increases + * coalescing of adjacent IO and thus reduces the number of IOs we + * have to send to the device. It also allows for better queuing, + * if the IO isn't mergeable. If the caller is going to be waiting + * for the IO, then he must ensure that the device is unplugged so + * that the IO is dispatched to the driver. + * + * All IO is handled async in Linux. This is fine for background + * writes, but for reads or writes that someone waits for completion + * on, we want to notify the block layer and IO scheduler so that they + * know about it. That allows them to make better scheduling + * decisions. So when the below references 'sync' and 'async', it + * is referencing this priority hint. + * + * With that in mind, the available types are: + * + * READ A normal read operation. Device will be plugged. + * READ_SYNC A synchronous read. Device is not plugged, caller can + * immediately wait on this read without caring about + * unplugging. + * READA Used for read-ahead operations. Lower priority, and the + * block layer could (in theory) choose to ignore this + * request if it runs into resource problems. 
+ * WRITE A normal async write. Device will be plugged. + * SWRITE Like WRITE, but a special case for ll_rw_block() that + * tells it to lock the buffer first. Normally a buffer + * must be locked before doing IO. + * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down + * the hint that someone will be waiting on this IO + * shortly. The device must still be unplugged explicitly, + * WRITE_SYNC_PLUG does not do this as we could be + * submitting more writes before we actually wait on any + * of them. + * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device + * immediately after submission. The write equivalent + * of READ_SYNC. + * WRITE_ODIRECT Special case write for O_DIRECT only. + * SWRITE_SYNC + * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. + * See SWRITE. + * WRITE_BARRIER Like WRITE, but tells the block layer that all + * previously submitted writes must be safely on storage + * before this one is started. Also guarantees that when + * this write is complete, it itself is also safely on + * storage. Prevents reordering of writes on both sides + * of this IO. + * + */ #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 @@ -102,6 +156,11 @@ struct inodes_stat_t { (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) + +/* + * These aren't really reads or writes, they pass down information about + * parts of device that are now unused by the file system. 
+ */ #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) -- cgit v1.2.3-71-gd317 From b3c2d2ddd63944ef2a1e4a43077b602288107e01 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:36 +0200 Subject: splice: split up __splice_from_pipe() Split up __splice_from_pipe() into four helper functions: splice_from_pipe_begin() splice_from_pipe_next() splice_from_pipe_feed() splice_from_pipe_end() splice_from_pipe_next() will wait (if necessary) for more buffers to be added to the pipe. splice_from_pipe_feed() will feed the buffers to the supplied actor and return when there's no more data available (or if all of the requested data has been copied). This is necessary so that implementations can do locking around the non-waiting splice_from_pipe_feed(). This patch should not cause any change in behavior. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/splice.c | 217 ++++++++++++++++++++++++++++++++----------------- include/linux/splice.h | 10 +++ 2 files changed, 153 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/fs/splice.c b/fs/splice.c index c18aa7e03e2b..fd6b278d447b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -601,107 +601,176 @@ out: return ret; } +static void wakeup_pipe_writers(struct pipe_inode_info *pipe) +{ + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); +} + /** - * __splice_from_pipe - splice data from a pipe to given actor + * splice_from_pipe_feed - feed available data from a pipe to a file * @pipe: pipe to splice from * @sd: information to @actor * @actor: handler that splices the data * * Description: - * This function does little more than loop over the pipe and call - * @actor to do the actual moving of a single struct pipe_buffer to - * the desired destination. See pipe_to_file, pipe_to_sendpage, or - * pipe_to_user. 
+ + * This function loops over the pipe and calls @actor to do the + * actual moving of a single struct pipe_buffer to the desired + * destination. It returns when there's no more buffers left in + * the pipe or if the requested number of bytes (@sd->total_len) + * have been copied. It returns a positive number (one) if the + * pipe needs to be filled with more data, zero if the required + * number of bytes have been copied and -errno on error. * + * This, together with splice_from_pipe_{begin,end,next}, may be + * used to implement the functionality of __splice_from_pipe() when + * locking is required around copying the pipe buffers to the + * destination. */ -ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, - splice_actor *actor) +int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, + splice_actor *actor) { - int ret, do_wakeup, err; - - ret = 0; - do_wakeup = 0; - - for (;;) { - if (pipe->nrbufs) { - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; - const struct pipe_buf_operations *ops = buf->ops; + int ret; - sd->len = buf->len; - if (sd->len > sd->total_len) - sd->len = sd->total_len; + while (pipe->nrbufs) { + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; + const struct pipe_buf_operations *ops = buf->ops; - err = actor(pipe, buf, sd); - if (err <= 0) { - if (!ret && err != -ENODATA) - ret = err; + sd->len = buf->len; + if (sd->len > sd->total_len) + sd->len = sd->total_len; - break; - } + ret = actor(pipe, buf, sd); + if (ret <= 0) { + if (ret == -ENODATA) + ret = 0; + return ret; + } + buf->offset += ret; + buf->len -= ret; - ret += err; - buf->offset += err; - buf->len -= err; + sd->num_spliced += ret; + sd->len -= ret; + sd->pos += ret; + sd->total_len -= ret; - sd->len -= err; - sd->pos += err; - sd->total_len -= err; - if (sd->len) - continue; + if (!buf->len) { + buf->ops = NULL; + ops->release(pipe, buf); + pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); + pipe->nrbufs--; 
+ if (pipe->inode) + sd->need_wakeup = true; + } - if (!buf->len) { - buf->ops = NULL; - ops->release(pipe, buf); - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); - pipe->nrbufs--; - if (pipe->inode) - do_wakeup = 1; - } + if (!sd->total_len) + return 0; + } - if (!sd->total_len) - break; - } + return 1; +} +EXPORT_SYMBOL(splice_from_pipe_feed); - if (pipe->nrbufs) - continue; +/** + * splice_from_pipe_next - wait for some data to splice from + * @pipe: pipe to splice from + * @sd: information about the splice operation + * + * Description: + * This function will wait for some data and return a positive + * value (one) if pipe buffers are available. It will return zero + * or -errno if no more data needs to be spliced. + */ +int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) +{ + while (!pipe->nrbufs) { if (!pipe->writers) - break; - if (!pipe->waiting_writers) { - if (ret) - break; - } + return 0; - if (sd->flags & SPLICE_F_NONBLOCK) { - if (!ret) - ret = -EAGAIN; - break; - } + if (!pipe->waiting_writers && sd->num_spliced) + return 0; - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; - break; - } + if (sd->flags & SPLICE_F_NONBLOCK) + return -EAGAIN; - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible_sync(&pipe->wait); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - do_wakeup = 0; + if (signal_pending(current)) + return -ERESTARTSYS; + + if (sd->need_wakeup) { + wakeup_pipe_writers(pipe); + sd->need_wakeup = false; } pipe_wait(pipe); } - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - } + return 1; +} +EXPORT_SYMBOL(splice_from_pipe_next); - return ret; +/** + * splice_from_pipe_begin - start splicing from pipe + * @pipe: pipe to splice from + * + * Description: + * This function should be called before a loop containing + * splice_from_pipe_next() 
and splice_from_pipe_feed() to + * initialize the necessary fields of @sd. + */ +void splice_from_pipe_begin(struct splice_desc *sd) +{ + sd->num_spliced = 0; + sd->need_wakeup = false; +} +EXPORT_SYMBOL(splice_from_pipe_begin); + +/** + * splice_from_pipe_end - finish splicing from pipe + * @pipe: pipe to splice from + * @sd: information about the splice operation + * + * Description: + * This function will wake up pipe writers if necessary. It should + * be called after a loop containing splice_from_pipe_next() and + * splice_from_pipe_feed(). + */ +void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) +{ + if (sd->need_wakeup) + wakeup_pipe_writers(pipe); +} +EXPORT_SYMBOL(splice_from_pipe_end); + +/** + * __splice_from_pipe - splice data from a pipe to given actor + * @pipe: pipe to splice from + * @sd: information to @actor + * @actor: handler that splices the data + * + * Description: + * This function does little more than loop over the pipe and call + * @actor to do the actual moving of a single struct pipe_buffer to + * the desired destination. See pipe_to_file, pipe_to_sendpage, or + * pipe_to_user. + * + */ +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, + splice_actor *actor) +{ + int ret; + + splice_from_pipe_begin(sd); + do { + ret = splice_from_pipe_next(pipe, sd); + if (ret > 0) + ret = splice_from_pipe_feed(pipe, sd, actor); + } while (ret > 0); + splice_from_pipe_end(pipe, sd); + + return sd->num_spliced ? 
sd->num_spliced : ret; } EXPORT_SYMBOL(__splice_from_pipe); diff --git a/include/linux/splice.h b/include/linux/splice.h index 528dcb93c2f2..8fc2a635586e 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -36,6 +36,8 @@ struct splice_desc { void *data; /* cookie */ } u; loff_t pos; /* file position */ + size_t num_spliced; /* number of bytes already spliced */ + bool need_wakeup; /* need to wake up writer */ }; struct partial_page { @@ -66,6 +68,14 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, splice_actor *); extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct splice_desc *, splice_actor *); +extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *, + splice_actor *); +extern int splice_from_pipe_next(struct pipe_inode_info *, + struct splice_desc *); +extern void splice_from_pipe_begin(struct splice_desc *); +extern void splice_from_pipe_end(struct pipe_inode_info *, + struct splice_desc *); + extern ssize_t splice_to_pipe(struct pipe_inode_info *, struct splice_pipe_desc *); extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, -- cgit v1.2.3-71-gd317 From 328eaaba4e41a04c1dc4679d65bea3fee4349d86 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:39 +0200 Subject: ocfs2: fix i_mutex locking in ocfs2_splice_to_file() Rearrange locking of i_mutex on destination and call to ocfs2_rw_lock() so locks are only held while buffers are copied with the pipe_to_file() actor, and not while waiting for more data on the pipe. 
Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/ocfs2/file.c | 94 +++++++++++++++++++++++++++++++++++++++----------- fs/splice.c | 5 +-- include/linux/splice.h | 2 ++ 3 files changed, 79 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8672b9536039..c2a87c885b73 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1912,6 +1912,22 @@ out_sems: return written ? written : ret; } +static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, + struct file *out, + struct splice_desc *sd) +{ + int ret; + + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, + sd->total_len, 0, NULL); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + return splice_from_pipe_feed(pipe, sd, pipe_to_file); +} + static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, @@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, unsigned int flags) { int ret; - struct inode *inode = out->f_path.dentry->d_inode; + struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, (unsigned int)len, out->f_path.dentry->d_name.len, out->f_path.dentry->d_name.name); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); - ret = ocfs2_rw_lock(inode, 1); - if (ret < 0) { - mlog_errno(ret); - goto out; - } + splice_from_pipe_begin(&sd); + do { + ret = splice_from_pipe_next(pipe, &sd); + if (ret <= 0) + break; - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0, - NULL); - if (ret < 0) { - mlog_errno(ret); - goto out_unlock; - } + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + ret = ocfs2_rw_lock(inode, 1); + if (ret < 0) + mlog_errno(ret); + else { + 
ret = ocfs2_splice_to_file(pipe, out, &sd); + ocfs2_rw_unlock(inode, 1); + } + mutex_unlock(&inode->i_mutex); + } while (ret > 0); + splice_from_pipe_end(pipe, &sd); - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); - ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); -out_unlock: - ocfs2_rw_unlock(inode, 1); -out: - mutex_unlock(&inode->i_mutex); + if (sd.num_spliced) + ret = sd.num_spliced; + + if (ret > 0) { + unsigned long nr_pages; + + *ppos += ret; + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err; + + mutex_lock(&inode->i_mutex); + err = ocfs2_rw_lock(inode, 1); + if (err < 0) { + mlog_errno(err); + } else { + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + ocfs2_rw_unlock(inode, 1); + } + mutex_unlock(&inode->i_mutex); + + if (err) + ret = err; + } + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); + } mlog_exit(ret); return ret; diff --git a/fs/splice.c b/fs/splice.c index a1f595b9db40..584b2b7a1dbe 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -555,8 +555,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create * a new page in the output file page cache and fill/dirty that. 
*/ -static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) +int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) { struct file *file = sd->u.file; struct address_space *mapping = file->f_mapping; @@ -600,6 +600,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, out: return ret; } +EXPORT_SYMBOL(pipe_to_file); static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { diff --git a/include/linux/splice.h b/include/linux/splice.h index 8fc2a635586e..5f3faa9d15ae 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -75,6 +75,8 @@ extern int splice_from_pipe_next(struct pipe_inode_info *, extern void splice_from_pipe_begin(struct splice_desc *); extern void splice_from_pipe_end(struct pipe_inode_info *, struct splice_desc *); +extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *, + struct splice_desc *); extern ssize_t splice_to_pipe(struct pipe_inode_info *, struct splice_pipe_desc *); -- cgit v1.2.3-71-gd317 From f8cc774ce4844811a55e2352f1443055e3994e28 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:40 +0200 Subject: splice: remove generic_file_splice_write_nolock() Remove the now unused generic_file_splice_write_nolock() function. It's conceptually broken anyway, because splice may need to wait for pipe events so holding locks across the whole operation is wrong. 
Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/splice.c | 59 ------------------------------------------------------ include/linux/fs.h | 2 -- 2 files changed, 61 deletions(-) (limited to 'include/linux') diff --git a/fs/splice.c b/fs/splice.c index 584b2b7a1dbe..128ee36a719b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -810,65 +810,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, return ret; } -/** - * generic_file_splice_write_nolock - generic_file_splice_write without mutexes - * @pipe: pipe info - * @out: file to write to - * @ppos: position in @out - * @len: number of bytes to splice - * @flags: splice modifier flags - * - * Description: - * Will either move or copy pages (determined by @flags options) from - * the given pipe inode to the given file. The caller is responsible - * for acquiring i_mutex on both inodes. - * - */ -ssize_t -generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) -{ - struct address_space *mapping = out->f_mapping; - struct inode *inode = mapping->host; - struct splice_desc sd = { - .total_len = len, - .flags = flags, - .pos = *ppos, - .u.file = out, - }; - ssize_t ret; - int err; - - err = file_remove_suid(out); - if (unlikely(err)) - return err; - - ret = __splice_from_pipe(pipe, &sd, pipe_to_file); - if (ret > 0) { - unsigned long nr_pages; - - *ppos += ret; - nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - /* - * If file or inode is SYNC and we actually wrote some data, - * sync it. 
- */ - if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - err = generic_osync_inode(inode, mapping, - OSYNC_METADATA|OSYNC_DATA); - - if (err) - ret = err; - } - balance_dirty_pages_ratelimited_nr(mapping, nr_pages); - } - - return ret; -} - -EXPORT_SYMBOL(generic_file_splice_write_nolock); - /** * generic_file_splice_write - splice data from a pipe to a file * @pipe: pipe info diff --git a/include/linux/fs.h b/include/linux/fs.h index b535aec4406b..907d8f56c6fa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2209,8 +2209,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); -extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, - struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, -- cgit v1.2.3-71-gd317 From 61e0d47c33cc371f725bcda4a47ae0efe652dba8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:41 +0200 Subject: splice: add helpers for locking pipe inode There are lots of sequences like this, especially in splice code: if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); /* do something */ if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); so introduce helpers which do the conditional locking and unlocking. Also replace the inode_double_lock() call with a pipe_double_lock() helper to avoid spreading the use of this functionality beyond the pipe code. This patch is just a cleanup, and should cause no behavioral changes. 
Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/inode.c | 36 ---------------------------------- fs/pipe.c | 42 +++++++++++++++++++++++++++++++++++---- fs/splice.c | 50 ++++++++++++++++++++--------------------------- include/linux/fs.h | 3 --- include/linux/pipe_fs_i.h | 5 +++++ 5 files changed, 64 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index d06d6d268de9..6ad14a1cd8c9 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1470,42 +1470,6 @@ static void __wait_on_freeing_inode(struct inode *inode) spin_lock(&inode_lock); } -/* - * We rarely want to lock two inodes that do not have a parent/child - * relationship (such as directory, child inode) simultaneously. The - * vast majority of file systems should be able to get along fine - * without this. Do not use these functions except as a last resort. - */ -void inode_double_lock(struct inode *inode1, struct inode *inode2) -{ - if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { - if (inode1) - mutex_lock(&inode1->i_mutex); - else if (inode2) - mutex_lock(&inode2->i_mutex); - return; - } - - if (inode1 < inode2) { - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); - } -} -EXPORT_SYMBOL(inode_double_lock); - -void inode_double_unlock(struct inode *inode1, struct inode *inode2) -{ - if (inode1) - mutex_unlock(&inode1->i_mutex); - - if (inode2 && inode2 != inode1) - mutex_unlock(&inode2->i_mutex); -} -EXPORT_SYMBOL(inode_double_unlock); - static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { diff --git a/fs/pipe.c b/fs/pipe.c index 4af7aa521813..13414ec45b8d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -37,6 +37,42 @@ * -- Manfred Spraul 2002-05-09 */ +static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) +{ + if 
(pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, subclass); +} + +void pipe_lock(struct pipe_inode_info *pipe) +{ + /* + * pipe_lock() nests non-pipe inode locks (for writing to a file) + */ + pipe_lock_nested(pipe, I_MUTEX_PARENT); +} +EXPORT_SYMBOL(pipe_lock); + +void pipe_unlock(struct pipe_inode_info *pipe) +{ + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); +} +EXPORT_SYMBOL(pipe_unlock); + +void pipe_double_lock(struct pipe_inode_info *pipe1, + struct pipe_inode_info *pipe2) +{ + BUG_ON(pipe1 == pipe2); + + if (pipe1 < pipe2) { + pipe_lock_nested(pipe1, I_MUTEX_PARENT); + pipe_lock_nested(pipe2, I_MUTEX_CHILD); + } else { + pipe_lock_nested(pipe2, I_MUTEX_CHILD); + pipe_lock_nested(pipe1, I_MUTEX_PARENT); + } +} + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe) { @@ -47,12 +83,10 @@ void pipe_wait(struct pipe_inode_info *pipe) * is considered a noninteractive wait: */ prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); schedule(); finish_wait(&pipe->wait, &wait); - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); } static int diff --git a/fs/splice.c b/fs/splice.c index 128ee36a719b..5384a90665d0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -182,8 +182,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, do_wakeup = 0; page_nr = 0; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); for (;;) { if (!pipe->readers) { @@ -245,15 +244,13 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, pipe->waiting_writers--; } - if (pipe->inode) { - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + 
wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } while (page_nr < spd_pages) @@ -801,11 +798,9 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, actor); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -837,8 +832,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, }; ssize_t ret; - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); + pipe_lock(pipe); splice_from_pipe_begin(&sd); do { @@ -854,8 +848,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, } while (ret > 0); splice_from_pipe_end(pipe, &sd); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); if (sd.num_spliced) ret = sd.num_spliced; @@ -1348,8 +1341,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, if (!pipe) return -EBADF; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); error = ret = 0; while (nr_segs) { @@ -1404,8 +1396,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, iov++; } - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); if (!ret) ret = error; @@ -1533,7 +1524,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); while (!pipe->nrbufs) { if (signal_pending(current)) { @@ -1551,7 +1542,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe_wait(pipe); } - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -1571,7 +1562,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); 
while (pipe->nrbufs >= PIPE_BUFFERS) { if (!pipe->readers) { @@ -1592,7 +1583,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe->waiting_writers--; } - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -1608,10 +1599,10 @@ static int link_pipe(struct pipe_inode_info *ipipe, /* * Potential ABBA deadlock, work around it by ordering lock - * grabbing by inode address. Otherwise two different processes + * grabbing by pipe info address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ - inode_double_lock(ipipe->inode, opipe->inode); + pipe_double_lock(ipipe, opipe); do { if (!opipe->readers) { @@ -1662,7 +1653,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) ret = -EAGAIN; - inode_double_unlock(ipipe->inode, opipe->inode); + pipe_unlock(ipipe); + pipe_unlock(opipe); /* * If we put data in the output pipe, wakeup any potential readers. diff --git a/include/linux/fs.h b/include/linux/fs.h index 907d8f56c6fa..e766be0d4329 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -797,9 +797,6 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; -extern void inode_double_lock(struct inode *inode1, struct inode *inode2); -extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); - /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8e4120285f72..c8f038554e80 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -134,6 +134,11 @@ struct pipe_buf_operations { memory allocation, whereas PIPE_BUF makes atomicity guarantees. 
*/ #define PIPE_SIZE PAGE_SIZE +/* Pipe lock and unlock operations */ +void pipe_lock(struct pipe_inode_info *); +void pipe_unlock(struct pipe_inode_info *); +void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe); -- cgit v1.2.3-71-gd317 From 35c80d5f400f68f2eccf3069d1c068e154bde9c9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 15 Apr 2009 13:22:38 -0400 Subject: Add block_write_full_page_endio for passing endio handler block_write_full_page doesn't allow the caller to control what happens when the IO is over. This adds a new call named block_write_full_page_endio so the buffer head end_io handler can be provided by the caller. This will be used by the ext3 data=guarded mode to do i_size updates in a workqueue based end_io handler. end_buffer_async_write is also exported so it can be called to do the dirty work of managing page writeback for the higher level end_io handler. Signed-off-by: Chris Mason Acked-by: Theodore Tso Acked-by: Jan Kara Signed-off-by: Linus Torvalds --- fs/buffer.c | 45 ++++++++++++++++++++++++++++++++++----------- include/linux/buffer_head.h | 3 +++ 2 files changed, 37 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index ff8bb1f2333a..b3e5be7514f5 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -360,7 +360,7 @@ still_busy: * Completion handler for block_write_full_page() - pages which are unlocked * during I/O, and which have PageWriteback cleared upon I/O completion. 
*/ -static void end_buffer_async_write(struct buffer_head *bh, int uptodate) +void end_buffer_async_write(struct buffer_head *bh, int uptodate) { char b[BDEVNAME_SIZE]; unsigned long flags; @@ -438,11 +438,17 @@ static void mark_buffer_async_read(struct buffer_head *bh) set_buffer_async_read(bh); } -void mark_buffer_async_write(struct buffer_head *bh) +void mark_buffer_async_write_endio(struct buffer_head *bh, + bh_end_io_t *handler) { - bh->b_end_io = end_buffer_async_write; + bh->b_end_io = handler; set_buffer_async_write(bh); } + +void mark_buffer_async_write(struct buffer_head *bh) +{ + mark_buffer_async_write_endio(bh, end_buffer_async_write); +} EXPORT_SYMBOL(mark_buffer_async_write); @@ -1615,7 +1621,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata); * unplugging the device queue. */ static int __block_write_full_page(struct inode *inode, struct page *page, - get_block_t *get_block, struct writeback_control *wbc) + get_block_t *get_block, struct writeback_control *wbc, + bh_end_io_t *handler) { int err; sector_t block; @@ -1700,7 +1707,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, continue; } if (test_clear_buffer_dirty(bh)) { - mark_buffer_async_write(bh); + mark_buffer_async_write_endio(bh, handler); } else { unlock_buffer(bh); } @@ -1753,7 +1760,7 @@ recover: if (buffer_mapped(bh) && buffer_dirty(bh) && !buffer_delay(bh)) { lock_buffer(bh); - mark_buffer_async_write(bh); + mark_buffer_async_write_endio(bh, handler); } else { /* * The buffer may have been set dirty during @@ -2679,7 +2686,8 @@ int nobh_writepage(struct page *page, get_block_t *get_block, out: ret = mpage_writepage(page, get_block, wbc); if (ret == -EAGAIN) - ret = __block_write_full_page(inode, page, get_block, wbc); + ret = __block_write_full_page(inode, page, get_block, wbc, + end_buffer_async_write); return ret; } EXPORT_SYMBOL(nobh_writepage); @@ -2837,9 +2845,10 @@ out: /* * The generic ->writepage function for buffer-backed address_spaces + * this form 
passes in the end_io handler used to finish the IO. */ -int block_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc) +int block_write_full_page_endio(struct page *page, get_block_t *get_block, + struct writeback_control *wbc, bh_end_io_t *handler) { struct inode * const inode = page->mapping->host; loff_t i_size = i_size_read(inode); @@ -2848,7 +2857,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block, /* Is the page fully inside i_size? */ if (page->index < end_index) - return __block_write_full_page(inode, page, get_block, wbc); + return __block_write_full_page(inode, page, get_block, wbc, + handler); /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_CACHE_SIZE-1); @@ -2871,9 +2881,20 @@ int block_write_full_page(struct page *page, get_block_t *get_block, * writes to that region are not written out to the file." */ zero_user_segment(page, offset, PAGE_CACHE_SIZE); - return __block_write_full_page(inode, page, get_block, wbc); + return __block_write_full_page(inode, page, get_block, wbc, handler); } +/* + * The generic ->writepage function for buffer-backed address_spaces + */ +int block_write_full_page(struct page *page, get_block_t *get_block, + struct writeback_control *wbc) +{ + return block_write_full_page_endio(page, get_block, wbc, + end_buffer_async_write); +} + + sector_t generic_block_bmap(struct address_space *mapping, sector_t block, get_block_t *get_block) { @@ -3342,9 +3363,11 @@ EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_sync_page); EXPORT_SYMBOL(block_truncate_page); EXPORT_SYMBOL(block_write_full_page); +EXPORT_SYMBOL(block_write_full_page_endio); EXPORT_SYMBOL(cont_write_begin); EXPORT_SYMBOL(end_buffer_read_sync); EXPORT_SYMBOL(end_buffer_write_sync); +EXPORT_SYMBOL(end_buffer_async_write); EXPORT_SYMBOL(file_fsync); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_cont_expand_simple); diff --git a/include/linux/buffer_head.h 
b/include/linux/buffer_head.h index 7b73bb8f1970..16ed0284d780 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -155,6 +155,7 @@ void create_empty_buffers(struct page *, unsigned long, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); void end_buffer_write_sync(struct buffer_head *bh, int uptodate); +void end_buffer_async_write(struct buffer_head *bh, int uptodate); /* Things to do with buffers at mapping->private_list */ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); @@ -197,6 +198,8 @@ extern int buffer_heads_over_limit; void block_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); +int block_write_full_page_endio(struct page *page, get_block_t *get_block, + struct writeback_control *wbc, bh_end_io_t *handler); int block_read_full_page(struct page*, get_block_t*); int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, unsigned long from); -- cgit v1.2.3-71-gd317 From 412401029259b1ad67559cec93bcc7ee4a9551aa Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 16 Apr 2009 09:58:44 -0600 Subject: powerpc/5200: Bring the legacy fsl_spi_platform_data hooks back In commit 364fdbc00fbdd409ade63500710123fe323aa164 ("spi_mpc83xx: rework chip selects handling"), I merged activate_cs and deactivate_cs hooks into cs_control, but I overlooked that mpc52xx_psc_spi driver is using these hooks too. 
And that resulted in the following build failure: CC drivers/spi/mpc52xx_psc_spi.o drivers/spi/mpc52xx_psc_spi.c: In function 'mpc52xx_psc_spi_do_probe': drivers/spi/mpc52xx_psc_spi.c:398: error: 'struct fsl_spi_platform_data' has no member named 'activate_cs' drivers/spi/mpc52xx_psc_spi.c:399: error: 'struct fsl_spi_platform_data' has no member named 'deactivate_cs' make[2]: *** [drivers/spi/mpc52xx_psc_spi.o] Error 1 This patch simply adds the legacy hooks back for 2.6.30, and for 2.6.31 we'll convert the driver to ->cs_control. Reported-by: Subrata Modak Signed-off-by: Anton Vorontsov Signed-off-by: Grant Likely --- include/linux/fsl_devices.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index f2a78b5e8b55..0cde1806cfab 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -83,6 +83,10 @@ struct fsl_spi_platform_data { u16 max_chipselect; void (*cs_control)(struct spi_device *spi, bool on); u32 sysclk; + + /* Legacy hooks, used by mpc52xx_psc_spi driver. */ + void (*activate_cs)(u8 cs, u8 polarity); + void (*deactivate_cs)(u8 cs, u8 polarity); }; struct mpc8xx_pcmcia_ops { -- cgit v1.2.3-71-gd317 From e3cf95dd6d352954b663d2934110d6e30af2406d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 9 Apr 2009 17:31:17 +0100 Subject: ata: Report 16/32bit PIO as best we can The legacy old IDE ioctl API for this is a bit primitive so we try and map stuff sensibly onto it. 
- Set PIO over DMA devices to report 32bit - Add ability to change the PIO32 settings if the controller permits it - Add that functionality into the sff drivers - Add that functionality into the VLB legacy driver - Turn on the 32bit PIO on the ninja32 and add support there Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 30 ++++++++++++++++++++++++++---- drivers/ata/libata-sff.c | 27 +++++++++++++++++++++++++++ drivers/ata/pata_legacy.c | 33 ++++++++++++++++++++------------- drivers/ata/pata_ninja32.c | 4 +++- include/linux/libata.h | 8 ++++++++ 5 files changed, 84 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index b9747fa59e54..2733b0c90b75 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -647,23 +647,45 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) return rc; } +static int ata_ioc32(struct ata_port *ap) +{ + if (ap->flags & ATA_FLAG_PIO_DMA) + return 1; + if (ap->pflags & ATA_PFLAG_PIO32) + return 1; + return 0; +} + int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev, int cmd, void __user *arg) { int val = -EINVAL, rc = -EINVAL; + unsigned long flags; switch (cmd) { case ATA_IOC_GET_IO32: - val = 0; + spin_lock_irqsave(ap->lock, flags); + val = ata_ioc32(ap); + spin_unlock_irqrestore(ap->lock, flags); if (copy_to_user(arg, &val, 1)) return -EFAULT; return 0; case ATA_IOC_SET_IO32: val = (unsigned long) arg; - if (val != 0) - return -EINVAL; - return 0; + rc = 0; + spin_lock_irqsave(ap->lock, flags); + if (ap->pflags & ATA_PFLAG_PIO32CHANGE) { + if (val) + ap->pflags |= ATA_PFLAG_PIO32; + else + ap->pflags &= ~ATA_PFLAG_PIO32; + } else { + if (val != ata_ioc32(ap)) + rc = -EINVAL; + } + spin_unlock_irqrestore(ap->lock, flags); + return rc; case HDIO_GET_IDENTITY: return ata_get_identity(ap, scsidev, arg); diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 
8332e97a9de3..bb18415d3d63 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -87,6 +87,7 @@ const struct ata_port_operations ata_bmdma32_port_ops = { .inherits = &ata_bmdma_port_ops, .sff_data_xfer = ata_sff_data_xfer32, + .port_start = ata_sff_port_start32, }; EXPORT_SYMBOL_GPL(ata_bmdma32_port_ops); @@ -769,6 +770,9 @@ unsigned int ata_sff_data_xfer32(struct ata_device *dev, unsigned char *buf, void __iomem *data_addr = ap->ioaddr.data_addr; unsigned int words = buflen >> 2; int slop = buflen & 3; + + if (!(ap->pflags & ATA_PFLAG_PIO32)) + return ata_sff_data_xfer(dev, buf, buflen, rw); /* Transfer multiple of 4 bytes */ if (rw == READ) @@ -2401,6 +2405,29 @@ int ata_sff_port_start(struct ata_port *ap) } EXPORT_SYMBOL_GPL(ata_sff_port_start); +/** + * ata_sff_port_start32 - Set port up for dma. + * @ap: Port to initialize + * + * Called just after data structures for each port are + * initialized. Allocates space for PRD table if the device + * is DMA capable SFF. + * + * May be used as the port_start() entry in ata_port_operations for + * devices that are capable of 32bit PIO. + * + * LOCKING: + * Inherited from caller. + */ +int ata_sff_port_start32(struct ata_port *ap) +{ + ap->pflags |= ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE; + if (ap->ioaddr.bmdma_addr) + return ata_port_start(ap); + return 0; +} +EXPORT_SYMBOL_GPL(ata_sff_port_start32); + /** * ata_sff_std_ports - initialize ioaddr with standard port offsets. 
* @ioaddr: IO address structure to be initialized diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index 0c6dde80417b..6f985bed8cbb 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -108,6 +108,7 @@ struct legacy_controller { struct ata_port_operations *ops; unsigned int pio_mask; unsigned int flags; + unsigned int pflags; int (*setup)(struct platform_device *, struct legacy_probe *probe, struct legacy_data *data); }; @@ -285,7 +286,8 @@ static unsigned int pdc_data_xfer_vlb(struct ata_device *dev, { int slop = buflen & 3; /* 32bit I/O capable *and* we need to write a whole number of dwords */ - if (ata_id_has_dword_io(dev->id) && (slop == 0 || slop == 3)) { + if (ata_id_has_dword_io(dev->id) && (slop == 0 || slop == 3) + && (ap->pflags & ATA_PFLAG_PIO32)) { struct ata_port *ap = dev->link->ap; unsigned long flags; @@ -736,7 +738,8 @@ static unsigned int vlb32_data_xfer(struct ata_device *adev, unsigned char *buf, struct ata_port *ap = adev->link->ap; int slop = buflen & 3; - if (ata_id_has_dword_io(adev->id) && (slop == 0 || slop == 3)) { + if (ata_id_has_dword_io(adev->id) && (slop == 0 || slop == 3) + && (ap->pflags & ATA_PFLAG_PIO32)) { if (rw == WRITE) iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2); else @@ -858,27 +861,30 @@ static struct ata_port_operations winbond_port_ops = { static struct legacy_controller controllers[] = { {"BIOS", &legacy_port_ops, 0x1F, - ATA_FLAG_NO_IORDY, NULL }, + ATA_FLAG_NO_IORDY, 0, NULL }, {"Snooping", &simple_port_ops, 0x1F, - 0 , NULL }, + 0, 0, NULL }, {"PDC20230", &pdc20230_port_ops, 0x7, - ATA_FLAG_NO_IORDY, NULL }, + ATA_FLAG_NO_IORDY, + ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32_CHANGE, NULL }, {"HT6560A", &ht6560a_port_ops, 0x07, - ATA_FLAG_NO_IORDY, NULL }, + ATA_FLAG_NO_IORDY, 0, NULL }, {"HT6560B", &ht6560b_port_ops, 0x1F, - ATA_FLAG_NO_IORDY, NULL }, + ATA_FLAG_NO_IORDY, 0, NULL }, {"OPTI82C611A", &opti82c611a_port_ops, 0x0F, - 0 , NULL }, + 0, 0, NULL }, {"OPTI82C46X", 
&opti82c46x_port_ops, 0x0F, - 0 , NULL }, + 0, 0, NULL }, {"QDI6500", &qdi6500_port_ops, 0x07, - ATA_FLAG_NO_IORDY, qdi_port }, + ATA_FLAG_NO_IORDY, + ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32_CHANGE, qdi_port }, {"QDI6580", &qdi6580_port_ops, 0x1F, - 0 , qdi_port }, + 0, ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32_CHANGE, qdi_port }, {"QDI6580DP", &qdi6580dp_port_ops, 0x1F, - 0 , qdi_port }, + 0, ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32_CHANGE, qdi_port }, {"W83759A", &winbond_port_ops, 0x1F, - 0 , winbond_port } + 0, ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32_CHANGE, + winbond_port } }; /** @@ -1008,6 +1014,7 @@ static __init int legacy_init_one(struct legacy_probe *probe) ap->ops = ops; ap->pio_mask = pio_modes; ap->flags |= ATA_FLAG_SLAVE_POSS | iordy; + ap->pflags |= controller->pflags; ap->ioaddr.cmd_addr = io_addr; ap->ioaddr.altstatus_addr = ctrl_addr; ap->ioaddr.ctl_addr = ctrl_addr; diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c index 0fb6b1b1e634..dd53a66b19e3 100644 --- a/drivers/ata/pata_ninja32.c +++ b/drivers/ata/pata_ninja32.c @@ -44,7 +44,7 @@ #include #define DRV_NAME "pata_ninja32" -#define DRV_VERSION "0.1.3" +#define DRV_VERSION "0.1.5" /** @@ -86,6 +86,7 @@ static struct ata_port_operations ninja32_port_ops = { .sff_dev_select = ninja32_dev_select, .cable_detect = ata_cable_40wire, .set_piomode = ninja32_set_piomode, + .sff_data_xfer = ata_sff_data_xfer32 }; static void ninja32_program(void __iomem *base) @@ -144,6 +145,7 @@ static int ninja32_init_one(struct pci_dev *dev, const struct pci_device_id *id) ap->ioaddr.altstatus_addr = base + 0x1E; ap->ioaddr.bmdma_addr = base; ata_sff_std_ports(&ap->ioaddr); + ap->pflags = ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE; ninja32_program(base); /* FIXME: Should we disable them at remove ? 
*/ diff --git a/include/linux/libata.h b/include/linux/libata.h index b450a2628855..3d501db36a26 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -209,6 +209,7 @@ enum { /* bits 24:31 of ap->flags are reserved for LLD specific flags */ + /* struct ata_port pflags */ ATA_PFLAG_EH_PENDING = (1 << 0), /* EH pending */ ATA_PFLAG_EH_IN_PROGRESS = (1 << 1), /* EH in progress */ @@ -225,6 +226,9 @@ enum { ATA_PFLAG_PM_PENDING = (1 << 18), /* PM operation pending */ ATA_PFLAG_INIT_GTM_VALID = (1 << 19), /* initial gtm data valid */ + ATA_PFLAG_PIO32 = (1 << 20), /* 32bit PIO */ + ATA_PFLAG_PIO32CHANGE = (1 << 21), /* 32bit PIO can be turned on/off */ + /* struct ata_queued_cmd flags */ ATA_QCFLAG_ACTIVE = (1 << 0), /* cmd not yet ack'd to scsi lyer */ ATA_QCFLAG_DMAMAP = (1 << 1), /* SG table is DMA mapped */ @@ -689,7 +693,10 @@ struct ata_port { struct Scsi_Host *scsi_host; /* our co-allocated scsi host */ struct ata_port_operations *ops; spinlock_t *lock; + /* Flags owned by the EH context. Only EH should touch these once the + port is active */ unsigned long flags; /* ATA_FLAG_xxx */ + /* Flags that change dynamically, protected by ap->lock */ unsigned int pflags; /* ATA_PFLAG_xxx */ unsigned int print_id; /* user visible unique port ID */ unsigned int port_no; /* 0 based port no. 
inside the host */ @@ -1595,6 +1602,7 @@ extern void ata_sff_drain_fifo(struct ata_queued_cmd *qc); extern void ata_sff_error_handler(struct ata_port *ap); extern void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc); extern int ata_sff_port_start(struct ata_port *ap); +extern int ata_sff_port_start32(struct ata_port *ap); extern void ata_sff_std_ports(struct ata_ioports *ioaddr); extern unsigned long ata_bmdma_mode_filter(struct ata_device *dev, unsigned long xfer_mask); -- cgit v1.2.3-71-gd317 From 13977091a988fb0d21821c2221ddc920eba36b79 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 30 Mar 2009 14:37:25 -0700 Subject: Driver Core: early platform driver V3 of the early platform driver implementation. Platform drivers are great for embedded platforms because we can separate driver configuration from the actual driver. So base addresses, interrupts and other configuration can be kept with the processor or board code, and the platform driver can be reused by many different platforms. For early devices we have nothing today. For instance, to configure early timers and early serial ports we cannot use platform devices. This because the setup order during boot. Timers are needed before the platform driver core code is available. The same goes for early printk support. Early in this case means before initcalls. These early drivers today have their configuration either hard coded or they receive it using some special configuration method. This is working quite well, but if we want to support both regular kernel modules and early devices then we need to have two ways of configuring the same driver. A single way would be better. The early platform driver patch is basically a set of functions that allow drivers to register themselves and architecture code to locate them and probe. Registration happens through early_param(). The time for the probe is decided by the architecture code. See Documentation/driver-model/platform.txt for more details. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Magnus Damm Cc: Paul Mundt Cc: Kay Sievers Cc: David Brownell Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-model/platform.txt | 59 ++++++++ drivers/base/platform.c | 239 ++++++++++++++++++++++++++++++++ include/linux/init.h | 1 + include/linux/platform_device.h | 42 ++++++ init/main.c | 7 +- 5 files changed, 347 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt index 83009fdcbbc8..2e2c2ea90ceb 100644 --- a/Documentation/driver-model/platform.txt +++ b/Documentation/driver-model/platform.txt @@ -169,3 +169,62 @@ three different ways to find such a match: be probed later if another device registers. (Which is OK, since this interface is only for use with non-hotpluggable devices.) + +Early Platform Devices and Drivers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The early platform interfaces provide platform data to platform device +drivers early on during the system boot. The code is built on top of the +early_param() command line parsing and can be executed very early on. + +Example: "earlyprintk" class early serial console in 6 steps + +1. Registering early platform device data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code registers platform device data using the function +early_platform_add_devices(). In the case of early serial console this +should be hardware configuration for the serial port. Devices registered +at this point will later on be matched against early platform drivers. + +2. Parsing kernel command line +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code calls parse_early_param() to parse the kernel +command line. This will execute all matching early_param() callbacks. +User specified early platform devices will be registered at this point. 
+For the early serial console case the user can specify port on the +kernel command line as "earlyprintk=serial.0" where "earlyprintk" is +the class string, "serial" is the name of the platform driver and +0 is the platform device id. If the id is -1 then the dot and the +id can be omitted. + +3. Installing early platform drivers belonging to a certain class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code may optionally force registration of all early +platform drivers belonging to a certain class using the function +early_platform_driver_register_all(). User specified devices from +step 2 have priority over these. This step is omitted by the serial +driver example since the early serial driver code should be disabled +unless the user has specified port on the kernel command line. + +4. Early platform driver registration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Compiled-in platform drivers making use of early_platform_init() are +automatically registered during step 2 or 3. The serial driver example +should use early_platform_init("earlyprintk", &platform_driver). + +5. Probing of early platform drivers belonging to a certain class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code calls early_platform_driver_probe() to match +registered early platform devices associated with a certain class with +registered early platform drivers. Matched devices will get probed(). +This step can be executed at any point during the early boot. As soon +as possible may be good for the serial port case. + +6. Inside the early platform driver probe() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The driver code needs to take special care during early boot, especially +when it comes to memory allocation and interrupt registration. The code +in the probe() function can use is_early_platform_device() to check if +it is called at early platform device or at the regular platform device +time. 
The early serial driver performs register_console() at this point. + +For further information, see <linux/platform_device.h>. diff --git a/drivers/base/platform.c b/drivers/base/platform.c index d2198f64ad4e..b5b6c973a2e0 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -990,6 +990,8 @@ int __init platform_bus_init(void) { int error; + early_platform_cleanup(); + error = device_register(&platform_bus); if (error) return error; @@ -1020,3 +1022,240 @@ u64 dma_get_required_mask(struct device *dev) } EXPORT_SYMBOL_GPL(dma_get_required_mask); #endif + +static __initdata LIST_HEAD(early_platform_driver_list); +static __initdata LIST_HEAD(early_platform_device_list); + +/** + * early_platform_driver_register + * @edrv: early_platform driver structure + * @buf: string passed from early_param() + */ +int __init early_platform_driver_register(struct early_platform_driver *epdrv, + char *buf) +{ + unsigned long index; + int n; + + /* Simply add the driver to the end of the global list. + * Drivers will by default be put on the list in compiled-in order. + */ + if (!epdrv->list.next) { + INIT_LIST_HEAD(&epdrv->list); + list_add_tail(&epdrv->list, &early_platform_driver_list); + } + + /* If the user has specified device then make sure the driver + * gets prioritized. The driver of the last device specified on + * command line will be put first on the list. + */ + n = strlen(epdrv->pdrv->driver.name); + if (buf && !strncmp(buf, epdrv->pdrv->driver.name, n)) { + list_move(&epdrv->list, &early_platform_driver_list); + + if (!strcmp(buf, epdrv->pdrv->driver.name)) + epdrv->requested_id = -1; + else if (buf[n] == '.' 
&& strict_strtoul(&buf[n + 1], 10, + &index) == 0) + epdrv->requested_id = index; + else + epdrv->requested_id = EARLY_PLATFORM_ID_ERROR; + } + + return 0; +} + +/** + * early_platform_add_devices - add a numbers of early platform devices + * @devs: array of early platform devices to add + * @num: number of early platform devices in array + */ +void __init early_platform_add_devices(struct platform_device **devs, int num) +{ + struct device *dev; + int i; + + /* simply add the devices to list */ + for (i = 0; i < num; i++) { + dev = &devs[i]->dev; + + if (!dev->devres_head.next) { + INIT_LIST_HEAD(&dev->devres_head); + list_add_tail(&dev->devres_head, + &early_platform_device_list); + } + } +} + +/** + * early_platform_driver_register_all + * @class_str: string to identify early platform driver class + */ +void __init early_platform_driver_register_all(char *class_str) +{ + /* The "class_str" parameter may or may not be present on the kernel + * command line. If it is present then there may be more than one + * matching parameter. + * + * Since we register our early platform drivers using early_param() + * we need to make sure that they also get registered in the case + * when the parameter is missing from the kernel command line. + * + * We use parse_early_options() to make sure the early_param() gets + * called at least once. The early_param() may be called more than + * once since the name of the preferred device may be specified on + * the kernel command line. early_platform_driver_register() handles + * this case for us. 
+ */ + parse_early_options(class_str); +} + +/** + * early_platform_match + * @edrv: early platform driver structure + * @id: id to match against + */ +static __init struct platform_device * +early_platform_match(struct early_platform_driver *epdrv, int id) +{ + struct platform_device *pd; + + list_for_each_entry(pd, &early_platform_device_list, dev.devres_head) + if (platform_match(&pd->dev, &epdrv->pdrv->driver)) + if (pd->id == id) + return pd; + + return NULL; +} + +/** + * early_platform_left + * @edrv: early platform driver structure + * @id: return true if id or above exists + */ +static __init int early_platform_left(struct early_platform_driver *epdrv, + int id) +{ + struct platform_device *pd; + + list_for_each_entry(pd, &early_platform_device_list, dev.devres_head) + if (platform_match(&pd->dev, &epdrv->pdrv->driver)) + if (pd->id >= id) + return 1; + + return 0; +} + +/** + * early_platform_driver_probe_id + * @class_str: string to identify early platform driver class + * @id: id to match against + * @nr_probe: number of platform devices to successfully probe before exiting + */ +static int __init early_platform_driver_probe_id(char *class_str, + int id, + int nr_probe) +{ + struct early_platform_driver *epdrv; + struct platform_device *match; + int match_id; + int n = 0; + int left = 0; + + list_for_each_entry(epdrv, &early_platform_driver_list, list) { + /* only use drivers matching our class_str */ + if (strcmp(class_str, epdrv->class_str)) + continue; + + if (id == -2) { + match_id = epdrv->requested_id; + left = 1; + + } else { + match_id = id; + left += early_platform_left(epdrv, id); + + /* skip requested id */ + switch (epdrv->requested_id) { + case EARLY_PLATFORM_ID_ERROR: + case EARLY_PLATFORM_ID_UNSET: + break; + default: + if (epdrv->requested_id == id) + match_id = EARLY_PLATFORM_ID_UNSET; + } + } + + switch (match_id) { + case EARLY_PLATFORM_ID_ERROR: + pr_warning("%s: unable to parse %s parameter\n", + class_str, 
epdrv->pdrv->driver.name); + /* fall-through */ + case EARLY_PLATFORM_ID_UNSET: + match = NULL; + break; + default: + match = early_platform_match(epdrv, match_id); + } + + if (match) { + if (epdrv->pdrv->probe(match)) + pr_warning("%s: unable to probe %s early.\n", + class_str, match->name); + else + n++; + } + + if (n >= nr_probe) + break; + } + + if (left) + return n; + else + return -ENODEV; +} + +/** + * early_platform_driver_probe + * @class_str: string to identify early platform driver class + * @nr_probe: number of platform devices to successfully probe before exiting + * @user_only: only probe user specified early platform devices + */ +int __init early_platform_driver_probe(char *class_str, + int nr_probe, + int user_only) +{ + int k, n, i; + + n = 0; + for (i = -2; n < nr_probe; i++) { + k = early_platform_driver_probe_id(class_str, i, nr_probe - n); + + if (k < 0) + break; + + n += k; + + if (user_only) + break; + } + + return n; +} + +/** + * early_platform_cleanup - clean up early platform code + */ +void __init early_platform_cleanup(void) +{ + struct platform_device *pd, *pd2; + + /* clean up the devres list used to chain devices */ + list_for_each_entry_safe(pd, pd2, &early_platform_device_list, + dev.devres_head) { + list_del(&pd->dev.devres_head); + memset(&pd->dev.devres_head, 0, sizeof(pd->dev.devres_head)); + } +} + diff --git a/include/linux/init.h b/include/linux/init.h index 68cb0265d009..f121a7a10c3d 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -247,6 +247,7 @@ struct obs_kernel_param { /* Relies on boot_command_line being set */ void __init parse_early_param(void); +void __init parse_early_options(char *cmdline); #endif /* __ASSEMBLY__ */ /** diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 76e470a299bf..72736fd8223c 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -77,4 +77,46 @@ extern int platform_driver_probe(struct platform_driver *driver, 
#define platform_get_drvdata(_dev) dev_get_drvdata(&(_dev)->dev) #define platform_set_drvdata(_dev,data) dev_set_drvdata(&(_dev)->dev, (data)) +/* early platform driver interface */ +struct early_platform_driver { + const char *class_str; + struct platform_driver *pdrv; + struct list_head list; + int requested_id; +}; + +#define EARLY_PLATFORM_ID_UNSET -2 +#define EARLY_PLATFORM_ID_ERROR -3 + +extern int early_platform_driver_register(struct early_platform_driver *epdrv, + char *buf); +extern void early_platform_add_devices(struct platform_device **devs, int num); + +static inline int is_early_platform_device(struct platform_device *pdev) +{ + return !pdev->dev.driver; +} + +extern void early_platform_driver_register_all(char *class_str); +extern int early_platform_driver_probe(char *class_str, + int nr_probe, int user_only); +extern void early_platform_cleanup(void); + + +#ifndef MODULE +#define early_platform_init(class_string, platform_driver) \ +static __initdata struct early_platform_driver early_driver = { \ + .class_str = class_string, \ + .pdrv = platform_driver, \ + .requested_id = EARLY_PLATFORM_ID_UNSET, \ +}; \ +static int __init early_platform_driver_setup_func(char *buf) \ +{ \ + return early_platform_driver_register(&early_driver, buf); \ +} \ +early_param(class_string, early_platform_driver_setup_func) +#else /* MODULE */ +#define early_platform_init(class_string, platform_driver) +#endif /* MODULE */ + #endif /* _PLATFORM_DEVICE_H_ */ diff --git a/init/main.c b/init/main.c index 3585f073d636..3bbf93be744c 100644 --- a/init/main.c +++ b/init/main.c @@ -492,6 +492,11 @@ static int __init do_early_param(char *param, char *val) return 0; } +void __init parse_early_options(char *cmdline) +{ + parse_args("early options", cmdline, NULL, 0, do_early_param); +} + /* Arch code calls this early on, or if not, just before other parsing. 
*/ void __init parse_early_param(void) { @@ -503,7 +508,7 @@ void __init parse_early_param(void) /* All fall through to do_early_param. */ strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); - parse_args("early options", tmp_cmdline, NULL, 0, do_early_param); + parse_early_options(tmp_cmdline); done = 1; } -- cgit v1.2.3-71-gd317 From 4ccb457966391295bd9b3644f6bdc9ddd97b6051 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 9 Apr 2009 14:48:24 -0700 Subject: dynamic debug: resurrect old pr_debug() semantics as pr_devel() pr_debug() used to produce zero code unless DEBUG was #defined. This is now no longer the case in practice[1]. There are places where it's useful to have debugging printks, but we don't want them to generate any code in production kernels. So add a new macro, pr_devel(), for _devel_opment, to provide the old semantics, ie. if the programmer doesn't explicitly enable debugging, no code is produced. [1]: You can turn CONFIG_DYNAMIC_DEBUG off, but it's enabled in at least one distro kernel, so it's not really a solution. Signed-off-by: Michael Ellerman Cc: Jason Baron Cc: Greg Banks Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d9e75ec7def5..883cd44ff765 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -377,6 +377,15 @@ static inline char *pack_hex_byte(char *buf, u8 byte) #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) +/* pr_devel() should produce zero code unless DEBUG is defined */ +#ifdef DEBUG +#define pr_devel(fmt, ...) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_devel(fmt, ...) \ + ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) +#endif + /* If you are writing a driver, please use dev_dbg instead */ #if defined(DEBUG) #define pr_debug(fmt, ...) 
\ -- cgit v1.2.3-71-gd317 From 7607b1d673469d5b5dce4c9b6779d165e03c8ff5 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 8 Apr 2009 12:12:52 -0400 Subject: Driver core: remove pr_fmt() from dynamic_dev_dbg() printk When pr_fmt() was added to the pr_debug() code, we added it not only to the dynamic_pr_debug() function, but also to the dynamic_dev_dbg() function. However, dev_dbg() doesn't make use of pr_fmt(), so neither should dynamic_dev_dbg(). Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- include/linux/dynamic_debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index baabf33be244..a0d9422a1569 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -70,7 +70,7 @@ extern int ddebug_remove_module(char *mod_name); DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ if (__dynamic_dbg_enabled(descriptor)) \ dev_printk(KERN_DEBUG, dev, \ - KBUILD_MODNAME ": " pr_fmt(fmt),\ + KBUILD_MODNAME ": " fmt, \ ##__VA_ARGS__); \ } while (0) -- cgit v1.2.3-71-gd317 From 3444b26afa145148951112534f298bdc554ec789 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 8 Apr 2009 17:36:28 +0000 Subject: USB: add reset endpoint operations Wireless USB endpoint state has a sequence number and a current window and not just a single toggle bit. So allow HCDs to provide an endpoint_reset method and call this or clear the software toggles as required (after a clear halt, set configuration etc.). usb_settoggle() and friends are then HCD internal and are moved into core/hcd.h and all device drivers call usb_reset_endpoint() instead. If the device endpoint state has been reset (with a clear halt) but the host endpoint state has not then subsequent data transfers will not complete. The device will only work again after it is reset or disconnected. 
Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/block/ub.c | 20 +++++------ drivers/isdn/hisax/st5481_usb.c | 9 +---- drivers/media/video/pvrusb2/pvrusb2-hdw.c | 1 - drivers/usb/core/devio.c | 2 +- drivers/usb/core/hcd.c | 26 ++++++++++++++ drivers/usb/core/hcd.h | 14 ++++++++ drivers/usb/core/message.c | 58 ++++++++++++++++++++----------- drivers/usb/core/usb.c | 2 +- drivers/usb/storage/transport.c | 4 +-- include/linux/usb.h | 9 +---- 10 files changed, 91 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 69b7f8e77596..689cd27ac890 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -1025,6 +1025,7 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd) { struct urb *urb = &sc->work_urb; struct bulk_cs_wrap *bcs; + int endp; int len; int rc; @@ -1033,6 +1034,10 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd) return; } + endp = usb_pipeendpoint(sc->last_pipe); + if (usb_pipein(sc->last_pipe)) + endp |= USB_DIR_IN; + if (cmd->state == UB_CMDST_CLEAR) { if (urb->status == -EPIPE) { /* @@ -1048,9 +1053,7 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd) * We ignore the result for the halt clear. */ - /* reset the endpoint toggle */ - usb_settoggle(sc->dev, usb_pipeendpoint(sc->last_pipe), - usb_pipeout(sc->last_pipe), 0); + usb_reset_endpoint(sc->dev, endp); ub_state_sense(sc, cmd); @@ -1065,9 +1068,7 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd) * We ignore the result for the halt clear. */ - /* reset the endpoint toggle */ - usb_settoggle(sc->dev, usb_pipeendpoint(sc->last_pipe), - usb_pipeout(sc->last_pipe), 0); + usb_reset_endpoint(sc->dev, endp); ub_state_stat(sc, cmd); @@ -1082,9 +1083,7 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd) * We ignore the result for the halt clear. 
*/ - /* reset the endpoint toggle */ - usb_settoggle(sc->dev, usb_pipeendpoint(sc->last_pipe), - usb_pipeout(sc->last_pipe), 0); + usb_reset_endpoint(sc->dev, endp); ub_state_stat_counted(sc, cmd); @@ -2119,8 +2118,7 @@ static int ub_probe_clear_stall(struct ub_dev *sc, int stalled_pipe) del_timer_sync(&timer); usb_kill_urb(&sc->work_urb); - /* reset the endpoint toggle */ - usb_settoggle(sc->dev, endp, usb_pipeout(sc->last_pipe), 0); + usb_reset_endpoint(sc->dev, endp); return 0; } diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c index ec3c0e507669..2b3a055059ea 100644 --- a/drivers/isdn/hisax/st5481_usb.c +++ b/drivers/isdn/hisax/st5481_usb.c @@ -149,14 +149,7 @@ static void usb_ctrl_complete(struct urb *urb) if (ctrl_msg->dr.bRequest == USB_REQ_CLEAR_FEATURE) { /* Special case handling for pipe reset */ le16_to_cpus(&ctrl_msg->dr.wIndex); - - /* toggle is reset on clear */ - usb_settoggle(adapter->usb_dev, - ctrl_msg->dr.wIndex & ~USB_DIR_IN, - (ctrl_msg->dr.wIndex & USB_DIR_IN) == 0, - 0); - - + usb_reset_endpoint(adapter->usb_dev, ctrl_msg->dr.wIndex); } if (ctrl_msg->complete) diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw.c b/drivers/media/video/pvrusb2/pvrusb2-hdw.c index d9d974a8f52a..add3395d3248 100644 --- a/drivers/media/video/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/video/pvrusb2/pvrusb2-hdw.c @@ -1461,7 +1461,6 @@ static int pvr2_upload_firmware1(struct pvr2_hdw *hdw) return ret; } - usb_settoggle(hdw->usb_dev, 0 & 0xf, !(0 & USB_DIR_IN), 0); usb_clear_halt(hdw->usb_dev, usb_sndbulkpipe(hdw->usb_dev, 0 & 0x7f)); pipe = usb_sndctrlpipe(hdw->usb_dev, 0); diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index df3c539f652a..308609039c73 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -841,7 +841,7 @@ static int proc_resetep(struct dev_state *ps, void __user *arg) ret = checkintf(ps, ret); if (ret) return ret; - usb_settoggle(ps->dev, ep & 0xf, !(ep & USB_DIR_IN), 0); + 
usb_reset_endpoint(ps->dev, ep); return 0; } diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 81fa8506825d..42b93da1085d 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1539,6 +1539,32 @@ void usb_hcd_disable_endpoint(struct usb_device *udev, hcd->driver->endpoint_disable(hcd, ep); } +/** + * usb_hcd_reset_endpoint - reset host endpoint state + * @udev: USB device. + * @ep: the endpoint to reset. + * + * Resets any host endpoint state such as the toggle bit, sequence + * number and current window. + */ +void usb_hcd_reset_endpoint(struct usb_device *udev, + struct usb_host_endpoint *ep) +{ + struct usb_hcd *hcd = bus_to_hcd(udev->bus); + + if (hcd->driver->endpoint_reset) + hcd->driver->endpoint_reset(hcd, ep); + else { + int epnum = usb_endpoint_num(&ep->desc); + int is_out = usb_endpoint_dir_out(&ep->desc); + int is_control = usb_endpoint_xfer_control(&ep->desc); + + usb_settoggle(udev, epnum, is_out, 0); + if (is_control) + usb_settoggle(udev, epnum, !is_out, 0); + } +} + /* Protect against drivers that try to unlink URBs after the device * is gone, by waiting until all unlinks for @udev are finished. 
* Since we don't currently track URBs by device, simply wait until diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h index f750eb1ab595..e7d4479de41c 100644 --- a/drivers/usb/core/hcd.h +++ b/drivers/usb/core/hcd.h @@ -206,6 +206,11 @@ struct hc_driver { void (*endpoint_disable)(struct usb_hcd *hcd, struct usb_host_endpoint *ep); + /* (optional) reset any endpoint state such as sequence number + and current window */ + void (*endpoint_reset)(struct usb_hcd *hcd, + struct usb_host_endpoint *ep); + /* root hub support */ int (*hub_status_data) (struct usb_hcd *hcd, char *buf); int (*hub_control) (struct usb_hcd *hcd, @@ -234,6 +239,8 @@ extern void usb_hcd_flush_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); extern void usb_hcd_disable_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); +extern void usb_hcd_reset_endpoint(struct usb_device *udev, + struct usb_host_endpoint *ep); extern void usb_hcd_synchronize_unlinks(struct usb_device *udev); extern int usb_hcd_get_frame_number(struct usb_device *udev); @@ -279,6 +286,13 @@ extern irqreturn_t usb_hcd_irq(int irq, void *__hcd); extern void usb_hc_died(struct usb_hcd *hcd); extern void usb_hcd_poll_rh_status(struct usb_hcd *hcd); +/* The D0/D1 toggle bits ... 
USE WITH CAUTION (they're almost hcd-internal) */ +#define usb_gettoggle(dev, ep, out) (((dev)->toggle[out] >> (ep)) & 1) +#define usb_dotoggle(dev, ep, out) ((dev)->toggle[out] ^= (1 << (ep))) +#define usb_settoggle(dev, ep, out, bit) \ + ((dev)->toggle[out] = ((dev)->toggle[out] & ~(1 << (ep))) | \ + ((bit) << (ep))) + /* -------------------------------------------------------------------------- */ /* Enumeration is only for the hub driver, or HCD virtual root hubs */ diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 30a0690f3683..b62628377654 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1002,8 +1002,7 @@ int usb_clear_halt(struct usb_device *dev, int pipe) * the copy in usb-storage, for as long as we need two copies. */ - /* toggle was reset by the clear */ - usb_settoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe), 0); + usb_reset_endpoint(dev, endp); return 0; } @@ -1075,6 +1074,30 @@ void usb_disable_endpoint(struct usb_device *dev, unsigned int epaddr, } } +/** + * usb_reset_endpoint - Reset an endpoint's state. + * @dev: the device whose endpoint is to be reset + * @epaddr: the endpoint's address. Endpoint number for output, + * endpoint number + USB_DIR_IN for input + * + * Resets any host-side endpoint state such as the toggle bit, + * sequence number or current window. 
+ */ +void usb_reset_endpoint(struct usb_device *dev, unsigned int epaddr) +{ + unsigned int epnum = epaddr & USB_ENDPOINT_NUMBER_MASK; + struct usb_host_endpoint *ep; + + if (usb_endpoint_out(epaddr)) + ep = dev->ep_out[epnum]; + else + ep = dev->ep_in[epnum]; + if (ep) + usb_hcd_reset_endpoint(dev, ep); +} +EXPORT_SYMBOL_GPL(usb_reset_endpoint); + + /** * usb_disable_interface -- Disable all endpoints for an interface * @dev: the device whose interface is being disabled @@ -1117,7 +1140,6 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0) usb_disable_endpoint(dev, i, true); usb_disable_endpoint(dev, i + USB_DIR_IN, true); } - dev->toggle[0] = dev->toggle[1] = 0; /* getting rid of interfaces will disconnect * any drivers bound to them (a key side effect) @@ -1154,28 +1176,24 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0) * usb_enable_endpoint - Enable an endpoint for USB communications * @dev: the device whose interface is being enabled * @ep: the endpoint - * @reset_toggle: flag to set the endpoint's toggle back to 0 + * @reset_ep: flag to reset the endpoint state * - * Resets the endpoint toggle if asked, and sets dev->ep_{in,out} pointers. + * Resets the endpoint state if asked, and sets dev->ep_{in,out} pointers. * For control endpoints, both the input and output sides are handled. 
*/ void usb_enable_endpoint(struct usb_device *dev, struct usb_host_endpoint *ep, - bool reset_toggle) + bool reset_ep) { int epnum = usb_endpoint_num(&ep->desc); int is_out = usb_endpoint_dir_out(&ep->desc); int is_control = usb_endpoint_xfer_control(&ep->desc); - if (is_out || is_control) { - if (reset_toggle) - usb_settoggle(dev, epnum, 1, 0); + if (reset_ep) + usb_hcd_reset_endpoint(dev, ep); + if (is_out || is_control) dev->ep_out[epnum] = ep; - } - if (!is_out || is_control) { - if (reset_toggle) - usb_settoggle(dev, epnum, 0, 0); + if (!is_out || is_control) dev->ep_in[epnum] = ep; - } ep->enabled = 1; } @@ -1183,18 +1201,18 @@ void usb_enable_endpoint(struct usb_device *dev, struct usb_host_endpoint *ep, * usb_enable_interface - Enable all the endpoints for an interface * @dev: the device whose interface is being enabled * @intf: pointer to the interface descriptor - * @reset_toggles: flag to set the endpoints' toggles back to 0 + * @reset_eps: flag to reset the endpoints' state * * Enables all the endpoints for the interface's current altsetting. */ void usb_enable_interface(struct usb_device *dev, - struct usb_interface *intf, bool reset_toggles) + struct usb_interface *intf, bool reset_eps) { struct usb_host_interface *alt = intf->cur_altsetting; int i; for (i = 0; i < alt->desc.bNumEndpoints; ++i) - usb_enable_endpoint(dev, &alt->endpoint[i], reset_toggles); + usb_enable_endpoint(dev, &alt->endpoint[i], reset_eps); } /** @@ -1335,7 +1353,7 @@ EXPORT_SYMBOL_GPL(usb_set_interface); * This issues a standard SET_CONFIGURATION request to the device using * the current configuration. The effect is to reset most USB-related * state in the device, including interface altsettings (reset to zero), - * endpoint halts (cleared), and data toggle (only for bulk and interrupt + * endpoint halts (cleared), and endpoint state (only for bulk and interrupt * endpoints). Other usbcore state is unchanged, including bindings of * usb device drivers to interfaces. 
* @@ -1343,7 +1361,7 @@ EXPORT_SYMBOL_GPL(usb_set_interface); * (multi-interface) devices. Instead, the driver for each interface may * use usb_set_interface() on the interfaces it claims. Be careful though; * some devices don't support the SET_INTERFACE request, and others won't - * reset all the interface state (notably data toggles). Resetting the whole + * reset all the interface state (notably endpoint state). Resetting the whole * configuration would affect other drivers' interfaces. * * The caller must own the device lock. @@ -1376,8 +1394,6 @@ int usb_reset_configuration(struct usb_device *dev) if (retval < 0) return retval; - dev->toggle[0] = dev->toggle[1] = 0; - /* re-init hc/hcd interface/endpoint state */ for (i = 0; i < config->desc.bNumInterfaces; i++) { struct usb_interface *intf = config->interface[i]; diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index dcfc072630c1..7eee400d3e32 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -362,7 +362,7 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, dev->ep0.desc.bLength = USB_DT_ENDPOINT_SIZE; dev->ep0.desc.bDescriptorType = USB_DT_ENDPOINT; /* ep0 maxpacket comes later, from device descriptor */ - usb_enable_endpoint(dev, &dev->ep0, true); + usb_enable_endpoint(dev, &dev->ep0, false); dev->can_submit = 1; /* Save readable and stable topology id, distinguishing devices diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 49aedb36dc19..fcb320217218 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -247,10 +247,8 @@ int usb_stor_clear_halt(struct us_data *us, unsigned int pipe) USB_ENDPOINT_HALT, endp, NULL, 0, 3*HZ); - /* reset the endpoint toggle */ if (result >= 0) - usb_settoggle(us->pusb_dev, usb_pipeendpoint(pipe), - usb_pipeout(pipe), 0); + usb_reset_endpoint(us->pusb_dev, endp); US_DEBUGP("%s: result = %d\n", __func__, result); return result; diff --git a/include/linux/usb.h 
b/include/linux/usb.h index c6b2ab41b908..3aa2cd1f8d08 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1387,6 +1387,7 @@ extern int usb_string(struct usb_device *dev, int index, extern int usb_clear_halt(struct usb_device *dev, int pipe); extern int usb_reset_configuration(struct usb_device *dev); extern int usb_set_interface(struct usb_device *dev, int ifnum, int alternate); +extern void usb_reset_endpoint(struct usb_device *dev, unsigned int epaddr); /* this request isn't really synchronous, but it belongs with the others */ extern int usb_driver_set_configuration(struct usb_device *udev, int config); @@ -1491,14 +1492,6 @@ void usb_sg_wait(struct usb_sg_request *io); #define usb_pipecontrol(pipe) (usb_pipetype((pipe)) == PIPE_CONTROL) #define usb_pipebulk(pipe) (usb_pipetype((pipe)) == PIPE_BULK) -/* The D0/D1 toggle bits ... USE WITH CAUTION (they're almost hcd-internal) */ -#define usb_gettoggle(dev, ep, out) (((dev)->toggle[out] >> (ep)) & 1) -#define usb_dotoggle(dev, ep, out) ((dev)->toggle[out] ^= (1 << (ep))) -#define usb_settoggle(dev, ep, out, bit) \ - ((dev)->toggle[out] = ((dev)->toggle[out] & ~(1 << (ep))) | \ - ((bit) << (ep))) - - static inline unsigned int __create_pipe(struct usb_device *dev, unsigned int endpoint) { -- cgit v1.2.3-71-gd317 From 42a17ad2762f465d291c3bc0b6ed2b3738f65481 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sat, 18 Apr 2009 11:30:56 +0200 Subject: <linux/seccomp.h> needs to include <linux/errno.h>. <linux/seccomp.h> uses EINVAL so should include <linux/errno.h>. This fixes a build error on 64-bit MIPS if CONFIG_SECCOMP is disabled.
Signed-off-by: Ralf Baechle Signed-off-by: Linus Torvalds --- include/linux/seccomp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 262a8dccfa81..167c33361d9c 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -21,6 +21,8 @@ extern long prctl_set_seccomp(unsigned long); #else /* CONFIG_SECCOMP */ +#include <linux/errno.h> + typedef struct { } seccomp_t; #define secure_computing(x) do { } while (0) -- cgit v1.2.3-71-gd317 From 6a7c7eaf71b636f197d73b381a2ab729ebdcfb2e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 19 Apr 2009 20:08:42 +0200 Subject: PM/Suspend: Introduce two new platform callbacks to avoid breakage Commit 900af0d973856d6feb6fc088c2d0d3fde57707d3 (PM: Change suspend code ordering) changed the ordering of suspend code in such a way that the platform .prepare() callback is now executed after the device drivers' late suspend callbacks have run. Unfortunately, this turns out to break ARM platforms that need to talk via I2C to power control devices during the .prepare() callback. For this reason introduce two new platform suspend callbacks, .prepare_late() and .wake(), that will be called just prior to disabling non-boot CPUs and right after bringing them back on line, respectively, and use them instead of .prepare() and .finish() for ACPI suspend. Make the PM core execute the .prepare() and .finish() platform suspend callbacks where they were executed previously (that is, right after calling the regular suspend methods provided by device drivers and right before executing their regular resume methods, respectively). It is not necessary to make analogous changes to the hibernation code and data structures at the moment, because they are only used by ACPI platforms. Signed-off-by: Rafael J.
Wysocki Reported-by: Russell King Acked-by: Len Brown --- drivers/acpi/sleep.c | 8 ++++---- include/linux/suspend.h | 36 ++++++++++++++++++++++++++---------- kernel/power/main.c | 24 +++++++++++++++++------- 3 files changed, 47 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 779e4e500df4..d060e6fd7fd5 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -300,9 +300,9 @@ static int acpi_suspend_state_valid(suspend_state_t pm_state) static struct platform_suspend_ops acpi_suspend_ops = { .valid = acpi_suspend_state_valid, .begin = acpi_suspend_begin, - .prepare = acpi_pm_prepare, + .prepare_late = acpi_pm_prepare, .enter = acpi_suspend_enter, - .finish = acpi_pm_finish, + .wake = acpi_pm_finish, .end = acpi_pm_end, }; @@ -328,9 +328,9 @@ static int acpi_suspend_begin_old(suspend_state_t pm_state) static struct platform_suspend_ops acpi_suspend_ops_old = { .valid = acpi_suspend_state_valid, .begin = acpi_suspend_begin_old, - .prepare = acpi_pm_disable_gpes, + .prepare_late = acpi_pm_disable_gpes, .enter = acpi_suspend_enter, - .finish = acpi_pm_finish, + .wake = acpi_pm_finish, .end = acpi_pm_end, .recover = acpi_pm_finish, }; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3e3a4364cbff..795032edfc46 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -58,10 +58,17 @@ typedef int __bitwise suspend_state_t; * by @begin(). * @prepare() is called right after devices have been suspended (ie. the * appropriate .suspend() method has been executed for each device) and - * before the nonboot CPUs are disabled (it is executed with IRQs enabled). - * This callback is optional. It returns 0 on success or a negative - * error code otherwise, in which case the system cannot enter the desired - * sleep state (@enter() and @finish() will not be called in that case). + * before device drivers' late suspend callbacks are executed. 
It returns + * 0 on success or a negative error code otherwise, in which case the + * system cannot enter the desired sleep state (@prepare_late(), @enter(), + * @wake(), and @finish() will not be called in that case). + * + * @prepare_late: Finish preparing the platform for entering the system sleep + * state indicated by @begin(). + * @prepare_late is called before disabling nonboot CPUs and after + * device drivers' late suspend callbacks have been executed. It returns + * 0 on success or a negative error code otherwise, in which case the + * system cannot enter the desired sleep state (@enter() and @wake()). * * @enter: Enter the system sleep state indicated by @begin() or represented by * the argument if @begin() is not implemented. @@ -69,19 +76,26 @@ typedef int __bitwise suspend_state_t; * error code otherwise, in which case the system cannot enter the desired * sleep state. * - * @finish: Called when the system has just left a sleep state, right after - * the nonboot CPUs have been enabled and before devices are resumed (it is - * executed with IRQs enabled). + * @wake: Called when the system has just left a sleep state, right after + * the nonboot CPUs have been enabled and before device drivers' early + * resume callbacks are executed. + * This callback is optional, but should be implemented by the platforms + * that implement @prepare_late(). If implemented, it is always called + * after @enter(), even if @enter() fails. + * + * @finish: Finish wake-up of the platform. + * @finish is called right prior to calling device drivers' regular suspend + * callbacks. * This callback is optional, but should be implemented by the platforms * that implement @prepare(). If implemented, it is always called after - * @enter() (even if @enter() fails). + * @enter() and @wake(), if implemented, even if any of them fails. 
* * @end: Called by the PM core right after resuming devices, to indicate to * the platform that the system has returned to the working state or * the transition to the sleep state has been aborted. * This callback is optional, but should be implemented by the platforms - * that implement @begin(), but platforms implementing @begin() should - * also provide a @end() which cleans up transitions aborted before + * that implement @begin(). Accordingly, platforms implementing @begin() + * should also provide a @end() which cleans up transitions aborted before * @enter(). * * @recover: Recover the platform from a suspend failure. @@ -93,7 +107,9 @@ struct platform_suspend_ops { int (*valid)(suspend_state_t state); int (*begin)(suspend_state_t state); int (*prepare)(void); + int (*prepare_late)(void); int (*enter)(suspend_state_t state); + void (*wake)(void); void (*finish)(void); void (*end)(void); void (*recover)(void); diff --git a/kernel/power/main.c b/kernel/power/main.c index f172f41858bb..f99ed6a75eac 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -291,20 +291,26 @@ static int suspend_enter(suspend_state_t state) device_pm_lock(); + if (suspend_ops->prepare) { + error = suspend_ops->prepare(); + if (error) + goto Done; + } + error = device_power_down(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to power down\n"); - goto Done; + goto Platfrom_finish; } - if (suspend_ops->prepare) { - error = suspend_ops->prepare(); + if (suspend_ops->prepare_late) { + error = suspend_ops->prepare_late(); if (error) goto Power_up_devices; } if (suspend_test(TEST_PLATFORM)) - goto Platfrom_finish; + goto Platform_wake; error = disable_nonboot_cpus(); if (error || suspend_test(TEST_CPUS)) @@ -326,13 +332,17 @@ static int suspend_enter(suspend_state_t state) Enable_cpus: enable_nonboot_cpus(); - Platfrom_finish: - if (suspend_ops->finish) - suspend_ops->finish(); + Platform_wake: + if (suspend_ops->wake) + suspend_ops->wake(); Power_up_devices: 
device_power_up(PMSG_RESUME); + Platfrom_finish: + if (suspend_ops->finish) + suspend_ops->finish(); + Done: device_pm_unlock(); -- cgit v1.2.3-71-gd317 From 0112fc2229847feb6c4eb011e6833d8f1742a375 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Wed, 8 Apr 2009 20:05:42 +0400 Subject: Separate out common fstatat code into vfs_fstatat This is a version incorporating Christoph's suggestion. Separate out common *fstatat functionality into a single function instead of duplicating it all over the code. Signed-off-by: Oleg Drokin Signed-off-by: Al Viro --- arch/arm/kernel/sys_oabi-compat.c | 19 ++++--------- arch/s390/kernel/compat_linux.c | 18 ++++--------- arch/sparc/kernel/sys_sparc32.c | 19 ++++--------- arch/x86/ia32/sys_ia32.c | 19 ++++--------- fs/compat.c | 19 ++++--------- fs/stat.c | 56 +++++++++++++++++++-------------------- include/linux/fs.h | 1 + 7 files changed, 54 insertions(+), 97 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index e04173c7e621..d59a0cd537f0 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -177,21 +177,12 @@ asmlinkage long sys_oabi_fstatat64(int dfd, int flag) { struct kstat stat; - int error = -EINVAL; + int error; - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_oldabi_stat64(&stat, statbuf); - -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_oldabi_stat64(&stat, statbuf); } struct oabi_flock64 { diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 6cc87d8c8682..002c70d3cb75 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -702,20 +702,12 @@ asmlinkage long sys32_fstatat64(unsigned int dfd, char __user *filename, 
struct stat64_emu31 __user* statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); + int error; - if (!error) - error = cp_stat64(statbuf, &stat); -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); } /* diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index e800503879e4..f5000a460c05 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -206,21 +206,12 @@ asmlinkage long compat_sys_fstatat64(unsigned int dfd, char __user *filename, struct compat_stat64 __user * statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_compat_stat64(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_compat_stat64(&stat, statbuf); } asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index efac92fd1efb..085a8c35f149 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -129,21 +129,12 @@ asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, struct stat64 __user *statbuf, int flag) { struct kstat stat; - int error = -EINVAL; + int error; - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_stat64(statbuf, &stat); - -out: - return error; + error = 
vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); } /* diff --git a/fs/compat.c b/fs/compat.c index 3f84d5f15889..dda72e267092 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -204,21 +204,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, struct compat_stat __user *statbuf, int flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_compat_stat(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } #endif diff --git a/fs/stat.c b/fs/stat.c index 2db740a0cfb5..54711662b855 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -109,6 +109,24 @@ int vfs_fstat(unsigned int fd, struct kstat *stat) EXPORT_SYMBOL(vfs_fstat); +int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) +{ + int error = -EINVAL; + + if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + goto out; + + if (flag & AT_SYMLINK_NOFOLLOW) + error = vfs_lstat_fd(dfd, filename, stat); + else + error = vfs_stat_fd(dfd, filename, stat); +out: + return error; +} + +EXPORT_SYMBOL(vfs_fstatat); + + #ifdef __ARCH_WANT_OLD_STAT /* @@ -264,21 +282,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, struct stat __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_new_stat(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_new_stat(&stat, statbuf); } #endif @@ 
-404,21 +413,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, struct stat64 __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); - - if (!error) - error = cp_new_stat64(&stat, statbuf); + int error; -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_new_stat64(&stat, statbuf); } #endif /* __ARCH_WANT_STAT64 */ diff --git a/include/linux/fs.h b/include/linux/fs.h index e766be0d4329..257f4d37ad23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2302,6 +2302,7 @@ extern int vfs_lstat(char __user *, struct kstat *); extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); +extern int vfs_fstatat(int , char __user *, struct kstat *, int); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); -- cgit v1.2.3-71-gd317 From 2eae7a1874ca5be3232765d89e0250a449f1bc90 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Apr 2009 16:34:03 -0400 Subject: kill vfs_stat_fd / vfs_lstat_fd There's really no reason to keep vfs_stat_fd and vfs_lstat_fd with Oleg's vfs_fstatat. Use vfs_fstatat for the few cases having the directory fd, and switch all others to vfs_stat / vfs_lstat. 
Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/compat.c | 18 +++++---- fs/stat.c | 105 +++++++++++++++++++++-------------------------------- include/linux/fs.h | 2 - 3 files changed, 52 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/fs/compat.c b/fs/compat.c index dda72e267092..379a399bf5c3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename, struct compat_stat __user *statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_compat_stat(&stat, statbuf); - return error; + error = vfs_stat(filename, &stat); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } asmlinkage long compat_sys_newlstat(char __user * filename, struct compat_stat __user *statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_compat_stat(&stat, statbuf); - return error; + error = vfs_lstat(filename, &stat); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); } #ifndef __ARCH_WANT_STAT64 diff --git a/fs/stat.c b/fs/stat.c index 54711662b855..075694e31d8b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -55,46 +55,6 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) EXPORT_SYMBOL(vfs_getattr); -int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) -{ - struct path path; - int error; - - error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); - if (!error) { - error = vfs_getattr(path.mnt, path.dentry, stat); - path_put(&path); - } - return error; -} - -int vfs_stat(char __user *name, struct kstat *stat) -{ - return vfs_stat_fd(AT_FDCWD, name, stat); -} - -EXPORT_SYMBOL(vfs_stat); - -int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) -{ - struct path path; - int error; - - error = user_path_at(dfd, name, 0, &path); - if (!error) { - error = vfs_getattr(path.mnt, 
path.dentry, stat); - path_put(&path); - } - return error; -} - -int vfs_lstat(char __user *name, struct kstat *stat) -{ - return vfs_lstat_fd(AT_FDCWD, name, stat); -} - -EXPORT_SYMBOL(vfs_lstat); - int vfs_fstat(unsigned int fd, struct kstat *stat) { struct file *f = fget(fd); @@ -106,26 +66,43 @@ int vfs_fstat(unsigned int fd, struct kstat *stat) } return error; } - EXPORT_SYMBOL(vfs_fstat); int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) { + struct path path; int error = -EINVAL; + int lookup_flags = 0; if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) goto out; - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, stat); - else - error = vfs_stat_fd(dfd, filename, stat); + if (!(flag & AT_SYMLINK_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + + error = user_path_at(dfd, filename, lookup_flags, &path); + if (error) + goto out; + + error = vfs_getattr(path.mnt, path.dentry, stat); + path_put(&path); out: return error; } - EXPORT_SYMBOL(vfs_fstatat); +int vfs_stat(char __user *name, struct kstat *stat) +{ + return vfs_fstatat(AT_FDCWD, name, stat, 0); +} +EXPORT_SYMBOL(vfs_stat); + +int vfs_lstat(char __user *name, struct kstat *stat) +{ + return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); +} +EXPORT_SYMBOL(vfs_lstat); + #ifdef __ARCH_WANT_OLD_STAT @@ -173,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_old_stat(&stat, statbuf); + error = vfs_stat(filename, &stat); + if (error) + return error; - return error; + return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = 
cp_old_stat(&stat, statbuf); + error = vfs_lstat(filename, &stat); + if (error) + return error; - return error; + return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) @@ -258,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; - int error = vfs_stat_fd(AT_FDCWD, filename, &stat); - - if (!error) - error = cp_new_stat(&stat, statbuf); + int error = vfs_stat(filename, &stat); - return error; + if (error) + return error; + return cp_new_stat(&stat, statbuf); } SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; - int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); + int error; - if (!error) - error = cp_new_stat(&stat, statbuf); + error = vfs_lstat(filename, &stat); + if (error) + return error; - return error; + return cp_new_stat(&stat, statbuf); } #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) diff --git a/include/linux/fs.h b/include/linux/fs.h index 257f4d37ad23..8f42b35a7565 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2299,8 +2299,6 @@ extern int vfs_readdir(struct file *, filldir_t, void *); extern int vfs_stat(char __user *, struct kstat *); extern int vfs_lstat(char __user *, struct kstat *); -extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); -extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_fstatat(int , char __user *, struct kstat *, int); -- cgit v1.2.3-71-gd317 From 38e23c95f92a84fb8505a9f572b8a209c9c372c1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 9 Apr 2009 20:17:52 +0900 Subject: fs: Mark get_filesystem_list() as __init function. "int get_filesystem_list(char * buf)" is called by only "static void __init get_fs_names(char *page)". 
We can mark get_filesystem_list() as "__init". Signed-off-by: Tetsuo Handa Signed-off-by: Al Viro --- fs/filesystems.c | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/filesystems.c b/fs/filesystems.c index 1aa70260e6d1..a24c58e181db 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) return retval; } -int get_filesystem_list(char * buf) +int __init get_filesystem_list(char *buf) { int len = 0; struct file_system_type * tmp; diff --git a/include/linux/fs.h b/include/linux/fs.h index 8f42b35a7565..5bed436f4353 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2448,7 +2448,7 @@ struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); -int get_filesystem_list(char * buf); +int __init get_filesystem_list(char *buf); #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ -- cgit v1.2.3-71-gd317 From be9208dff23af904655807672dd8235abf6ac039 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Apr 2009 23:29:41 -0400 Subject: reiserfs: fix j_last_flush_trans_id type Conversion in commit 600ed41675d8c384519d8f0b3c76afed39ef2f4b had missed that one, but converted format from %lu to %u. As the result, /proc/..../journal got buggered on 64bit boxen. Signed-off-by: Al Viro --- include/linux/reiserfs_fs_sb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 5621d87c4479..6b361d23a499 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -193,7 +193,7 @@ struct reiserfs_journal { atomic_t j_wcount; /* count of writers for current commit */ unsigned long j_bcount; /* batch count. 
allows turning X transactions into 1 */ unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */ - unsigned long j_last_flush_trans_id; /* last fully flushed journal timestamp */ + unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp */ struct buffer_head *j_header_bh; time_t j_trans_start_time; /* time this transaction started */ -- cgit v1.2.3-71-gd317 From 88bea188b85f9cefefbbd56b8a48d0f798409177 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 21 Apr 2009 00:35:47 -0400 Subject: ACPI: add /sys/firmware/acpi/interrupts/sci_not counter This counter may prove useful in debugging some spurious interrupt issues seen in the field. Signed-off-by: Len Brown --- Documentation/ABI/testing/sysfs-firmware-acpi | 8 ++++++-- drivers/acpi/osl.c | 4 +++- drivers/acpi/system.c | 11 +++++++++-- include/linux/acpi.h | 1 + 4 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi index e8ffc70ffe12..4f9ba3c2fca7 100644 --- a/Documentation/ABI/testing/sysfs-firmware-acpi +++ b/Documentation/ABI/testing/sysfs-firmware-acpi @@ -69,9 +69,13 @@ Description: gpe1F: 0 invalid gpe_all: 1192 sci: 1194 + sci_not: 0 - sci - The total number of times the ACPI SCI - has claimed an interrupt. + sci - The number of times the ACPI SCI + has been called and claimed an interrupt. + + sci_not - The number of times the ACPI SCI + has been called and NOT claimed an interrupt. gpe_all - count of SCI caused by GPEs. 
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index d59f08ecaf16..d916bea729f1 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -353,8 +353,10 @@ static irqreturn_t acpi_irq(int irq, void *dev_id) if (handled) { acpi_irq_handled++; return IRQ_HANDLED; - } else + } else { + acpi_irq_not_handled++; return IRQ_NONE; + } } acpi_status diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index da51f05ef8d8..0944daec064f 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -38,6 +38,7 @@ ACPI_MODULE_NAME("system"); #define ACPI_SYSTEM_DEVICE_NAME "System" u32 acpi_irq_handled; +u32 acpi_irq_not_handled; /* * Make ACPICA version work as module param @@ -214,8 +215,9 @@ err: #define COUNT_GPE 0 #define COUNT_SCI 1 /* acpi_irq_handled */ -#define COUNT_ERROR 2 /* other */ -#define NUM_COUNTERS_EXTRA 3 +#define COUNT_SCI_NOT 2 /* acpi_irq_not_handled */ +#define COUNT_ERROR 3 /* other */ +#define NUM_COUNTERS_EXTRA 4 struct event_counter { u32 count; @@ -317,6 +319,8 @@ static ssize_t counter_show(struct kobject *kobj, all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI].count = acpi_irq_handled; + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI_NOT].count = + acpi_irq_not_handled; all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE].count = acpi_gpe_count; @@ -363,6 +367,7 @@ static ssize_t counter_set(struct kobject *kobj, all_counters[i].count = 0; acpi_gpe_count = 0; acpi_irq_handled = 0; + acpi_irq_not_handled = 0; goto end; } @@ -456,6 +461,8 @@ void acpi_irq_stats_init(void) sprintf(buffer, "gpe_all"); else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI) sprintf(buffer, "sci"); + else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI_NOT) + sprintf(buffer, "sci_not"); else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR) sprintf(buffer, "error"); else diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6586cbd0d4af..88be890ee3c7 100644 --- a/include/linux/acpi.h +++ 
b/include/linux/acpi.h @@ -111,6 +111,7 @@ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base); void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; +extern u32 acpi_irq_not_handled; extern struct acpi_mcfg_allocation *pci_mmcfg_config; extern int pci_mmcfg_config_num; -- cgit v1.2.3-71-gd317 From 8b9cf76d0fa6cd98fe42dd2f86460d6ede55fed8 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 21 Apr 2009 13:44:13 +0200 Subject: Fix SYSCALL_ALIAS for older MIPS assembler Older MIPS assemblers don't support .set for defining aliases. Using = works for old and new assemblers. Signed-off-by: Thomas Bogendoerfer Acked-by: Ralf Baechle Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index dabe4ad89141..40617c1d8976 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -148,7 +148,7 @@ struct old_linux_dirent; asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ "\t.globl ." #alias "\n\t.set ." #alias ", ." #name) #else -#ifdef CONFIG_ALPHA +#if defined(CONFIG_ALPHA) || defined(CONFIG_MIPS) #define SYSCALL_ALIAS(alias, name) \ asm ( #alias " = " #name "\n\t.globl " #alias) #else -- cgit v1.2.3-71-gd317 From 8e19608e8b5c001e4a66ce482edc474f05fb7355 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 21 Apr 2009 12:24:00 -0700 Subject: clocksource: pass clocksource to read() callback Pass clocksource pointer to the read() callback for clocksources. This allows us to share the callback between multiple instances.
[hugh@veritas.com: fix powerpc build of clocksource pass clocksource mods] [akpm@linux-foundation.org: cleanup] Signed-off-by: Magnus Damm Acked-by: John Stultz Cc: Thomas Gleixner Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-at91/at91rm9200_time.c | 2 +- arch/arm/mach-at91/at91sam926x_time.c | 2 +- arch/arm/mach-davinci/time.c | 2 +- arch/arm/mach-imx/time.c | 2 +- arch/arm/mach-ixp4xx/common.c | 2 +- arch/arm/mach-msm/timer.c | 4 ++-- arch/arm/mach-netx/time.c | 2 +- arch/arm/mach-ns9xxx/time-ns9360.c | 2 +- arch/arm/mach-omap1/time.c | 2 +- arch/arm/mach-omap2/timer-gp.c | 2 +- arch/arm/mach-pxa/time.c | 2 +- arch/arm/mach-realview/core.c | 2 +- arch/arm/mach-versatile/core.c | 2 +- arch/arm/plat-mxc/time.c | 2 +- arch/arm/plat-omap/common.c | 4 ++-- arch/arm/plat-orion/time.c | 2 +- arch/avr32/kernel/time.c | 2 +- arch/blackfin/kernel/time-ts.c | 12 ++++++------ arch/ia64/kernel/cyclone.c | 2 +- arch/ia64/kernel/time.c | 4 ++-- arch/ia64/sn/kernel/sn2/timer.c | 2 +- arch/m68knommu/platform/68328/timers.c | 2 +- arch/m68knommu/platform/coldfire/dma_timer.c | 2 +- arch/m68knommu/platform/coldfire/pit.c | 2 +- arch/m68knommu/platform/coldfire/timers.c | 2 +- arch/mips/kernel/cevt-txx9.c | 2 +- arch/mips/kernel/csrc-bcm1480.c | 2 +- arch/mips/kernel/csrc-ioasic.c | 6 +++--- arch/mips/kernel/csrc-r4k.c | 2 +- arch/mips/kernel/csrc-sb1250.c | 2 +- arch/mips/kernel/i8253.c | 2 +- arch/mips/nxp/pnx8550/common/time.c | 2 +- arch/mips/sgi-ip27/ip27-timer.c | 2 +- arch/powerpc/kernel/time.c | 8 ++++---- arch/s390/kernel/time.c | 2 +- arch/sh/kernel/time_32.c | 2 +- arch/sh/kernel/timers/timer-tmu.c | 2 +- arch/sparc/kernel/time_64.c | 7 ++++++- arch/um/kernel/time.c | 2 +- arch/x86/kernel/hpet.c | 6 +++--- arch/x86/kernel/i8253.c | 2 +- arch/x86/kernel/kvmclock.c | 7 ++++++- arch/x86/kernel/tsc.c | 2 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/lguest/boot.c | 2 +- arch/x86/xen/time.c | 7 ++++++- 
drivers/char/hpet.c | 2 +- drivers/clocksource/acpi_pm.c | 12 ++++++------ drivers/clocksource/cyclone.c | 2 +- drivers/clocksource/scx200_hrt.c | 2 +- drivers/clocksource/tcb_clksrc.c | 2 +- include/linux/clocksource.h | 6 +++--- kernel/time/clocksource.c | 8 ++++---- kernel/time/jiffies.c | 2 +- 54 files changed, 94 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index 1ff1bda0a894..309f3511aa20 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -85,7 +85,7 @@ static struct irqaction at91rm9200_timer_irq = { .handler = at91rm9200_timer_interrupt }; -static cycle_t read_clk32k(void) +static cycle_t read_clk32k(struct clocksource *cs) { return read_CRTR(); } diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index b63e1d5f1bad..4bd56aee4370 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -31,7 +31,7 @@ static u32 pit_cnt; /* access only w/system irq blocked */ * Clocksource: just a monotonic counter of MCK/16 cycles. * We don't care whether or not PIT irqs are enabled. 
*/ -static cycle_t read_pit_clk(void) +static cycle_t read_pit_clk(struct clocksource *cs) { unsigned long flags; u32 elapsed; diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index f8bcd29d17a6..6c227d4ba998 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -238,7 +238,7 @@ static void __init timer_init(void) /* * clocksource */ -static cycle_t read_cycles(void) +static cycle_t read_cycles(struct clocksource *cs) { struct timer_s *t = &timers[TID_CLOCKSOURCE]; diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index aff0ebcfa847..5aef18b599e5 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -73,7 +73,7 @@ static void __init imx_timer_hardware_init(void) IMX_TCTL(TIMER_BASE) = TCTL_FRR | TCTL_CLK_PCLK1 | TCTL_TEN; } -cycle_t imx_get_cycles(void) +cycle_t imx_get_cycles(struct clocksource *cs) { return IMX_TCN(TIMER_BASE); } diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index f4656d2ac8a8..1e93dfee7543 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -401,7 +401,7 @@ void __init ixp4xx_sys_init(void) /* * clocksource */ -cycle_t ixp4xx_get_cycles(void) +cycle_t ixp4xx_get_cycles(struct clocksource *cs) { return *IXP4XX_OSTS; } diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 444d9c0f5ca6..4855b8ca5101 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -57,12 +57,12 @@ static irqreturn_t msm_timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static cycle_t msm_gpt_read(void) +static cycle_t msm_gpt_read(struct clocksource *cs) { return readl(MSM_GPT_BASE + TIMER_COUNT_VAL); } -static cycle_t msm_dgt_read(void) +static cycle_t msm_dgt_read(struct clocksource *cs) { return readl(MSM_DGT_BASE + TIMER_COUNT_VAL) >> MSM_DGT_SHIFT; } diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c index f201fddb594f..82801dbf0579 100644 --- 
a/arch/arm/mach-netx/time.c +++ b/arch/arm/mach-netx/time.c @@ -104,7 +104,7 @@ static struct irqaction netx_timer_irq = { .handler = netx_timer_interrupt, }; -cycle_t netx_get_cycles(void) +cycle_t netx_get_cycles(struct clocksource *cs) { return readl(NETX_GPIO_COUNTER_CURRENT(TIMER_CLOCKSOURCE)); } diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c index 41df69721769..77281260358a 100644 --- a/arch/arm/mach-ns9xxx/time-ns9360.c +++ b/arch/arm/mach-ns9xxx/time-ns9360.c @@ -25,7 +25,7 @@ #define TIMER_CLOCKEVENT 1 static u32 latch; -static cycle_t ns9360_clocksource_read(void) +static cycle_t ns9360_clocksource_read(struct clocksource *cs) { return __raw_readl(SYS_TR(TIMER_CLOCKSOURCE)); } diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 495a32c287b4..4d56408d3cff 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -198,7 +198,7 @@ static struct irqaction omap_mpu_timer2_irq = { .handler = omap_mpu_timer2_interrupt, }; -static cycle_t mpu_read(void) +static cycle_t mpu_read(struct clocksource *cs) { return ~omap_mpu_timer_read(1); } diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c index 9fc13a2cc3f4..1cb2c0909c2b 100644 --- a/arch/arm/mach-omap2/timer-gp.c +++ b/arch/arm/mach-omap2/timer-gp.c @@ -138,7 +138,7 @@ static inline void __init omap2_gp_clocksource_init(void) {} * clocksource */ static struct omap_dm_timer *gpt_clocksource; -static cycle_t clocksource_read_cycles(void) +static cycle_t clocksource_read_cycles(struct clocksource *cs) { return (cycle_t)omap_dm_timer_read_counter(gpt_clocksource); } diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 8eb3830fbb0b..750c448db672 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -125,7 +125,7 @@ static struct clock_event_device ckevt_pxa_osmr0 = { .set_mode = pxa_osmr0_set_mode, }; -static cycle_t pxa_read_oscr(void) +static cycle_t pxa_read_oscr(struct 
clocksource *cs) { return OSCR; } diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 9ab947c14f26..942e1a7eb9b2 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -715,7 +715,7 @@ static struct irqaction realview_timer_irq = { .handler = realview_timer_interrupt, }; -static cycle_t realview_get_cycles(void) +static cycle_t realview_get_cycles(struct clocksource *cs) { return ~readl(timer3_va_base + TIMER_VALUE); } diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 565776680d8c..1f929c391af7 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -948,7 +948,7 @@ static struct irqaction versatile_timer_irq = { .handler = versatile_timer_interrupt, }; -static cycle_t versatile_get_cycles(void) +static cycle_t versatile_get_cycles(struct clocksource *cs) { return ~readl(TIMER3_VA_BASE + TIMER_VALUE); } diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c index ef1b3cd85bd3..dab3357196fb 100644 --- a/arch/arm/plat-mxc/time.c +++ b/arch/arm/plat-mxc/time.c @@ -36,7 +36,7 @@ static enum clock_event_mode clockevent_mode = CLOCK_EVT_MODE_UNUSED; /* clock source */ -static cycle_t mxc_get_cycles(void) +static cycle_t mxc_get_cycles(struct clocksource *cs) { return __raw_readl(TIMER_BASE + MXC_TCN); } diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c index d1797147732f..433021f3d7cc 100644 --- a/arch/arm/plat-omap/common.c +++ b/arch/arm/plat-omap/common.c @@ -185,7 +185,7 @@ console_initcall(omap_add_serial_console); #include -static cycle_t omap_32k_read(void) +static cycle_t omap_32k_read(struct clocksource *cs) { return omap_readl(TIMER_32K_SYNCHRONIZED); } @@ -207,7 +207,7 @@ unsigned long long sched_clock(void) { unsigned long long ret; - ret = (unsigned long long)omap_32k_read(); + ret = (unsigned long long)omap_32k_read(&clocksource_32k); ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift; return ret; 
} diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 6fa2923e6dca..2faf9dba4ef7 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -41,7 +41,7 @@ static u32 ticks_per_jiffy; /* * Clocksource handling. */ -static cycle_t orion_clksrc_read(void) +static cycle_t orion_clksrc_read(struct clocksource *cs) { return 0xffffffff - readl(TIMER0_VAL); } diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index 0ff46bf873b0..f27aa3b259fa 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -18,7 +18,7 @@ #include -static cycle_t read_cycle_count(void) +static cycle_t read_cycle_count(struct clocksource *cs) { return (cycle_t)sysreg_read(COUNT); } diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c index 0ed2badfd746..27646121280a 100644 --- a/arch/blackfin/kernel/time-ts.c +++ b/arch/blackfin/kernel/time-ts.c @@ -58,16 +58,11 @@ static inline unsigned long long cycles_2_ns(cycle_t cyc) return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; } -static cycle_t read_cycles(void) +static cycle_t read_cycles(struct clocksource *cs) { return __bfin_cycles_off + (get_cycles() << __bfin_cycles_mod); } -unsigned long long sched_clock(void) -{ - return cycles_2_ns(read_cycles()); -} - static struct clocksource clocksource_bfin = { .name = "bfin_cycles", .rating = 350, @@ -77,6 +72,11 @@ static struct clocksource clocksource_bfin = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +unsigned long long sched_clock(void) +{ + return cycles_2_ns(read_cycles(&clocksource_bfin)); +} + static int __init bfin_clocksource_init(void) { set_cyc2ns_scale(get_cclk() / 1000); diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index 790ef0d87e12..71e35864d2e2 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -21,7 +21,7 @@ void __init cyclone_setup(void) static void __iomem *cyclone_mc; -static cycle_t read_cyclone(void) +static cycle_t read_cyclone(struct 
clocksource *cs) { return (cycle_t)readq((void __iomem *)cyclone_mc); } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 641c8b61c4f1..604c1a35db33 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -33,7 +33,7 @@ #include "fsyscall_gtod_data.h" -static cycle_t itc_get_cycles(void); +static cycle_t itc_get_cycles(struct clocksource *cs); struct fsyscall_gtod_data_t fsyscall_gtod_data = { .lock = SEQLOCK_UNLOCKED, @@ -383,7 +383,7 @@ ia64_init_itm (void) } } -static cycle_t itc_get_cycles(void) +static cycle_t itc_get_cycles(struct clocksource *cs) { u64 lcycle, now, ret; diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c index cf67fc562054..21d6f09e3447 100644 --- a/arch/ia64/sn/kernel/sn2/timer.c +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -23,7 +23,7 @@ extern unsigned long sn_rtc_cycles_per_second; -static cycle_t read_sn2(void) +static cycle_t read_sn2(struct clocksource *cs) { return (cycle_t)readq(RTC_COUNTER_ADDR); } diff --git a/arch/m68knommu/platform/68328/timers.c b/arch/m68knommu/platform/68328/timers.c index 6bafefa546e5..309f725995bf 100644 --- a/arch/m68knommu/platform/68328/timers.c +++ b/arch/m68knommu/platform/68328/timers.c @@ -75,7 +75,7 @@ static struct irqaction m68328_timer_irq = { /***************************************************************************/ -static cycle_t m68328_read_clk(void) +static cycle_t m68328_read_clk(struct clocksource *cs) { unsigned long flags; u32 cycles; diff --git a/arch/m68knommu/platform/coldfire/dma_timer.c b/arch/m68knommu/platform/coldfire/dma_timer.c index 772578b1084f..a5f562823d7a 100644 --- a/arch/m68knommu/platform/coldfire/dma_timer.c +++ b/arch/m68knommu/platform/coldfire/dma_timer.c @@ -34,7 +34,7 @@ #define DMA_DTMR_CLK_DIV_16 (2 << 1) #define DMA_DTMR_ENABLE (1 << 0) -static cycle_t cf_dt_get_cycles(void) +static cycle_t cf_dt_get_cycles(struct clocksource *cs) { return __raw_readl(DTCN0); } diff --git 
a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c index 2a12e7fa9748..61b96211f8ff 100644 --- a/arch/m68knommu/platform/coldfire/pit.c +++ b/arch/m68knommu/platform/coldfire/pit.c @@ -125,7 +125,7 @@ static struct irqaction pit_irq = { /***************************************************************************/ -static cycle_t pit_read_clk(void) +static cycle_t pit_read_clk(struct clocksource *cs) { unsigned long flags; u32 cycles; diff --git a/arch/m68knommu/platform/coldfire/timers.c b/arch/m68knommu/platform/coldfire/timers.c index 454f25493491..1ba8a3731653 100644 --- a/arch/m68knommu/platform/coldfire/timers.c +++ b/arch/m68knommu/platform/coldfire/timers.c @@ -78,7 +78,7 @@ static struct irqaction mcftmr_timer_irq = { /***************************************************************************/ -static cycle_t mcftmr_read_clk(void) +static cycle_t mcftmr_read_clk(struct clocksource *cs) { unsigned long flags; u32 cycles; diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c index eccf7d6096bd..2e911e3da8d3 100644 --- a/arch/mips/kernel/cevt-txx9.c +++ b/arch/mips/kernel/cevt-txx9.c @@ -22,7 +22,7 @@ static struct txx9_tmr_reg __iomem *txx9_cs_tmrptr; -static cycle_t txx9_cs_read(void) +static cycle_t txx9_cs_read(struct clocksource *cs) { return __raw_readl(&txx9_cs_tmrptr->trr); } diff --git a/arch/mips/kernel/csrc-bcm1480.c b/arch/mips/kernel/csrc-bcm1480.c index 868745e7184b..51489f8a825e 100644 --- a/arch/mips/kernel/csrc-bcm1480.c +++ b/arch/mips/kernel/csrc-bcm1480.c @@ -28,7 +28,7 @@ #include -static cycle_t bcm1480_hpt_read(void) +static cycle_t bcm1480_hpt_read(struct clocksource *cs) { return (cycle_t) __raw_readq(IOADDR(A_SCD_ZBBUS_CYCLE_COUNT)); } diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c index 1d5f63cf8997..b551f48d3a07 100644 --- a/arch/mips/kernel/csrc-ioasic.c +++ b/arch/mips/kernel/csrc-ioasic.c @@ -25,7 +25,7 @@ #include #include -static cycle_t 
dec_ioasic_hpt_read(void) +static cycle_t dec_ioasic_hpt_read(struct clocksource *cs) { return ioasic_read(IO_REG_FCTR); } @@ -47,13 +47,13 @@ void __init dec_ioasic_clocksource_init(void) while (!ds1287_timer_state()) ; - start = dec_ioasic_hpt_read(); + start = dec_ioasic_hpt_read(&clocksource_dec); while (i--) while (!ds1287_timer_state()) ; - end = dec_ioasic_hpt_read(); + end = dec_ioasic_hpt_read(&clocksource_dec); freq = (end - start) * 10; printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq); diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c index f1a2893931ed..e95a3cd48eea 100644 --- a/arch/mips/kernel/csrc-r4k.c +++ b/arch/mips/kernel/csrc-r4k.c @@ -10,7 +10,7 @@ #include -static cycle_t c0_hpt_read(void) +static cycle_t c0_hpt_read(struct clocksource *cs) { return read_c0_count(); } diff --git a/arch/mips/kernel/csrc-sb1250.c b/arch/mips/kernel/csrc-sb1250.c index 92212bbb8e45..d14d3d1907fa 100644 --- a/arch/mips/kernel/csrc-sb1250.c +++ b/arch/mips/kernel/csrc-sb1250.c @@ -33,7 +33,7 @@ * The HPT is free running from SB1250_HPT_VALUE down to 0 then starts over * again. */ -static cycle_t sb1250_hpt_read(void) +static cycle_t sb1250_hpt_read(struct clocksource *cs) { unsigned int count; diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c index 689719e34f08..ed20e7fe65e3 100644 --- a/arch/mips/kernel/i8253.c +++ b/arch/mips/kernel/i8253.c @@ -128,7 +128,7 @@ void __init setup_pit_timer(void) * to just read by itself. 
So use jiffies to emulate a free * running counter: */ -static cycle_t pit_read(void) +static cycle_t pit_read(struct clocksource *cs) { unsigned long flags; int count; diff --git a/arch/mips/nxp/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c index cf293b279098..8df43e9e4d90 100644 --- a/arch/mips/nxp/pnx8550/common/time.c +++ b/arch/mips/nxp/pnx8550/common/time.c @@ -35,7 +35,7 @@ static unsigned long cpj; -static cycle_t hpt_read(void) +static cycle_t hpt_read(struct clocksource *cs) { return read_c0_count2(); } diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index f024057a35f8..f10a7cd64f7e 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -159,7 +159,7 @@ static void __init hub_rt_clock_event_global_init(void) setup_irq(irq, &hub_rt_irqaction); } -static cycle_t hub_rt_read(void) +static cycle_t hub_rt_read(struct clocksource *cs) { return REMOTE_HUB_L(cputonasid(0), PI_RT_COUNT); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 926ea864e34f..48571ac56fb7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -77,7 +77,7 @@ #include #include -static cycle_t rtc_read(void); +static cycle_t rtc_read(struct clocksource *); static struct clocksource clocksource_rtc = { .name = "rtc", .rating = 400, @@ -88,7 +88,7 @@ static struct clocksource clocksource_rtc = { .read = rtc_read, }; -static cycle_t timebase_read(void); +static cycle_t timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { .name = "timebase", .rating = 400, @@ -766,12 +766,12 @@ unsigned long read_persistent_clock(void) } /* clocksource code */ -static cycle_t rtc_read(void) +static cycle_t rtc_read(struct clocksource *cs) { return (cycle_t)get_rtc(); } -static cycle_t timebase_read(void) +static cycle_t timebase_read(struct clocksource *cs) { return (cycle_t)get_tb(); } diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 
6ded50dfa75a..ef596d020573 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -201,7 +201,7 @@ unsigned long read_persistent_clock(void) return ts.tv_sec; } -static cycle_t read_tod_clock(void) +static cycle_t read_tod_clock(struct clocksource *cs) { return get_clock(); } diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c index c34e1e0f9b02..1700d2465f6c 100644 --- a/arch/sh/kernel/time_32.c +++ b/arch/sh/kernel/time_32.c @@ -208,7 +208,7 @@ unsigned long long sched_clock(void) if (!clocksource_sh.rating) return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); - cycles = clocksource_sh.read(); + cycles = clocksource_sh.read(&clocksource_sh); return cyc2ns(&clocksource_sh, cycles); } #endif diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c index c5d3396f5960..fe8d8930ccb6 100644 --- a/arch/sh/kernel/timers/timer-tmu.c +++ b/arch/sh/kernel/timers/timer-tmu.c @@ -81,7 +81,7 @@ static int tmu_timer_stop(void) */ static int tmus_are_scaled; -static cycle_t tmu_timer_read(void) +static cycle_t tmu_timer_read(struct clocksource *cs) { return ((cycle_t)(~_tmu_read(TMU1)))<get_tick(); +} + void __init time_init(void) { unsigned long freq = sparc64_init_timers(); @@ -827,7 +832,7 @@ void __init time_init(void) clocksource_tick.mult = clocksource_hz2mult(freq, clocksource_tick.shift); - clocksource_tick.read = tick_ops->get_tick; + clocksource_tick.read = clocksource_tick_read; printk("clocksource: mult[%x] shift[%d]\n", clocksource_tick.mult, clocksource_tick.shift); diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index b13a87a3ec95..c8b9c469fcd7 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -65,7 +65,7 @@ static irqreturn_t um_timer(int irq, void *dev) return IRQ_HANDLED; } -static cycle_t itimer_read(void) +static cycle_t itimer_read(struct clocksource *cs) { return os_nsecs() / 1000; } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 
648b3a2a3a44..3f0019e0a229 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -722,7 +722,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, /* * Clock source related code */ -static cycle_t read_hpet(void) +static cycle_t read_hpet(struct clocksource *cs) { return (cycle_t)hpet_readl(HPET_COUNTER); } @@ -756,7 +756,7 @@ static int hpet_clocksource_register(void) hpet_restart_counter(); /* Verify whether hpet counter works */ - t1 = read_hpet(); + t1 = hpet_readl(HPET_COUNTER); rdtscll(start); /* @@ -770,7 +770,7 @@ static int hpet_clocksource_register(void) rdtscll(now); } while ((now - start) < 200000UL); - if (t1 == read_hpet()) { + if (t1 == hpet_readl(HPET_COUNTER)) { printk(KERN_WARNING "HPET counter not counting. HPET disabled\n"); return -ENODEV; diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 3475440baa54..c2e0bb0890d4 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -129,7 +129,7 @@ void __init setup_pit_timer(void) * to just read by itself. 
So use jiffies to emulate a free * running counter: */ -static cycle_t pit_read(void) +static cycle_t pit_read(struct clocksource *cs) { static int old_count; static u32 old_jifs; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 137f2e8132df..223af43f1526 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -77,6 +77,11 @@ static cycle_t kvm_clock_read(void) return ret; } +static cycle_t kvm_clock_get_cycles(struct clocksource *cs) +{ + return kvm_clock_read(); +} + /* * If we don't do that, there is the possibility that the guest * will calibrate under heavy load - thus, getting a lower lpj - @@ -107,7 +112,7 @@ static void kvm_get_preset_lpj(void) static struct clocksource kvm_clock = { .name = "kvm-clock", - .read = kvm_clock_read, + .read = kvm_clock_get_cycles, .rating = 400, .mask = CLOCKSOURCE_MASK(64), .mult = 1 << KVM_SCALE, diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7a567ebe6361..d57de05dc430 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -699,7 +699,7 @@ static struct clocksource clocksource_tsc; * code, which is necessary to support wrapping clocksources like pm * timer. 
*/ -static cycle_t read_tsc(void) +static cycle_t read_tsc(struct clocksource *cs) { cycle_t ret = (cycle_t)get_cycles(); diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index d303369a7bad..2b3eb82efeeb 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -283,7 +283,7 @@ void __devinit vmi_time_ap_init(void) /** vmi clocksource */ static struct clocksource clocksource_vmi; -static cycle_t read_real_cycles(void) +static cycle_t read_real_cycles(struct clocksource *cs) { cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); return max(ret, clocksource_vmi.cycle_last); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a2085368a3dc..ca7ec44bafc3 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -663,7 +663,7 @@ static unsigned long lguest_tsc_khz(void) /* If we can't use the TSC, the kernel falls back to our lower-priority * "lguest_clock", where we read the time value given to us by the Host. 
*/ -static cycle_t lguest_clock_read(void) +static cycle_t lguest_clock_read(struct clocksource *cs) { unsigned long sec, nsec; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 14f240623497..0a5aa44299a5 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -213,6 +213,11 @@ cycle_t xen_clocksource_read(void) return ret; } +static cycle_t xen_clocksource_get_cycles(struct clocksource *cs) +{ + return xen_clocksource_read(); +} + static void xen_read_wallclock(struct timespec *ts) { struct shared_info *s = HYPERVISOR_shared_info; @@ -241,7 +246,7 @@ int xen_set_wallclock(unsigned long now) static struct clocksource xen_clocksource __read_mostly = { .name = "xen", .rating = 400, - .read = xen_clocksource_read, + .read = xen_clocksource_get_cycles, .mask = ~0, .mult = 1< value1) diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c index 8615059a8729..64e528e8bfa6 100644 --- a/drivers/clocksource/cyclone.c +++ b/drivers/clocksource/cyclone.c @@ -19,7 +19,7 @@ int use_cyclone = 0; static void __iomem *cyclone_ptr; -static cycle_t read_cyclone(void) +static cycle_t read_cyclone(struct clocksource *cs) { return (cycle_t)readl(cyclone_ptr); } diff --git a/drivers/clocksource/scx200_hrt.c b/drivers/clocksource/scx200_hrt.c index b92da677aa5d..27f4d9637b62 100644 --- a/drivers/clocksource/scx200_hrt.c +++ b/drivers/clocksource/scx200_hrt.c @@ -43,7 +43,7 @@ MODULE_PARM_DESC(ppm, "+-adjust to actual XO freq (ppm)"); /* The base timer frequency, * 27 if selected */ #define HRT_FREQ 1000000 -static cycle_t read_hrt(void) +static cycle_t read_hrt(struct clocksource *cs) { /* Read the timer value */ return (cycle_t) inl(scx200_cb_base + SCx200_TIMER_OFFSET); diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 254f1064d973..01b886e68822 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -39,7 +39,7 @@ static void __iomem *tcaddr; -static cycle_t tc_get_cycles(void) 
+static cycle_t tc_get_cycles(struct clocksource *cs) { unsigned long flags; u32 lower, upper; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 573819ef4cc0..0d96cde9ee5d 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -143,7 +143,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * 400-499: Perfect * The ideal clocksource. A must-use where * available. - * @read: returns a cycle value + * @read: returns a cycle value, passes clocksource as argument * @mask: bitmask for two's complement * subtraction of non 64 bit counters * @mult: cycle to nanosecond multiplier (adjusted by NTP) @@ -162,7 +162,7 @@ struct clocksource { char *name; struct list_head list; int rating; - cycle_t (*read)(void); + cycle_t (*read)(struct clocksource *cs); cycle_t mask; u32 mult; u32 mult_orig; @@ -271,7 +271,7 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) */ static inline cycle_t clocksource_read(struct clocksource *cs) { - return cs->read(); + return cs->read(cs); } /** diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c46c931a7fe7..ecfd7b5187e0 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -181,12 +181,12 @@ static void clocksource_watchdog(unsigned long data) resumed = test_and_clear_bit(0, &watchdog_resumed); - wdnow = watchdog->read(); + wdnow = watchdog->read(watchdog); wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); watchdog_last = wdnow; list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { - csnow = cs->read(); + csnow = cs->read(cs); if (unlikely(resumed)) { cs->wd_last = csnow; @@ -247,7 +247,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) list_add(&cs->wd_list, &watchdog_list); if (!started && watchdog) { - watchdog_last = watchdog->read(); + watchdog_last = watchdog->read(watchdog); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, 
cpumask_first(cpu_online_mask)); @@ -268,7 +268,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) cse->flags &= ~CLOCK_SOURCE_WATCHDOG; /* Start if list is not empty */ if (!list_empty(&watchdog_list)) { - watchdog_last = watchdog->read(); + watchdog_last = watchdog->read(watchdog); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 06f197560f3b..c3f6c30816e3 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -50,7 +50,7 @@ */ #define JIFFIES_SHIFT 8 -static cycle_t jiffies_read(void) +static cycle_t jiffies_read(struct clocksource *cs) { return (cycle_t) jiffies; } -- cgit v1.2.3-71-gd317 From 4614e6adafa2c5e6c3a9c245af2807fa7bc5117a Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 21 Apr 2009 12:24:02 -0700 Subject: clocksource: add enable() and disable() callbacks Add enable() and disable() callbacks for clocksources. This allows us to put unused clocksources in power save mode. The functions clocksource_enable() and clocksource_disable() wrap the callbacks and are inserted in the timekeeping code to enable before use and disable after switching to a new clocksource. Signed-off-by: Magnus Damm Acked-by: John Stultz Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clocksource.h | 31 +++++++++++++++++++++++++++++++ kernel/time/timekeeping.c | 12 +++++++++--- 2 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 0d96cde9ee5d..5a40d14daa9f 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -144,6 +144,8 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * The ideal clocksource. A must-use where * available. 
* @read: returns a cycle value, passes clocksource as argument + * @enable: optional function to enable the clocksource + * @disable: optional function to disable the clocksource * @mask: bitmask for two's complement * subtraction of non 64 bit counters * @mult: cycle to nanosecond multiplier (adjusted by NTP) @@ -163,6 +165,8 @@ struct clocksource { struct list_head list; int rating; cycle_t (*read)(struct clocksource *cs); + int (*enable)(struct clocksource *cs); + void (*disable)(struct clocksource *cs); cycle_t mask; u32 mult; u32 mult_orig; @@ -274,6 +278,33 @@ static inline cycle_t clocksource_read(struct clocksource *cs) return cs->read(cs); } +/** + * clocksource_enable: - enable clocksource + * @cs: pointer to clocksource + * + * Enables the specified clocksource. The clocksource callback + * function should start up the hardware and setup mult and field + * members of struct clocksource to reflect hardware capabilities. + */ +static inline int clocksource_enable(struct clocksource *cs) +{ + return cs->enable ? cs->enable(cs) : 0; +} + +/** + * clocksource_disable: - disable clocksource + * @cs: pointer to clocksource + * + * Disables the specified clocksource. The clocksource callback + * function should power down the now unused hardware block to + * save power. 
+ */ +static inline void clocksource_disable(struct clocksource *cs) +{ + if (cs->disable) + cs->disable(cs); +} + /** * cyc2ns - converts clocksource cycles to nanoseconds * @cs: Pointer to clocksource diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 900f1b6598d1..687dff49f6e7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -182,7 +182,7 @@ EXPORT_SYMBOL(do_settimeofday); */ static void change_clocksource(void) { - struct clocksource *new; + struct clocksource *new, *old; new = clocksource_get_next(); @@ -191,11 +191,16 @@ static void change_clocksource(void) clocksource_forward_now(); - new->raw_time = clock->raw_time; + if (clocksource_enable(new)) + return; + new->raw_time = clock->raw_time; + old = clock; clock = new; + clocksource_disable(old); + clock->cycle_last = 0; - clock->cycle_last = clocksource_read(new); + clock->cycle_last = clocksource_read(clock); clock->error = 0; clock->xtime_nsec = 0; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); @@ -292,6 +297,7 @@ void __init timekeeping_init(void) ntp_init(); clock = clocksource_get_next(); + clocksource_enable(clock); clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); clock->cycle_last = clocksource_read(clock); -- cgit v1.2.3-71-gd317 From 40112ae7504745799e75ef418057f0d2cb745050 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 21 Apr 2009 12:24:03 -0700 Subject: ipmi: test for event buffer before using The IPMI driver would attempt to use the event buffer even if that didn't exist on the BMC. This patch modified the IPMI driver to check for the event buffer's existence before trying to use it. 
Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_si_intf.c | 148 +++++++++++++++++++++++++++++++-------- include/linux/ipmi_msgdefs.h | 6 ++ 2 files changed, 125 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 2438fdf889b4..259644646b82 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -82,12 +82,6 @@ #define SI_SHORT_TIMEOUT_USEC 250 /* .25ms when the SM request a short timeout */ -/* Bit for BMC global enables. */ -#define IPMI_BMC_RCV_MSG_INTR 0x01 -#define IPMI_BMC_EVT_MSG_INTR 0x02 -#define IPMI_BMC_EVT_MSG_BUFF 0x04 -#define IPMI_BMC_SYS_LOG 0x08 - enum si_intf_state { SI_NORMAL, SI_GETTING_FLAGS, @@ -220,6 +214,9 @@ struct smi_info { OEM2_DATA_AVAIL) unsigned char msg_flags; + /* Does the BMC have an event buffer? */ + char has_event_buffer; + /* * If set to true, this will request events the next time the * state machine is idle. @@ -968,7 +965,8 @@ static void request_events(void *send_info) { struct smi_info *smi_info = send_info; - if (atomic_read(&smi_info->stop_operation)) + if (atomic_read(&smi_info->stop_operation) || + !smi_info->has_event_buffer) return; atomic_set(&smi_info->req_events, 1); @@ -2407,26 +2405,9 @@ static struct of_platform_driver ipmi_of_platform_driver = { }; #endif /* CONFIG_PPC_OF */ - -static int try_get_dev_id(struct smi_info *smi_info) +static int wait_for_msg_done(struct smi_info *smi_info) { - unsigned char msg[2]; - unsigned char *resp; - unsigned long resp_len; enum si_sm_result smi_result; - int rv = 0; - - resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL); - if (!resp) - return -ENOMEM; - - /* - * Do a Get Device ID command, since it comes back with some - * useful info. 
- */ - msg[0] = IPMI_NETFN_APP_REQUEST << 2; - msg[1] = IPMI_GET_DEVICE_ID_CMD; - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); smi_result = smi_info->handlers->event(smi_info->si_sm, 0); for (;;) { @@ -2441,16 +2422,39 @@ static int try_get_dev_id(struct smi_info *smi_info) } else break; } - if (smi_result == SI_SM_HOSED) { + if (smi_result == SI_SM_HOSED) /* * We couldn't get the state machine to run, so whatever's at * the port is probably not an IPMI SMI interface. */ - rv = -ENODEV; + return -ENODEV; + + return 0; +} + +static int try_get_dev_id(struct smi_info *smi_info) +{ + unsigned char msg[2]; + unsigned char *resp; + unsigned long resp_len; + int rv = 0; + + resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + /* + * Do a Get Device ID command, since it comes back with some + * useful info. + */ + msg[0] = IPMI_NETFN_APP_REQUEST << 2; + msg[1] = IPMI_GET_DEVICE_ID_CMD; + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + + rv = wait_for_msg_done(smi_info); + if (rv) goto out; - } - /* Otherwise, we got some data. 
*/ resp_len = smi_info->handlers->get_result(smi_info->si_sm, resp, IPMI_MAX_MSG_LENGTH); @@ -2462,6 +2466,88 @@ static int try_get_dev_id(struct smi_info *smi_info) return rv; } +static int try_enable_event_buffer(struct smi_info *smi_info) +{ + unsigned char msg[3]; + unsigned char *resp; + unsigned long resp_len; + int rv = 0; + + resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + msg[0] = IPMI_NETFN_APP_REQUEST << 2; + msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + + rv = wait_for_msg_done(smi_info); + if (rv) { + printk(KERN_WARNING + "ipmi_si: Error getting response from get global," + " enables command, the event buffer is not" + " enabled.\n"); + goto out; + } + + resp_len = smi_info->handlers->get_result(smi_info->si_sm, + resp, IPMI_MAX_MSG_LENGTH); + + if (resp_len < 4 || + resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 || + resp[1] != IPMI_GET_BMC_GLOBAL_ENABLES_CMD || + resp[2] != 0) { + printk(KERN_WARNING + "ipmi_si: Invalid return from get global" + " enables command, cannot enable the event" + " buffer.\n"); + rv = -EINVAL; + goto out; + } + + if (resp[3] & IPMI_BMC_EVT_MSG_BUFF) + /* buffer is already enabled, nothing to do. 
*/ + goto out; + + msg[0] = IPMI_NETFN_APP_REQUEST << 2; + msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD; + msg[2] = resp[3] | IPMI_BMC_EVT_MSG_BUFF; + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); + + rv = wait_for_msg_done(smi_info); + if (rv) { + printk(KERN_WARNING + "ipmi_si: Error getting response from set global," + " enables command, the event buffer is not" + " enabled.\n"); + goto out; + } + + resp_len = smi_info->handlers->get_result(smi_info->si_sm, + resp, IPMI_MAX_MSG_LENGTH); + + if (resp_len < 3 || + resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 || + resp[1] != IPMI_SET_BMC_GLOBAL_ENABLES_CMD) { + printk(KERN_WARNING + "ipmi_si: Invalid return from get global," + "enables command, not enable the event" + " buffer.\n"); + rv = -EINVAL; + goto out; + } + + if (resp[2] != 0) + /* + * An error when setting the event buffer bit means + * that the event buffer is not supported. + */ + rv = -ENOENT; + out: + kfree(resp); + return rv; +} + static int type_file_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -2847,6 +2933,10 @@ static int try_smi_init(struct smi_info *new_smi) new_smi->intf_num = smi_num; smi_num++; + rv = try_enable_event_buffer(new_smi); + if (rv == 0) + new_smi->has_event_buffer = 1; + /* * Start clearing the flags before we enable interrupts or the * timer to avoid racing with the timer. diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h index b56a158d587a..a079f586e907 100644 --- a/include/linux/ipmi_msgdefs.h +++ b/include/linux/ipmi_msgdefs.h @@ -58,6 +58,12 @@ #define IPMI_READ_EVENT_MSG_BUFFER_CMD 0x35 #define IPMI_GET_CHANNEL_INFO_CMD 0x42 +/* Bit for BMC global enables. 
*/ +#define IPMI_BMC_RCV_MSG_INTR 0x01 +#define IPMI_BMC_EVT_MSG_INTR 0x02 +#define IPMI_BMC_EVT_MSG_BUFF 0x04 +#define IPMI_BMC_SYS_LOG 0x08 + #define IPMI_NETFN_STORAGE_REQUEST 0x0a #define IPMI_NETFN_STORAGE_RESPONSE 0x0b #define IPMI_ADD_SEL_ENTRY_CMD 0x44 -- cgit v1.2.3-71-gd317 From 4dec302ff71ebf48f5784a2d2fc5e3745e6d4d52 Mon Sep 17 00:00:00 2001 From: dann frazier Date: Tue, 21 Apr 2009 12:24:05 -0700 Subject: ipmi: add oem message handling Enable userspace to receive messages that a BMC transmits using an OEM medium. This is used by the HP iLO2. Based on code originally written by Patrick Schoeller. Signed-off-by: dann frazier Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 138 ++++++++++++++++++++++++++++++++++-- include/linux/ipmi.h | 2 + include/linux/ipmi_msgdefs.h | 2 + 3 files changed, 137 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 83c7477ba801..aa83a0865ec1 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3284,6 +3284,114 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, return rv; } +/* + * This routine will handle "Get Message" command responses with + * channels that use an OEM Medium. The message format belongs to + * the OEM. See IPMI 2.0 specification, Chapter 6 and + * Chapter 22, sections 22.6 and 22.24 for more details. + */ +static int handle_oem_get_msg_cmd(ipmi_smi_t intf, + struct ipmi_smi_msg *msg) +{ + struct cmd_rcvr *rcvr; + int rv = 0; + unsigned char netfn; + unsigned char cmd; + unsigned char chan; + ipmi_user_t user = NULL; + struct ipmi_system_interface_addr *smi_addr; + struct ipmi_recv_msg *recv_msg; + + /* + * We expect the OEM SW to perform error checking + * so we just do some basic sanity checks + */ + if (msg->rsp_size < 4) { + /* Message not big enough, just ignore it. 
*/ + ipmi_inc_stat(intf, invalid_commands); + return 0; + } + + if (msg->rsp[2] != 0) { + /* An error getting the response, just ignore it. */ + return 0; + } + + /* + * This is an OEM Message so the OEM needs to know how + * to handle the message. We do no interpretation. + */ + netfn = msg->rsp[0] >> 2; + cmd = msg->rsp[1]; + chan = msg->rsp[3] & 0xf; + + rcu_read_lock(); + rcvr = find_cmd_rcvr(intf, netfn, cmd, chan); + if (rcvr) { + user = rcvr->user; + kref_get(&user->refcount); + } else + user = NULL; + rcu_read_unlock(); + + if (user == NULL) { + /* We didn't find a user, just give up. */ + ipmi_inc_stat(intf, unhandled_commands); + + /* + * Don't do anything with these messages, just allow + * them to be freed. + */ + + rv = 0; + } else { + /* Deliver the message to the user. */ + ipmi_inc_stat(intf, handled_commands); + + recv_msg = ipmi_alloc_recv_msg(); + if (!recv_msg) { + /* + * We couldn't allocate memory for the + * message, so requeue it for handling + * later. + */ + rv = 1; + kref_put(&user->refcount, free_user); + } else { + /* + * OEM Messages are expected to be delivered via + * the system interface to SMS software.
We might + * need to visit this again depending on OEM + * requirements + */ + smi_addr = ((struct ipmi_system_interface_addr *) + &(recv_msg->addr)); + smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; + smi_addr->channel = IPMI_BMC_CHANNEL; + smi_addr->lun = msg->rsp[0] & 3; + + recv_msg->user = user; + recv_msg->user_msg_data = NULL; + recv_msg->recv_type = IPMI_OEM_RECV_TYPE; + recv_msg->msg.netfn = msg->rsp[0] >> 2; + recv_msg->msg.cmd = msg->rsp[1]; + recv_msg->msg.data = recv_msg->msg_data; + + /* + * The message starts at byte 4 which follows the + * Channel Byte in the "GET MESSAGE" command + */ + recv_msg->msg.data_len = msg->rsp_size - 4; + memcpy(recv_msg->msg_data, + &(msg->rsp[4]), + msg->rsp_size - 4); + deliver_response(recv_msg); + } + } + + return rv; +} + static void copy_event_into_recv_msg(struct ipmi_recv_msg *recv_msg, struct ipmi_smi_msg *msg) { @@ -3539,6 +3647,17 @@ static int handle_new_recv_msg(ipmi_smi_t intf, goto out; } + /* + ** We need to make sure the channels have been initialized. + ** The channel_handler routine will set the "curr_channel" + ** equal to or greater than IPMI_MAX_CHANNELS when all the + ** channels for this interface have been initialized. + */ + if (intf->curr_channel < IPMI_MAX_CHANNELS) { + requeue = 1; /* Just put the message back for now */ + goto out; + } + switch (intf->channels[chan].medium) { case IPMI_CHANNEL_MEDIUM_IPMB: if (msg->rsp[4] & 0x04) { @@ -3574,11 +3693,20 @@ static int handle_new_recv_msg(ipmi_smi_t intf, break; default: - /* - * We don't handle the channel type, so just - * free the message. - */ - requeue = 0; + /* Check for OEM Channels. Clients had better + register for these commands. */ + if ((intf->channels[chan].medium + >= IPMI_CHANNEL_MEDIUM_OEM_MIN) + && (intf->channels[chan].medium + <= IPMI_CHANNEL_MEDIUM_OEM_MAX)) { + requeue = handle_oem_get_msg_cmd(intf, msg); + } else { + /* + * We don't handle the channel type, so just + * free the message.
+ */ + requeue = 0; + } } } else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 7ebdb4fb4e54..65aae34759de 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -198,6 +198,8 @@ struct kernel_ipmi_msg { response. When you send a response message, this will be returned. */ +#define IPMI_OEM_RECV_TYPE 5 /* The response for OEM Channels */ + /* Note that async events and received commands do not have a completion code as the first byte of the incoming data, unlike a response. */ diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h index a079f586e907..df97e6e31e87 100644 --- a/include/linux/ipmi_msgdefs.h +++ b/include/linux/ipmi_msgdefs.h @@ -115,5 +115,7 @@ #define IPMI_CHANNEL_MEDIUM_USB1 10 #define IPMI_CHANNEL_MEDIUM_USB2 11 #define IPMI_CHANNEL_MEDIUM_SYSINTF 12 +#define IPMI_CHANNEL_MEDIUM_OEM_MIN 0x60 +#define IPMI_CHANNEL_MEDIUM_OEM_MAX 0x7f #endif /* __LINUX_IPMI_MSGDEFS_H */ -- cgit v1.2.3-71-gd317 From e638c1394010859a015a3b533ee452d768e62cea Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 21 Apr 2009 12:24:41 -0700 Subject: memcg: use rcu_dereference to access mm->owner mm->owner should be accessed with rcu_dereference(). 
Reported-by: KOSAKI Motohiro Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 18146c980b68..a9e3b76aa884 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -75,7 +75,7 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) { struct mem_cgroup *mem; rcu_read_lock(); - mem = mem_cgroup_from_task((mm)->owner); + mem = mem_cgroup_from_task(rcu_dereference((mm)->owner)); rcu_read_unlock(); return cgroup == mem; } -- cgit v1.2.3-71-gd317 From 6e538aaf50ae782a890cbc02c27950448d8193e1 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 21 Apr 2009 12:24:49 -0700 Subject: spi: documentation: emphasise spi_master.setup() semantics This is a doc-only patch which I hope will reduce the number of spi_master controller driver patches starting out with a common implementation bug. (As in: almost every spi_master driver I see starts out with its version of this bug. Sigh.) It just re-emphasizes that the setup() method may be called for one device while a transfer is active on another ... which means that most driver implementations shouldn't touch any registers. Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/spi/spi-summary | 6 ++++++ include/linux/spi/spi.h | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary index 0f5122eb282b..4a02d2508bc8 100644 --- a/Documentation/spi/spi-summary +++ b/Documentation/spi/spi-summary @@ -511,10 +511,16 @@ SPI MASTER METHODS This sets up the device clock rate, SPI mode, and word sizes. 
Drivers may change the defaults provided by board_info, and then call spi_setup(spi) to invoke this routine. It may sleep. + Unless each SPI slave has its own configuration registers, don't change them right away ... otherwise drivers could corrupt I/O that's in progress for other SPI devices. + ** BUG ALERT: for some reason the first version of + ** many spi_master drivers seems to get this wrong. + ** When you code setup(), ASSUME that the controller + ** is actively processing transfers for another device. + master->transfer(struct spi_device *spi, struct spi_message *message) This must not sleep. Its responsibility is arrange that the transfer happens and its complete() callback is issued. The two diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 2cc43fa380cb..a0faa18f7b1b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -245,7 +245,12 @@ struct spi_master { */ u16 dma_alignment; - /* setup mode and clock, etc (spi driver may call many times) */ + /* Setup mode and clock, etc (spi driver may call many times). + * + * IMPORTANT: this may be called when transfers to another + * device are active. DO NOT UPDATE SHARED REGISTERS in ways + * which could break those transfers. + */ int (*setup)(struct spi_device *spi); /* bidirectional bulk transfers -- cgit v1.2.3-71-gd317 From 9b8de7479d0dbab1ed98b5b015d44232c9d3d08e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Apr 2009 23:00:24 +0100 Subject: FRV: Fix the section attribute on UP DECLARE_PER_CPU() In non-SMP mode, the variable section attribute specified by DECLARE_PER_CPU() does not agree with that specified by DEFINE_PER_CPU(). This means that architectures that have a small data section references relative to a base register may throw up linkage errors due to too great a displacement between where the base register points and the per-CPU variable. 
On FRV, the .h declaration says that the variable is in the .sdata section, but the .c definition says it's actually in the .data section. The linker throws up the following errors: kernel/built-in.o: In function `release_task': kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o To fix this, DECLARE_PER_CPU() should simply apply the same section attribute as does DEFINE_PER_CPU(). However, this is made slightly more complex by virtue of the fact that there are several variants on DEFINE, so these need to be matched by variants on DECLARE. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/percpu.h | 2 +- arch/ia64/include/asm/smp.h | 2 +- arch/x86/include/asm/desc.h | 2 +- arch/x86/include/asm/hardirq.h | 2 +- arch/x86/include/asm/processor.h | 6 +++--- arch/x86/include/asm/tlbflush.h | 2 +- include/asm-generic/percpu.h | 43 ++++++++++++++++++++++++++++++++++++++-- include/linux/percpu.h | 24 ---------------------- net/rds/rds.h | 2 +- 9 files changed, 50 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h index 3495e8e00d70..e9e0bb5a23bf 100644 --- a/arch/alpha/include/asm/percpu.h +++ b/arch/alpha/include/asm/percpu.h @@ -73,6 +73,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #endif /* SMP */ -#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name) +#include #endif /* __ALPHA_PERCPU_H */ diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h index 598408336251..d217d1d4e051 100644 --- a/arch/ia64/include/asm/smp.h +++ b/arch/ia64/include/asm/smp.h @@ -58,7 +58,7 @@ extern struct smp_boot_data { extern char no_int_routing __devinitdata; extern cpumask_t 
cpu_core_map[NR_CPUS]; -DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); +DECLARE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); extern int smp_num_siblings; extern void __iomem *ipi_base_addr; extern unsigned char smp_int_redirect; diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 5623c50d67b2..c45f415ce315 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -37,7 +37,7 @@ extern gate_desc idt_table[]; struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); -DECLARE_PER_CPU(struct gdt_page, gdt_page); +DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 039db6aa8e02..37555e52f980 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -26,7 +26,7 @@ typedef struct { #endif } ____cacheline_aligned irq_cpustat_t; -DECLARE_PER_CPU(irq_cpustat_t, irq_stat); +DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fcf4d92e7e04..c2cceae709c8 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -138,7 +138,7 @@ extern struct tss_struct doublefault_tss; extern __u32 cleared_cpu_caps[NCAPINTS]; #ifdef CONFIG_SMP -DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) #define current_cpu_data __get_cpu_var(cpu_info) #else @@ -270,7 +270,7 @@ struct tss_struct { } ____cacheline_aligned; -DECLARE_PER_CPU(struct tss_struct, init_tss); +DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); /* * Save the original ist values for checking stack pointers during debugging @@ 
-393,7 +393,7 @@ union irq_stack_union { }; }; -DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); +DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d3539f998f88..16a5c84b0329 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -152,7 +152,7 @@ struct tlb_state { struct mm_struct *active_mm; int state; }; -DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); +DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); static inline void reset_lazy_tlbstate(void) { diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b0e63c672ebd..af47b9e10064 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -73,11 +73,50 @@ extern void setup_per_cpu_areas(void); #endif /* SMP */ +#ifndef PER_CPU_BASE_SECTION +#ifdef CONFIG_SMP +#define PER_CPU_BASE_SECTION ".data.percpu" +#else +#define PER_CPU_BASE_SECTION ".data" +#endif +#endif + +#ifdef CONFIG_SMP + +#ifdef MODULE +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#else +#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" +#endif +#define PER_CPU_FIRST_SECTION ".first" + +#else + +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_FIRST_SECTION "" + +#endif + #ifndef PER_CPU_ATTRIBUTES #define PER_CPU_ATTRIBUTES #endif -#define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \ - __typeof__(type) per_cpu_var(name) +#define DECLARE_PER_CPU_SECTION(type, name, section) \ + extern \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DECLARE_PER_CPU(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "") + +#define DECLARE_PER_CPU_SHARED_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +#define 
DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") + +#define DECLARE_PER_CPU_FIRST(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cfda2d5ad319..f052d8184993 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,30 +9,6 @@ #include -#ifndef PER_CPU_BASE_SECTION -#ifdef CONFIG_SMP -#define PER_CPU_BASE_SECTION ".data.percpu" -#else -#define PER_CPU_BASE_SECTION ".data" -#endif -#endif - -#ifdef CONFIG_SMP - -#ifdef MODULE -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#else -#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" -#endif -#define PER_CPU_FIRST_SECTION ".first" - -#else - -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#define PER_CPU_FIRST_SECTION "" - -#endif - #define DEFINE_PER_CPU_SECTION(type, name, section) \ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name diff --git a/net/rds/rds.h b/net/rds/rds.h index 619f0a30a4e5..71794449ca4e 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -638,7 +638,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *, void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); /* stats.c */ -DECLARE_PER_CPU(struct rds_statistics, rds_stats); +DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); #define rds_stats_inc_which(which, member) do { \ per_cpu(which, get_cpu()).member++; \ put_cpu(); \ -- cgit v1.2.3-71-gd317 From 5028eaa97dd1dab9cd7c30c4d38f71c708ca64bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Apr 2009 23:00:29 +0100 Subject: PERCPU: Collect the DECLARE/DEFINE declarations together Collect the DECLARE/DEFINE declarations together in linux/percpu-defs.h so that they're in one place, and give them descriptive comments, particularly the SHARED_ALIGNED variant. 
It would be nice to collect these in linux/percpu.h, but that's not possible without sorting out the severe #include recursion between the x86 arch headers and the general headers (and possibly other arches too). Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-generic/percpu.h | 26 ++------------ include/linux/percpu-defs.h | 84 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/percpu.h | 20 ----------- 3 files changed, 86 insertions(+), 44 deletions(-) create mode 100644 include/linux/percpu-defs.h (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index af47b9e10064..d7d50d7ee51e 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -1,13 +1,9 @@ #ifndef _ASM_GENERIC_PERCPU_H_ #define _ASM_GENERIC_PERCPU_H_ + #include #include - -/* - * Determine the real variable name from the name visible in the - * kernel sources. - */ -#define per_cpu_var(var) per_cpu__##var +#include #ifdef CONFIG_SMP @@ -101,22 +97,4 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_ATTRIBUTES #endif -#define DECLARE_PER_CPU_SECTION(type, name, section) \ - extern \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name - -#define DECLARE_PER_CPU(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, "") - -#define DECLARE_PER_CPU_SHARED_ALIGNED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ - ____cacheline_aligned_in_smp - -#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") - -#define DECLARE_PER_CPU_FIRST(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) - #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h new file mode 100644 index 000000000000..8f921d74f49f --- /dev/null +++ b/include/linux/percpu-defs.h @@ -0,0 +1,84 @@ +#ifndef 
_LINUX_PERCPU_DEFS_H +#define _LINUX_PERCPU_DEFS_H + +/* + * Determine the real variable name from the name visible in the + * kernel sources. + */ +#define per_cpu_var(var) per_cpu__##var + +/* + * Base implementations of per-CPU variable declarations and definitions, where + * the section in which the variable is to be placed is provided by the + * 'section' argument. This may be used to affect the parameters governing the + * variable's storage. + * + * NOTE! The sections for the DECLARE and for the DEFINE must match, lest + * linkage errors occur due the compiler generating the wrong code to access + * that section. + */ +#define DECLARE_PER_CPU_SECTION(type, name, section) \ + extern \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SECTION(type, name, section) \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +/* + * Variant on the per-CPU variable declaration/definition theme used for + * ordinary per-CPU variables. + */ +#define DECLARE_PER_CPU(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "") + +#define DEFINE_PER_CPU(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "") + +/* + * Declaration/definition used for per-CPU variables that must come first in + * the set of variables. + */ +#define DECLARE_PER_CPU_FIRST(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) + +#define DEFINE_PER_CPU_FIRST(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) + +/* + * Declaration/definition used for per-CPU variables that must be cacheline + * aligned under SMP conditions so that, whilst a particular instance of the + * data corresponds to a particular CPU, inefficiencies due to direct access by + * other CPUs are reduced by preventing the data from unnecessarily spanning + * cachelines. 
+ * + * An example of this would be statistical data, where each CPU's set of data + * is updated by that CPU alone, but the data from across all CPUs is collated + * by a CPU processing a read from a proc file. + */ +#define DECLARE_PER_CPU_SHARED_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp + +/* + * Declaration/definition used for per-CPU variables that must be page aligned. + */ +#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") + +/* + * Intermodule exports for per-CPU variables. + */ +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) + + +#endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f052d8184993..1581ff235c7e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,26 +9,6 @@ #include -#define DEFINE_PER_CPU_SECTION(type, name, section) \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, "") - -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ - ____cacheline_aligned_in_smp - -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") - -#define DEFINE_PER_CPU_FIRST(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) - -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) 
EXPORT_SYMBOL_GPL(per_cpu__##var) - /* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES #define PERCPU_MODULE_RESERVE (8 << 10) -- cgit v1.2.3-71-gd317 From 5dd559f020c98a2a4b3e063f09c0e4bc771ed838 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 21 Apr 2009 16:30:32 -0600 Subject: Trivial: fix a typo in slow-work.h Fix a comment typo in slow-work.h ...a trivial mistake, but it will mess up kerneldoc if nothing else. Signed-off-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- include/linux/slow-work.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 85958277f83d..b65c8881f07a 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -67,7 +67,7 @@ static inline void slow_work_init(struct slow_work *work, } /** - * slow_work_init - Initialise a very slow work item + * vslow_work_init - Initialise a very slow work item * @work: The work item to initialise * @ops: The operations to use to handle the slow work item * -- cgit v1.2.3-71-gd317 From d4d5291c8cd499b1b590336059d5cc3e24c1ced6 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 21 Apr 2009 13:32:54 -0700 Subject: driver synchronization: make scsi_wait_scan more advanced There is currently only one way for userspace to say "wait for my storage device to get ready for the modules I just loaded": to load the scsi_wait_scan module. Expectations of userspace are that once this module is loaded, all the (storage) devices for which the drivers were loaded before the module load are present. Now, there are some issues with the implementation, and the async stuff got caught in the middle of this: The existing code only waits for the scsi async probing to finish, but it did not take into account at all that probing might not have begun yet.
(Russell ran into this problem on his computer and the fix works for him) This patch fixes this more thoroughly than the previous "fix", which had some bad side effects (namely, for kernel code that wanted to wait for the scsi scan it would also do an async sync, which would deadlock if you did it from async context already.. there's a report about that on lkml): The patch makes the module first wait for all device driver probes, and then it will wait for the scsi parallel scan to finish. Signed-off-by: Arjan van de Ven Tested-by: Russell King Signed-off-by: Linus Torvalds --- drivers/base/dd.c | 1 + drivers/scsi/scsi_scan.c | 2 -- drivers/scsi/scsi_wait_scan.c | 11 +++++++++++ include/linux/device.h | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f17c3266a0e0..742cbe6b042b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -179,6 +179,7 @@ void wait_for_device_probe(void) wait_event(probe_waitqueue, atomic_read(&probe_count) == 0); async_synchronize_full(); } +EXPORT_SYMBOL_GPL(wait_for_device_probe); /** * driver_probe_device - attempt to bind device & driver together diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index a14d245a66b8..6f51ca485f35 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -180,8 +180,6 @@ int scsi_complete_async_scans(void) spin_unlock(&async_scan_lock); kfree(data); - /* Synchronize async operations globally */ - async_synchronize_full(); return 0; } diff --git a/drivers/scsi/scsi_wait_scan.c b/drivers/scsi/scsi_wait_scan.c index 2f21af21269a..74708fcaf82f 100644 --- a/drivers/scsi/scsi_wait_scan.c +++ b/drivers/scsi/scsi_wait_scan.c @@ -11,10 +11,21 @@ */ #include +#include #include static int __init wait_scan_init(void) { + /* + * First we need to wait for device probing to finish; + * the drivers we just loaded might just still be probing + * and might not yet have reached the scsi async scanning 
+ */ + wait_for_device_probe(); + /* + * and then we wait for the actual asynchronous scsi scan + * to finish. + */ scsi_complete_async_scans(); return 0; } diff --git a/include/linux/device.h b/include/linux/device.h index 2918c0e8fdfd..6a69caaac18a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -551,6 +551,7 @@ extern int (*platform_notify_remove)(struct device *dev); extern struct device *get_device(struct device *dev); extern void put_device(struct device *dev); +extern void wait_for_device_probe(void); /* drivers/base/power/shutdown.c */ extern void device_shutdown(void); -- cgit v1.2.3-71-gd317 From 451a9ebf653d28337ba53ed5b4b70b0b9543cca1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 Apr 2009 19:50:51 +0200 Subject: bio: fix bio_kmalloc() Impact: fix bio_kmalloc() and its destruction path bio_kmalloc() was broken in two ways. * bvec_alloc_bs() first allocates bvec using kmalloc() and then ignores it and allocates again like non-kmalloc bvecs. * bio_kmalloc_destructor() didn't check for and free bio integrity data. This patch fixes the above problems. kmalloc path is separated out from bio_alloc_bioset() and allocates the requested number of bvecs as inline bvecs. * bio_alloc_bioset() no longer takes NULL @bs. None other than bio_kmalloc() used it and outside users can't know how it was allocated anyway. * Define and use BIO_POOL_NONE so that pool index check in bvec_free_bs() triggers if inline or kmalloc allocated bvec gets there. * Relocate destructors on top of each allocation function so that how they're used is more clear. Jens Axboe suggested allocating bvecs inline.
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/bio.c | 118 ++++++++++++++++++++++++---------------------------- include/linux/bio.h | 1 + 2 files changed, 55 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/fs/bio.c b/fs/bio.c index cd42bb882f30..d35588fd6d57 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -174,14 +174,6 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, { struct bio_vec *bvl; - /* - * If 'bs' is given, lookup the pool and do the mempool alloc. - * If not, this is a bio_kmalloc() allocation and just do a - * kzalloc() for the exact number of vecs right away. - */ - if (!bs) - bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask); - /* * see comment near bvec_array define! */ @@ -260,21 +252,6 @@ void bio_free(struct bio *bio, struct bio_set *bs) mempool_free(p, bs->bio_pool); } -/* - * default destructor for a bio allocated with bio_alloc_bioset() - */ -static void bio_fs_destructor(struct bio *bio) -{ - bio_free(bio, fs_bio_set); -} - -static void bio_kmalloc_destructor(struct bio *bio) -{ - if (bio_has_allocated_vec(bio)) - kfree(bio->bi_io_vec); - kfree(bio); -} - void bio_init(struct bio *bio) { memset(bio, 0, sizeof(*bio)); @@ -301,21 +278,15 @@ void bio_init(struct bio *bio) **/ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { + unsigned long idx = BIO_POOL_NONE; struct bio_vec *bvl = NULL; - struct bio *bio = NULL; - unsigned long idx = 0; - void *p = NULL; - - if (bs) { - p = mempool_alloc(bs->bio_pool, gfp_mask); - if (!p) - goto err; - bio = p + bs->front_pad; - } else { - bio = kmalloc(sizeof(*bio), gfp_mask); - if (!bio) - goto err; - } + struct bio *bio; + void *p; + + p = mempool_alloc(bs->bio_pool, gfp_mask); + if (unlikely(!p)) + return NULL; + bio = p + bs->front_pad; bio_init(bio); @@ -332,22 +303,50 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) nr_iovecs = bvec_nr_vecs(idx); } +out_set: bio->bi_flags |= idx << 
BIO_POOL_OFFSET; bio->bi_max_vecs = nr_iovecs; -out_set: bio->bi_io_vec = bvl; - return bio; err_free: - if (bs) - mempool_free(p, bs->bio_pool); - else - kfree(bio); -err: + mempool_free(p, bs->bio_pool); return NULL; } +static void bio_fs_destructor(struct bio *bio) +{ + bio_free(bio, fs_bio_set); +} + +/** + * bio_alloc - allocate a new bio, memory pool backed + * @gfp_mask: allocation mask to use + * @nr_iovecs: number of iovecs + * + * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask + * contains __GFP_WAIT, the allocation is guaranteed to succeed. + * + * RETURNS: + * Pointer to new bio on success, NULL on failure. + */ +struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) +{ + struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); + + if (bio) + bio->bi_destructor = bio_fs_destructor; + + return bio; +} + +static void bio_kmalloc_destructor(struct bio *bio) +{ + if (bio_integrity(bio)) + bio_integrity_free(bio); + kfree(bio); +} + /** * bio_alloc - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator @@ -366,29 +365,20 @@ err: * do so can cause livelocks under memory pressure. * **/ -struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) -{ - struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); - - if (bio) - bio->bi_destructor = bio_fs_destructor; - - return bio; -} - -/* - * Like bio_alloc(), but doesn't use a mempool backing. This means that - * it CAN fail, but while bio_alloc() can only be used for allocations - * that have a short (finite) life span, bio_kmalloc() should be used - * for more permanent bio allocations (like allocating some bio's for - * initalization or setup purposes). 
- */ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) { - struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); + struct bio *bio; - if (bio) - bio->bi_destructor = bio_kmalloc_destructor; + bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), + gfp_mask); + if (unlikely(!bio)) + return NULL; + + bio_init(bio); + bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; + bio->bi_max_vecs = nr_iovecs; + bio->bi_io_vec = bio->bi_inline_vecs; + bio->bi_destructor = bio_kmalloc_destructor; return bio; } diff --git a/include/linux/bio.h b/include/linux/bio.h index b89cf2d82898..7b214fd672a2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -132,6 +132,7 @@ struct bio { * top 4 bits of bio flags indicate the pool this bio came from */ #define BIO_POOL_BITS (4) +#define BIO_POOL_NONE ((1UL << BIO_POOL_BITS) - 1) #define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS) #define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) #define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) -- cgit v1.2.3-71-gd317 From 71982a409f12c50d011325a4471aa20666bb908d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Apr 2009 08:34:48 +0200 Subject: block: include empty disks in /proc/diskstats /proc/diskstats used to show stats for all disks whether they're zero-sized or not and their non-zero partitions. Commit 074a7aca7afa6f230104e8e65eba3420263714a5 accidentally changed the behavior such that it doesn't print out zero sized disks. This patch implements DISK_PITER_INCL_EMPTY_PART0 flag to partition iterator and uses it in diskstats_show() such that empty part0 is shown in /proc/diskstats. Reported and bisected by Daniel Collins.
Signed-off-by: Tejun Heo Reported-by: Daniel Collins Signed-off-by: Jens Axboe --- block/genhd.c | 12 ++++++++---- include/linux/genhd.h | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index a9ec910974c1..1a4916e01732 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -98,7 +98,7 @@ void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, if (flags & DISK_PITER_REVERSE) piter->idx = ptbl->len - 1; - else if (flags & DISK_PITER_INCL_PART0) + else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0)) piter->idx = 0; else piter->idx = 1; @@ -134,7 +134,8 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) /* determine iteration parameters */ if (piter->flags & DISK_PITER_REVERSE) { inc = -1; - if (piter->flags & DISK_PITER_INCL_PART0) + if (piter->flags & (DISK_PITER_INCL_PART0 | + DISK_PITER_INCL_EMPTY_PART0)) end = -1; else end = 0; @@ -150,7 +151,10 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; - if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) + if (!part->nr_sects && + !(piter->flags & DISK_PITER_INCL_EMPTY) && + !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && + piter->idx == 0)) continue; get_device(part_to_dev(part)); @@ -1011,7 +1015,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n\n"); */ - disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); + disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { cpu = part_stat_lock(); part_round_stats(cpu, hd); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 634c53028fb8..a1a28caed23d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -214,6 +214,7 @@ static inline void disk_put_part(struct hd_struct *part) #define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ 
#define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ #define DISK_PITER_INCL_PART0 (1 << 2) /* include partition 0 */ +#define DISK_PITER_INCL_EMPTY_PART0 (1 << 3) /* include empty partition 0 */ struct disk_part_iter { struct gendisk *disk; -- cgit v1.2.3-71-gd317 From 4cd481f68dde99ac416003b825c835f71e364393 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 13 Apr 2009 11:59:32 +0200 Subject: KVM: Fix overlapping check for memory slots When checking for overlapping slots on registration of a new one, kvm currently also considers zero-length (ie. deleted) slots and rejects requests incorrectly. This finally denies user space from joining slots. Fix the check by skipping deleted slots and advertise this via a KVM_CAP_JOIN_MEMORY_REGIONS_WORKS. Cc: stable@kernel.org Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- include/linux/kvm.h | 2 ++ virt/kvm/kvm_main.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 311a073afe8a..8cc137911b34 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -409,6 +409,8 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_DEASSIGNMENT 27 #endif +/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ +#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 28d693a1ee8f..1ecbe2391c8b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -961,7 +961,7 @@ int __kvm_set_memory_region(struct kvm *kvm, for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { struct kvm_memory_slot *s = &kvm->memslots[i]; - if (s == memslot) + if (s == memslot || !s->npages) continue; if (!((base_gfn + npages <= s->base_gfn) || (base_gfn >= s->base_gfn + s->npages))) @@ -1983,6 +1983,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) switch (arg) { case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + 
case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: return 1; #ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQ_ROUTING: -- cgit v1.2.3-71-gd317 From 1b6b8ce2ac372ea1f2065b89228ede105eb68dc5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 9 Apr 2009 14:57:39 +0800 Subject: PCI: only save/restore existent registers in the PCIe capability PCIe 1.1 base neither requires the endpoint to implement the entire PCIe capability structure nor specifies default values of registers that are not implemented by the device. So we only save and restore registers that must be implemented by different device types if the device PCIe capability version is 1. PCIe 1.1 Capability Structure Expansion ECN and PCIe 2.0 requires all registers in the PCIe capability to be either implemented or hardwired to 0. Their PCIe capability version is 2. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 70 ++++++++++++++++++++++++++++++++++++++---------- include/linux/pci_regs.h | 1 + 2 files changed, 57 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 16fd0d4c3166..34bf0fdf5047 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -681,11 +681,34 @@ EXPORT_SYMBOL(pci_choose_state); #define PCI_EXP_SAVE_REGS 7 +#define pcie_cap_has_devctl(type, flags) 1 +#define pcie_cap_has_lnkctl(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1 || \ + (type == PCI_EXP_TYPE_ROOT_PORT || \ + type == PCI_EXP_TYPE_ENDPOINT || \ + type == PCI_EXP_TYPE_LEG_END)) +#define pcie_cap_has_sltctl(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1 || \ + ((type == PCI_EXP_TYPE_ROOT_PORT) || \ + (type == PCI_EXP_TYPE_DOWNSTREAM && \ + (flags & PCI_EXP_FLAGS_SLOT)))) +#define pcie_cap_has_rtctl(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1 || \ + (type == PCI_EXP_TYPE_ROOT_PORT || \ + type == PCI_EXP_TYPE_RC_EC)) +#define pcie_cap_has_devctl2(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1) +#define pcie_cap_has_lnkctl2(type, flags) \ + 
((flags & PCI_EXP_FLAGS_VERS) > 1) +#define pcie_cap_has_sltctl2(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1) + static int pci_save_pcie_state(struct pci_dev *dev) { int pos, i = 0; struct pci_cap_saved_state *save_state; u16 *cap; + u16 flags; pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (pos <= 0) @@ -698,13 +721,22 @@ static int pci_save_pcie_state(struct pci_dev *dev) } cap = (u16 *)&save_state->data[0]; - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); + + if (pcie_cap_has_devctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &cap[i++]); + if (pcie_cap_has_lnkctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); + if (pcie_cap_has_sltctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); + if (pcie_cap_has_rtctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); + if (pcie_cap_has_devctl2(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]); + if (pcie_cap_has_lnkctl2(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]); + if (pcie_cap_has_sltctl2(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]); return 0; } @@ -714,6 +746,7 @@ static void pci_restore_pcie_state(struct pci_dev *dev) int i = 0, pos; struct pci_cap_saved_state *save_state; u16 *cap; + u16 flags; save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP); pos = pci_find_capability(dev, PCI_CAP_ID_EXP); @@ -721,13 +754,22 @@ 
static void pci_restore_pcie_state(struct pci_dev *dev) return; cap = (u16 *)&save_state->data[0]; - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); + + if (pcie_cap_has_devctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, cap[i++]); + if (pcie_cap_has_lnkctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]); + if (pcie_cap_has_sltctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]); + if (pcie_cap_has_rtctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]); + if (pcie_cap_has_devctl2(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]); + if (pcie_cap_has_lnkctl2(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]); + if (pcie_cap_has_sltctl2(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]); } diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index e4d08c1b2e0b..616bf8b3c8b5 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -376,6 +376,7 @@ #define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ #define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ +#define PCI_EXP_TYPE_RC_EC 0x10 /* Root Complex Event Collector */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ 
-- cgit v1.2.3-71-gd317 From 952043ac12a117d8e94bddd9088338d7ad20ca7d Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 23 Apr 2009 08:48:15 +0100 Subject: bitops: Add __ffs64 bitop Finds the first set bit in a 64 bit word. This is required in order to fix a bug in GFS2, but I think it should be a generic function in case of future users. Signed-off-by: Steven Whitehouse Reviewed-by: Christoph Lameter Reviewed-by: Willy Tarreau --- include/linux/bitops.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 61829139795a..c05a29cb9bb2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -112,6 +112,25 @@ static inline unsigned fls_long(unsigned long l) return fls64(l); } +/** + * __ffs64 - find first set bit in a 64 bit word + * @word: The 64 bit word + * + * On 64 bit arches this is a synomyn for __ffs + * The result is not defined if no bits are set, so check that @word + * is non-zero before calling this. + */ +static inline unsigned long __ffs64(u64 word) +{ +#if BITS_PER_LONG == 32 + if (((u32)word) == 0UL) + return __ffs((u32)(word >> 32)) + 32; +#elif BITS_PER_LONG != 64 +#error BITS_PER_LONG not 32 or 64 +#endif + return __ffs((unsigned long)word); +} + #ifdef __KERNEL__ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT -- cgit v1.2.3-71-gd317 From fbfc396efbc11d784b4325adfc02e82a0df01a8d Mon Sep 17 00:00:00 2001 From: "Mark A. Greer" Date: Tue, 21 Apr 2009 20:52:54 -0700 Subject: USB: musb: Prevent multiple includes of musb.h Add #ifndef to musb header file to prevent multiple inclusions. Signed-off-by: Mark A. 
Greer Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/musb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index d6aad0ea6033..d43755669261 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -7,6 +7,9 @@ * key configuration differences between boards. */ +#ifndef __LINUX_USB_MUSB_H +#define __LINUX_USB_MUSB_H + /* The USB role is defined by the connector used on the board, so long as * standards are being followed. (Developer boards sometimes won't.) */ @@ -101,3 +104,5 @@ extern int __init tusb6010_setup_interface( extern int tusb6010_platform_retime(unsigned is_refclk); #endif /* OMAP2 */ + +#endif /* __LINUX_USB_MUSB_H */ -- cgit v1.2.3-71-gd317 From 097102c2d04974bdfcfa16a5f3062d499842139c Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Tue, 21 Apr 2009 09:33:14 +0200 Subject: pktcdvd.h should include mempool.h Fix this build error: In file included from fs/compat_ioctl.c:104: include/linux/pktcdvd.h:285: error: expected specifier-qualifier-list before 'mempool_t' Signed-off-by: Alexander Beregalov Signed-off-by: Jens Axboe --- include/linux/pktcdvd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 04b4d7330e6d..d745f5b6c7b0 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -113,6 +113,7 @@ struct pkt_ctrl_command { #include #include #include +#include /* default bio write queue congestion marks */ #define PKT_WRITE_CONGESTION_ON 10000 -- cgit v1.2.3-71-gd317 From 42dad7647aec49b3ad20dd0cb832b232a6ae514f Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 22 Apr 2009 14:01:49 +0200 Subject: block: simplify I/O stat accounting This simplifies I/O stat accounting switching code and separates it completely from I/O scheduler switch code. 
Requests are accounted according to the state of their request queue at the time of the request allocation. There is no need anymore to flush the request queue when switching I/O accounting state. Signed-off-by: Jerome Marchand Signed-off-by: Jens Axboe --- block/blk-core.c | 6 ++++-- block/blk-merge.c | 5 ++++- block/blk-sysfs.c | 4 ---- block/blk.h | 7 +------ include/linux/blkdev.h | 3 +++ 5 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 07ab75403e1a..2998fe3a2377 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) } static struct request * -blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) blk_rq_init(q, rq); - rq->cmd_flags = rw | REQ_ALLOCED; + rq->cmd_flags = flags | REQ_ALLOCED; if (priv) { if (unlikely(elv_set_request(q, rq, gfp_mask))) { @@ -792,6 +792,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags, if (priv) rl->elvpriv++; + if (blk_queue_io_stat(q)) + rw_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); diff --git a/block/blk-merge.c b/block/blk-merge.c index 63760ca3da0f..23d2a6fe34a3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -402,7 +402,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); - blk_account_io_merge(req); + /* + * 'next' is going away, so update stats accordingly + */ + blk_account_io_merge(next); req->ioprio = ioprio_best(req->ioprio, next->ioprio); if (blk_rq_cpu_valid(next)) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 
cac4e9febe6a..3ff9bba3379a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&stats, page, count); spin_lock_irq(q->queue_lock); - elv_quiesce_start(q); - if (stats) queue_flag_set(QUEUE_FLAG_IO_STAT, q); else queue_flag_clear(QUEUE_FLAG_IO_STAT, q); - - elv_quiesce_end(q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/block/blk.h b/block/blk.h index 5dfc41267a08..79c85f7c9ff5 100644 --- a/block/blk.h +++ b/block/blk.h @@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu) static inline int blk_do_io_stat(struct request *rq) { - struct gendisk *disk = rq->rq_disk; - - if (!disk || !disk->queue) - return 0; - - return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV); + return rq->rq_disk && blk_rq_io_stat(rq); } #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba54c834a590..2755d5c6da22 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,6 +118,7 @@ enum rq_flag_bits { __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_NOIDLE, /* Don't anticipate more IO after this one */ + __REQ_IO_STAT, /* account I/O stat */ __REQ_NR_BITS, /* stops here */ }; @@ -145,6 +146,7 @@ enum rq_flag_bits { #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_NOIDLE (1 << __REQ_NOIDLE) +#define REQ_IO_STAT (1 << __REQ_IO_STAT) #define BLK_MAX_CDB 16 @@ -598,6 +600,7 @@ enum { blk_failfast_transport(rq) || \ blk_failfast_driver(rq)) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) +#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT) #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) -- cgit v1.2.3-71-gd317 From c80d471a476b6d6fe0bc1fd25293c24c66b7aaaf Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Sat, 
25 Apr 2009 22:10:56 -0400 Subject: Add new HEAD_TEXT_SECTION macro. This patch is preparation for replacing all uses of ".head.text" or ".text.head" in the kernel with macros, so that the section name can later be changed without having to touch a lot of the kernel. Since some linker scripts do more complex things than referencing HEAD_TEXT, we add a HEAD_TEXT_SECTION macro that just contains the actual name. I've defined HEAD_TEXT_SECTION in a new header, include/linux/section-names.h, so that this section name only needs to appear in one place. I anticipate creating similar macro structures for a number of other section names. The long-term goal here is to be able to change the kernel's magic section names to those that are compatible with -ffunction-sections -fdata-sections. This requires renaming all magic sections with names of the form ".text.foo". Signed-off-by: Tim Abbott Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 4 +++- include/linux/init.h | 4 +++- include/linux/section-names.h | 6 ++++++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 include/linux/section-names.h (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7fa660fd449c..eaa06ef6f7d9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1,3 +1,5 @@ +#include + #ifndef LOAD_OFFSET #define LOAD_OFFSET 0 #endif @@ -331,7 +333,7 @@ #endif /* Section used for early init (in .S files) */ -#define HEAD_TEXT *(.head.text) +#define HEAD_TEXT *(HEAD_TEXT_SECTION) /* init and exit section handling */ #define INIT_DATA \ diff --git a/include/linux/init.h b/include/linux/init.h index f121a7a10c3d..20a1334e34e9 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -2,6 +2,8 @@ #define _LINUX_INIT_H #include +#include +#include /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) @@ -107,7 +109,7 
@@ #define __memexitconst __section(.memexit.rodata) /* For assembly routines */ -#define __HEAD .section ".head.text","ax" +#define __HEAD .section __stringify(HEAD_TEXT_SECTION),"ax" #define __INIT .section ".init.text","ax" #define __FINIT .previous diff --git a/include/linux/section-names.h b/include/linux/section-names.h new file mode 100644 index 000000000000..c956f4eb2adf --- /dev/null +++ b/include/linux/section-names.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_SECTION_NAMES_H +#define __LINUX_SECTION_NAMES_H + +#define HEAD_TEXT_SECTION .head.text + +#endif /* !__LINUX_SECTION_NAMES_H */ -- cgit v1.2.3-71-gd317 From 27b1833279995e7c290a40cac4ef36ccea7e9283 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Mon, 27 Apr 2009 14:02:27 -0400 Subject: Remove unused support code for refok sections. The old refok sections .text.init.refok .data.init.refok .exit.text.refok have been deprecated since commit 312b1485fb509c9bc32eda28ad29537896658cb8. After the other patches in this patch series nothing is put in these sections, so clean things up by eliminating all the remaining references to them. 
Signed-off-by: Tim Abbott Acked-by: Sam Ravnborg Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 3 --- include/linux/init.h | 8 -------- scripts/mod/modpost.c | 18 ------------------ 3 files changed, 29 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eaa06ef6f7d9..89853bcd27a6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -90,7 +90,6 @@ /* .data section */ #define DATA_DATA \ *(.data) \ - *(.data.init.refok) \ *(.ref.data) \ DEV_KEEP(init.data) \ DEV_KEEP(exit.data) \ @@ -289,8 +288,6 @@ *(.text.hot) \ *(.text) \ *(.ref.text) \ - *(.text.init.refok) \ - *(.exit.text.refok) \ DEV_KEEP(init.text) \ DEV_KEEP(exit.text) \ CPU_KEEP(init.text) \ diff --git a/include/linux/init.h b/include/linux/init.h index 20a1334e34e9..0e06c176f185 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -62,14 +62,6 @@ #define __refdata __section(.ref.data) #define __refconst __section(.ref.rodata) -/* backward compatibility note - * A few places hardcode the old section names: - * .text.init.refok - * .data.init.refok - * .exit.text.refok - * They should be converted to use the defines from this file - */ - /* compatibility defines */ #define __init_refok __ref #define __initdata_refok __refdata diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index df6e6286a065..8d46ea7d6715 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -794,15 +794,6 @@ static const char *init_exit_sections[] = /* data section */ static const char *data_sections[] = { DATA_SECTIONS, NULL }; -/* sections that may refer to an init/exit section with no warning */ -static const char *initref_sections[] = -{ - ".text.init.refok*", - ".exit.text.refok*", - ".data.init.refok*", - NULL -}; - /* symbols in .data that may refer to init/exit sections */ static const char *symbol_white_list[] = @@ -915,11 +906,6 @@ static int 
section_mismatch(const char *fromsec, const char *tosec) /** * Whitelist to allow certain references to pass with no warning. * - * Pattern 0: - * Do not warn if funtion/data are marked with __init_refok/__initdata_refok. - * The pattern is identified by: - * fromsec = .text.init.refok* | .data.init.refok* - * * Pattern 1: * If a module parameter is declared __initdata and permissions=0 * then this is legal despite the warning generated. @@ -958,10 +944,6 @@ static int section_mismatch(const char *fromsec, const char *tosec) static int secref_whitelist(const char *fromsec, const char *fromsym, const char *tosec, const char *tosym) { - /* Check for pattern 0 */ - if (match(fromsec, initref_sections)) - return 0; - /* Check for pattern 1 */ if (match(tosec, init_data_sections) && match(fromsec, data_sections) && -- cgit v1.2.3-71-gd317 From 6916d97f6e25cc66a32d6e9a16419067d843b14f Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Mon, 27 Apr 2009 11:52:43 -0700 Subject: Input: bcm5974 - add quad-finger tapping The integrated button on the new unibody Macbooks presents a need to report explicit four-finger actions. Evidently, the finger pressing the button is also touching the trackpad, so in order to fully support three-finger actions, the driver must be able to report four-finger actions. This patch adds a new button, BTN_TOOL_QUADTAP, which achieves this. 
Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/bcm5974.c | 4 +++- include/linux/input.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index bda873393b0d..2ddf05e1d852 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -258,6 +258,7 @@ static void setup_events_to_report(struct input_dev *input_dev, __set_bit(BTN_TOOL_FINGER, input_dev->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); + __set_bit(BTN_TOOL_QUADTAP, input_dev->keybit); __set_bit(BTN_LEFT, input_dev->keybit); } @@ -329,7 +330,8 @@ static int report_tp_state(struct bcm5974 *dev, int size) input_report_key(input, BTN_TOUCH, dev->fingers > 0); input_report_key(input, BTN_TOOL_FINGER, dev->fingers == 1); input_report_key(input, BTN_TOOL_DOUBLETAP, dev->fingers == 2); - input_report_key(input, BTN_TOOL_TRIPLETAP, dev->fingers > 2); + input_report_key(input, BTN_TOOL_TRIPLETAP, dev->fingers == 3); + input_report_key(input, BTN_TOOL_QUADTAP, dev->fingers > 3); input_report_abs(input, ABS_PRESSURE, abs_p); input_report_abs(input, ABS_TOOL_WIDTH, abs_w); diff --git a/include/linux/input.h b/include/linux/input.h index 6b28048fc568..32cb825939be 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -445,6 +445,7 @@ struct input_absinfo { #define BTN_STYLUS2 0x14c #define BTN_TOOL_DOUBLETAP 0x14d #define BTN_TOOL_TRIPLETAP 0x14e +#define BTN_TOOL_QUADTAP 0x14f /* Four fingers on trackpad */ #define BTN_WHEEL 0x150 #define BTN_GEAR_DOWN 0x150 -- cgit v1.2.3-71-gd317 From 5e5ee686e3c0f8a3cbe9b75c2690326bf91af10d Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Tue, 28 Apr 2009 07:47:33 -0700 Subject: Input: add detailed multi-touch finger data report protocol In order to utilize the full power of the new multi-touch devices, a way to report detailed finger data to 
user space is needed. This patch adds a multi-touch (MT) protocol which allows drivers to report details for an arbitrary number of fingers. The driver sends a SYN_MT_REPORT event via the input_mt_sync() function when a complete finger has been reported. In order to stay compatible with existing applications, the data reported in a finger packet must not be recognized as single-touch events. In addition, all finger data must bypass input filtering, since subsequent events of the same type refer to different fingers. A set of ABS_MT events with the desired properties are defined. The events are divided into categories, to allow for partial implementation. The minimum set consists of ABS_MT_TOUCH_MAJOR, ABS_MT_POSITION_X and ABS_MT_POSITION_Y, which allows for multiple fingers to be tracked. If the device supports it, the ABS_MT_WIDTH_MAJOR may be used to provide the size of the approaching finger. Anisotropy and direction may be specified with ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION. Devices with more granular information may specify general shapes as blobs, i.e., as a sequence of rectangular shapes grouped together by a ABS_MT_BLOB_ID. Finally, the ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a finger or a pen. Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 13 +++++++++++++ include/linux/input.h | 23 +++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index 8ff92aa13a0a..e54e002665b0 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -33,6 +33,15 @@ MODULE_LICENSE("GPL"); * EV_ABS events which should not be cached are listed here. 
*/ static unsigned int input_abs_bypass_init_data[] __initdata = { + ABS_MT_TOUCH_MAJOR, + ABS_MT_TOUCH_MINOR, + ABS_MT_WIDTH_MAJOR, + ABS_MT_WIDTH_MINOR, + ABS_MT_ORIENTATION, + ABS_MT_POSITION_X, + ABS_MT_POSITION_Y, + ABS_MT_TOOL_TYPE, + ABS_MT_BLOB_ID, 0 }; static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)]; @@ -169,6 +178,10 @@ static void input_handle_event(struct input_dev *dev, disposition = INPUT_PASS_TO_HANDLERS; } break; + case SYN_MT_REPORT: + dev->sync = 0; + disposition = INPUT_PASS_TO_HANDLERS; + break; } break; diff --git a/include/linux/input.h b/include/linux/input.h index 32cb825939be..0e6ff5de3588 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -106,6 +106,7 @@ struct input_absinfo { #define SYN_REPORT 0 #define SYN_CONFIG 1 +#define SYN_MT_REPORT 2 /* * Keys and buttons @@ -645,6 +646,17 @@ struct input_absinfo { #define ABS_TOOL_WIDTH 0x1c #define ABS_VOLUME 0x20 #define ABS_MISC 0x28 + +#define ABS_MT_TOUCH_MAJOR 0x30 /* Major axis of touching ellipse */ +#define ABS_MT_TOUCH_MINOR 0x31 /* Minor axis (omit if circular) */ +#define ABS_MT_WIDTH_MAJOR 0x32 /* Major axis of approaching ellipse */ +#define ABS_MT_WIDTH_MINOR 0x33 /* Minor axis (omit if circular) */ +#define ABS_MT_ORIENTATION 0x34 /* Ellipse orientation */ +#define ABS_MT_POSITION_X 0x35 /* Center X ellipse position */ +#define ABS_MT_POSITION_Y 0x36 /* Center Y ellipse position */ +#define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ +#define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ + #define ABS_MAX 0x3f #define ABS_CNT (ABS_MAX+1) @@ -743,6 +755,12 @@ struct input_absinfo { #define BUS_GSC 0x1A #define BUS_ATARI 0x1B +/* + * MT_TOOL types + */ +#define MT_TOOL_FINGER 0 +#define MT_TOOL_PEN 1 + /* * Values describing the status of a force-feedback effect */ @@ -1312,6 +1330,11 @@ static inline void input_sync(struct input_dev *dev) input_event(dev, EV_SYN, SYN_REPORT, 0); } +static inline void input_mt_sync(struct input_dev 
*dev) +{ + input_event(dev, EV_SYN, SYN_MT_REPORT, 0); +} + void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code); static inline void input_set_abs_params(struct input_dev *dev, int axis, int min, int max, int fuzz, int flat) -- cgit v1.2.3-71-gd317 From 9f6532519feab921856f41b30a2397ee25f4de49 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Apr 2009 21:31:30 -0700 Subject: regulator: fix header file missing kernel-doc Add regulator header file missing kernel-doc: Warning(include/linux/regulator/driver.h:117): No description found for parameter 'set_mode' Signed-off-by: Randy Dunlap cc: Liam Girdwood cc: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 4848d8dacd90..225f733e7533 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -50,6 +50,7 @@ enum regulator_status { * @set_current_limit: Configure a limit for a current-limited regulator. * @get_current_limit: Get the configured limit for a current-limited regulator. * + * @set_mode: Set the configured operating mode for the regulator. * @get_mode: Get the configured operating mode for the regulator. * @get_status: Return actual (not as-configured) status of regulator, as a * REGULATOR_STATUS value (or negative errno) -- cgit v1.2.3-71-gd317 From d37dc42ab6f040b8f0f2962ab219c5b2accf748d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 06:45:08 -0400 Subject: nls: add a nls_nullsize inline It's possible for character sets to require a multi-byte null string terminator. Add a helper function that determines the size of the null terminator at runtime. 
Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- include/linux/nls.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nls.h b/include/linux/nls.h index 6a882208301a..52b1a76c1b43 100644 --- a/include/linux/nls.h +++ b/include/linux/nls.h @@ -58,6 +58,25 @@ static inline int nls_strnicmp(struct nls_table *t, const unsigned char *s1, return 0; } +/* + * nls_nullsize - return length of null character for codepage + * @codepage - codepage for which to return length of NULL terminator + * + * Since we can't guarantee that the null terminator will be a particular + * length, we have to check against the codepage. If there's a problem + * determining it, assume a single-byte NULL terminator. + */ +static inline int +nls_nullsize(const struct nls_table *codepage) +{ + int charlen; + char tmp[NLS_MAX_CHARSET_SIZE]; + + charlen = codepage->uni2char(0, tmp, NLS_MAX_CHARSET_SIZE); + + return charlen > 0 ? charlen : 1; +} + #define MODULE_ALIAS_NLS(name) MODULE_ALIAS("nls_" __stringify(name)) #endif /* _LINUX_NLS_H */ -- cgit v1.2.3-71-gd317 From 96c16743973e8c1a7b9c655d10b7973408d6d1dd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 30 Apr 2009 18:24:34 +0200 Subject: ide-cd: fix REQ_QUIET tests in cdrom_decode_status Original patch (dfa4411cc3a690011cab90e9a536938795366cf9) was buggy. This is a more proper fix which introduces blk_rq_quiet() macro alleviating the need for dumb, too short caching variables. Thanks to Helge Deller and Bart for debugging this. 
Signed-off-by: Borislav Petkov Cc: Jens Axboe Cc: Sergei Shtylyov Reported-and-tested-by: Helge Deller Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-cd.c | 9 ++++----- include/linux/blkdev.h | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 3d4e09969763..925eb9e245d1 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -312,7 +312,6 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) ide_hwif_t *hwif = drive->hwif; struct request *rq = hwif->rq; int err, sense_key, do_end_request = 0; - u8 quiet = rq->cmd_flags & REQ_QUIET; /* get the IDE error register */ err = ide_read_error(drive); @@ -347,7 +346,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) } else { cdrom_saw_media_change(drive); - if (blk_fs_request(rq) && !quiet) + if (blk_fs_request(rq) && !blk_rq_quiet(rq)) printk(KERN_ERR PFX "%s: tray open\n", drive->name); } @@ -382,7 +381,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * No point in retrying after an illegal request or data * protect error. */ - if (!quiet) + if (!blk_rq_quiet(rq)) ide_dump_status(drive, "command error", stat); do_end_request = 1; break; @@ -391,14 +390,14 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * No point in re-trying a zillion times on a bad sector. * If we got here the error is not correctable. */ - if (!quiet) + if (!blk_rq_quiet(rq)) ide_dump_status(drive, "media error " "(bad sector)", stat); do_end_request = 1; break; case BLANK_CHECK: /* disk appears blank? 
*/ - if (!quiet) + if (!blk_rq_quiet(rq)) ide_dump_status(drive, "media error (blank)", stat); do_end_request = 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba54c834a590..6f841fb1be30 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -598,6 +598,7 @@ enum { blk_failfast_transport(rq) || \ blk_failfast_driver(rq)) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) +#define blk_rq_quiet(rq) ((rq)->cmd_flags & REQ_QUIET) #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) -- cgit v1.2.3-71-gd317 From ae3abae64f177586be55b04a7fb7047a34b21a3e Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 30 Apr 2009 15:08:19 -0700 Subject: memcg: fix mem_cgroup_shrink_usage() Current mem_cgroup_shrink_usage() has two problems. 1. It doesn't call mem_cgroup_out_of_memory and doesn't update last_oom_jiffies, so pagefault_out_of_memory invokes global OOM. 2. Considering hierarchy, shrinking has to be done from the mem_over_limit, not from the memcg which the page would be charged to. mem_cgroup_try_charge_swapin() does all of these things properly, so we use it and call cancel_charge_swapin when it succeeded. The name of "shrink_usage" is not appropriate for this behavior, so we change it too. 
Signed-off-by: Daisuke Nishimura Acked-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Paul Menage Cc: Dhaval Giani Cc: Daisuke Nishimura Cc: YAMAMOTO Takashi Cc: KOSAKI Motohiro Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- mm/memcontrol.c | 33 ++++++++++++--------------------- mm/shmem.c | 8 ++++++-- 3 files changed, 20 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a9e3b76aa884..25b9ca93d232 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -56,7 +56,7 @@ extern void mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern int mem_cgroup_shrink_usage(struct page *page, +extern int mem_cgroup_shmem_charge_fallback(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, @@ -155,7 +155,7 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline int mem_cgroup_shrink_usage(struct page *page, +static inline int mem_cgroup_shmem_charge_fallback(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 575203ae2109..01c2d8f14685 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1617,37 +1617,28 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, } /* - * A call to try to shrink memory usage under specified resource controller. - * This is typically used for page reclaiming for shmem for reducing side - * effect of page allocation from shmem, which is used by some mem_cgroup. + * A call to try to shrink memory usage on charge failure at shmem's swapin. 
+ * Calling hierarchical_reclaim is not enough because we should update + * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. + * Moreover considering hierarchy, we should reclaim from the mem_over_limit, + * not from the memcg which this page would be charged to. + * try_charge_swapin does all of these works properly. */ -int mem_cgroup_shrink_usage(struct page *page, +int mem_cgroup_shmem_charge_fallback(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { struct mem_cgroup *mem = NULL; - int progress = 0; - int retry = MEM_CGROUP_RECLAIM_RETRIES; + int ret; if (mem_cgroup_disabled()) return 0; - if (page) - mem = try_get_mem_cgroup_from_swapcache(page); - if (!mem && mm) - mem = try_get_mem_cgroup_from_mm(mm); - if (unlikely(!mem)) - return 0; - do { - progress = mem_cgroup_hierarchical_reclaim(mem, - gfp_mask, true, false); - progress += mem_cgroup_check_under_limit(mem); - } while (!progress && --retry); + ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); + if (!ret) + mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ - css_put(&mem->css); - if (!retry) - return -ENOMEM; - return 0; + return ret; } static DEFINE_MUTEX(set_limit_mutex); diff --git a/mm/shmem.c b/mm/shmem.c index f9cb20ebb990..b25f95ce3db7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1340,8 +1340,12 @@ repeat: shmem_swp_unmap(entry); spin_unlock(&info->lock); if (error == -ENOMEM) { - /* allow reclaim from this memory cgroup */ - error = mem_cgroup_shrink_usage(swappage, + /* + * reclaim from proper memory cgroup and + * call memcg's OOM if needed. 
+ */ + error = mem_cgroup_shmem_charge_fallback( + swappage, current->mm, gfp); if (error) { -- cgit v1.2.3-71-gd317 From 74641f584da8eccf30becfbb5507ab457187db22 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 30 Apr 2009 15:08:49 -0700 Subject: alpha: binfmt_aout fix This fixes the problem introduced by commit 3bfacef412 (get rid of special-casing the /sbin/loader on alpha): osf/1 ecoff binary segfaults when binfmt_aout built as module. That happens because aout binary handler gets on the top of the binfmt list due to late registration, and kernel attempts to execute the binary without preparatory work that must be done by binfmt_loader. Fixed by changing the registration order of the default binfmt handlers using list_add_tail() and introducing insert_binfmt() function which places new handler on the top of the binfmt list. This might be generally useful for installing arch-specific frontends for default handlers or just for overriding them. Signed-off-by: Ivan Kokshaysky Cc: Al Viro Cc: Richard Henderson Signed-off-by: Linus Torvalds --- arch/alpha/kernel/Makefile | 6 +++++- arch/alpha/kernel/binfmt_loader.c | 2 +- fs/exec.c | 7 ++++--- include/linux/binfmts.h | 14 +++++++++++++- 4 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index a427538252f8..7739a62440a7 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ irq_alpha.o signal.o setup.o ptrace.o time.o \ - alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o + alpha_ksyms.o systbls.o err_common.o io.o obj-$(CONFIG_VGA_HOSE) += console.o obj-$(CONFIG_SMP) += smp.o @@ -43,6 +43,10 @@ else # Misc support obj-$(CONFIG_ALPHA_SRM) += srmcons.o +ifdef CONFIG_BINFMT_AOUT +obj-y += binfmt_loader.o +endif + # Core logic support obj-$(CONFIG_ALPHA_APECS) += 
core_apecs.o obj-$(CONFIG_ALPHA_CIA) += core_cia.o diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c index 4a0af906b00a..3fcfad410130 100644 --- a/arch/alpha/kernel/binfmt_loader.c +++ b/arch/alpha/kernel/binfmt_loader.c @@ -46,6 +46,6 @@ static struct linux_binfmt loader_format = { static int __init init_loader_binfmt(void) { - return register_binfmt(&loader_format); + return insert_binfmt(&loader_format); } arch_initcall(init_loader_binfmt); diff --git a/fs/exec.c b/fs/exec.c index a3a8ce83940f..639177b0eeac 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -69,17 +69,18 @@ int suid_dumpable = 0; static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); -int register_binfmt(struct linux_binfmt * fmt) +int __register_binfmt(struct linux_binfmt * fmt, int insert) { if (!fmt) return -EINVAL; write_lock(&binfmt_lock); - list_add(&fmt->lh, &formats); + insert ? list_add(&fmt->lh, &formats) : + list_add_tail(&fmt->lh, &formats); write_unlock(&binfmt_lock); return 0; } -EXPORT_SYMBOL(register_binfmt); +EXPORT_SYMBOL(__register_binfmt); void unregister_binfmt(struct linux_binfmt * fmt) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 6638b8148de7..61ee18c1bdb4 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -82,7 +82,19 @@ struct linux_binfmt { int hasvdso; }; -extern int register_binfmt(struct linux_binfmt *); +extern int __register_binfmt(struct linux_binfmt *fmt, int insert); + +/* Registration of default binfmt handlers */ +static inline int register_binfmt(struct linux_binfmt *fmt) +{ + return __register_binfmt(fmt, 0); +} +/* Same as above, but adds a new binfmt at the top of the list */ +static inline int insert_binfmt(struct linux_binfmt *fmt) +{ + return __register_binfmt(fmt, 1); +} + extern void unregister_binfmt(struct linux_binfmt *); extern int prepare_binprm(struct linux_binprm *); -- cgit v1.2.3-71-gd317 From 0763ed2355198cdef2f6a2098e9d52eb1fe4365d Mon Sep 17 00:00:00 2001 From: 
Grant Likely Date: Thu, 30 Apr 2009 15:08:50 -0700 Subject: of: make of_(un)register_platform_driver common code Some drivers using of_register_platform_driver() wrapper break on sparc because the wrapper isn't in the header file. This patch moves it from Microblaze and PowerPC implementations and makes it common code. Fixes this sparc64 allmodconfig build error (at least): drivers/leds/leds-gpio.c: In function `gpio_led_init': drivers/leds/leds-gpio.c:295: error: implicit declaration of function `of_register_platform_driver' drivers/leds/leds-gpio.c: In function `gpio_led_exit': drivers/leds/leds-gpio.c:311: error: implicit declaration of function `of_unregister_platform_driver' Signed-off-by: Grant Likely Acked-by: David S. Miller Cc: Michal Simek Cc: Benjamin Herrenschmidt Cc: Stephen Rothwell Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/of_platform.h | 10 ---------- arch/powerpc/include/asm/of_platform.h | 10 ---------- include/linux/of_platform.h | 10 ++++++++++ 3 files changed, 10 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/include/asm/of_platform.h b/arch/microblaze/include/asm/of_platform.h index 187c0eedaece..37491276c6ca 100644 --- a/arch/microblaze/include/asm/of_platform.h +++ b/arch/microblaze/include/asm/of_platform.h @@ -36,16 +36,6 @@ static const struct of_device_id of_default_bus_ids[] = { {}, }; -/* Platform drivers register/unregister */ -static inline int of_register_platform_driver(struct of_platform_driver *drv) -{ - return of_register_driver(drv, &of_platform_bus_type); -} -static inline void of_unregister_platform_driver(struct of_platform_driver *drv) -{ - of_unregister_driver(drv); -} - /* Platform devices and busses creation */ extern struct of_device *of_platform_device_create(struct device_node *np, const char *bus_id, diff --git a/arch/powerpc/include/asm/of_platform.h b/arch/powerpc/include/asm/of_platform.h index 
53b46507ffde..d4aaa3489440 100644 --- a/arch/powerpc/include/asm/of_platform.h +++ b/arch/powerpc/include/asm/of_platform.h @@ -11,16 +11,6 @@ * */ -/* Platform drivers register/unregister */ -static inline int of_register_platform_driver(struct of_platform_driver *drv) -{ - return of_register_driver(drv, &of_platform_bus_type); -} -static inline void of_unregister_platform_driver(struct of_platform_driver *drv) -{ - of_unregister_driver(drv); -} - /* Platform devices and busses creation */ extern struct of_device *of_platform_device_create(struct device_node *np, const char *bus_id, diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index 3d327b67d7e2..908406651330 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -51,6 +51,16 @@ extern int of_register_driver(struct of_platform_driver *drv, struct bus_type *bus); extern void of_unregister_driver(struct of_platform_driver *drv); +/* Platform drivers register/unregister */ +static inline int of_register_platform_driver(struct of_platform_driver *drv) +{ + return of_register_driver(drv, &of_platform_bus_type); +} +static inline void of_unregister_platform_driver(struct of_platform_driver *drv) +{ + of_unregister_driver(drv); +} + #include <asm/of_platform.h> extern struct of_device *of_find_device_by_node(struct device_node *np); -- cgit v1.2.3-71-gd317 From 00a62ce91e554198ef28234c91c36f850f5a3bc9 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 30 Apr 2009 15:08:51 -0700 Subject: mm: fix Committed_AS underflow on large NR_CPUS environment The Committed_AS field can underflow in certain situations: > # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c > 1 Committed_AS: 18446744073709323392 kB > 11 Committed_AS: 18446744073709455488 kB > 6 Committed_AS: 35136 kB > 5 Committed_AS: 18446744073709454400 kB > 7 Committed_AS: 35904 kB > 3 Committed_AS: 18446744073709453248 kB > 2 Committed_AS: 34752 kB > 9 Committed_AS: 18446744073709453248 kB > 8 Committed_AS: 34752 
kB > 3 Committed_AS: 18446744073709320960 kB > 7 Committed_AS: 18446744073709454080 kB > 3 Committed_AS: 18446744073709320960 kB > 5 Committed_AS: 18446744073709454080 kB > 6 Committed_AS: 18446744073709320960 kB Because NR_CPUS can be greater than 1000 and meminfo_proc_show() does not check for underflow. But NR_CPUS proportional isn't good calculation. In general, possibility of lock contention is proportional to the number of online cpus, not theoretical maximum cpus (NR_CPUS). The current kernel has generic percpu-counter stuff. using it is right way. it makes code simplify and percpu_counter_read_positive() don't make underflow issue. Reported-by: Dave Hansen Signed-off-by: KOSAKI Motohiro Cc: Eric B Munson Cc: Mel Gorman Cc: Christoph Lameter Cc: [All kernel versions] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 2 +- include/linux/mman.h | 9 +++------ mm/mmap.c | 12 ++++++------ mm/nommu.c | 13 +++++++------ mm/swap.c | 46 ---------------------------------------------- 5 files changed, 17 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 74ea974f5ca6..c6b0302af4c4 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #define K(x) ((x) << (PAGE_SHIFT - 10)) si_meminfo(&i); si_swapinfo(&i); - committed = atomic_long_read(&vm_committed_space); + committed = percpu_counter_read_positive(&vm_committed_as); allowed = ((totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100) + total_swap_pages; diff --git a/include/linux/mman.h b/include/linux/mman.h index 30d1073bac3b..9872d6ca58ae 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -12,21 +12,18 @@ #ifdef __KERNEL__ #include <linux/mm.h> +#include <linux/percpu_counter.h> #include <asm/atomic.h> extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; -extern atomic_long_t vm_committed_space; +extern struct percpu_counter vm_committed_as; 
-#ifdef CONFIG_SMP -extern void vm_acct_memory(long pages); -#else static inline void vm_acct_memory(long pages) { - atomic_long_add(pages, &vm_committed_space); + percpu_counter_add(&vm_committed_as, pages); } -#endif static inline void vm_unacct_memory(long pages) { diff --git a/mm/mmap.c b/mm/mmap.c index 3303d1ba8e87..6b7b1a95944b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(vm_get_page_prot); int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; -atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); +struct percpu_counter vm_committed_as; /* * Check that a process has enough memory to allocate a new virtual @@ -179,11 +179,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) if (mm) allowed -= mm->total_vm / 32; - /* - * cast `allowed' as a signed long because vm_committed_space - * sometimes has a negative value - */ - if (atomic_long_read(&vm_committed_space) < (long)allowed) + if (percpu_counter_read_positive(&vm_committed_as) < allowed) return 0; error: vm_unacct_memory(pages); @@ -2481,4 +2477,8 @@ void mm_drop_all_locks(struct mm_struct *mm) */ void __init mmap_init(void) { + int ret; + + ret = percpu_counter_init(&vm_committed_as, 0); + VM_BUG_ON(ret); } diff --git a/mm/nommu.c b/mm/nommu.c index 72eda4aee2cb..809998aa7b50 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -62,7 +62,7 @@ void *high_memory; struct page *mem_map; unsigned long max_mapnr; unsigned long num_physpages; -atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); +struct percpu_counter vm_committed_as; int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; @@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) */ void __init mmap_init(void) { + int ret; + + ret = 
percpu_counter_init(&vm_committed_as, 0); + VM_BUG_ON(ret); vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } @@ -1847,12 +1851,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) if (mm) allowed -= mm->total_vm / 32; - /* - * cast `allowed' as a signed long because vm_committed_space - * sometimes has a negative value - */ - if (atomic_long_read(&vm_committed_space) < (long)allowed) + if (percpu_counter_read_positive(&vm_committed_as) < allowed) return 0; + error: vm_unacct_memory(pages); diff --git a/mm/swap.c b/mm/swap.c index bede23ce64ea..cb29ae5d33ab 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -491,49 +491,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, EXPORT_SYMBOL(pagevec_lookup_tag); -#ifdef CONFIG_SMP -/* - * We tolerate a little inaccuracy to avoid ping-ponging the counter between - * CPUs - */ -#define ACCT_THRESHOLD max(16, NR_CPUS * 2) - -static DEFINE_PER_CPU(long, committed_space); - -void vm_acct_memory(long pages) -{ - long *local; - - preempt_disable(); - local = &__get_cpu_var(committed_space); - *local += pages; - if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { - atomic_long_add(*local, &vm_committed_space); - *local = 0; - } - preempt_enable(); -} - -#ifdef CONFIG_HOTPLUG_CPU - -/* Drop the CPU's cached committed space back into the central pool. 
*/ -static int cpu_swap_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - long *committed; - - committed = &per_cpu(committed_space, (long)hcpu); - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - atomic_long_add(*committed, &vm_committed_space); - *committed = 0; - drain_cpu_pagevecs((long)hcpu); - } - return NOTIFY_OK; -} -#endif /* CONFIG_HOTPLUG_CPU */ -#endif /* CONFIG_SMP */ - /* * Perform any setup for the swap system */ @@ -554,7 +511,4 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ -#ifdef CONFIG_HOTPLUG_CPU - hotcpu_notifier(cpu_swap_callback, 0); -#endif } -- cgit v1.2.3-71-gd317 From f75e6745aa3084124ae1434fd7629853bdaf6798 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 21 Apr 2009 17:18:20 -0400 Subject: SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect See http://bugzilla.kernel.org/show_bug.cgi?id=13034 If the port gets into a TIME_WAIT state, then we cannot reconnect without binding to a new port. 
Tested-by: Petr Vandrovec Tested-by: Jean Delvare Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 6 ++---- net/sunrpc/xprtsock.c | 26 +++++++++++++++++++++----- 3 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 1758d9f5b5c3..08afe43118f4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -261,6 +261,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_BINDING (5) #define XPRT_CLOSING (6) #define XPRT_CONNECTION_ABORT (7) +#define XPRT_CONNECTION_CLOSE (8) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a0bfe53f1621..06ca058572f2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -672,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - if (xprt_connecting(xprt)) - xprt_release_write(xprt, NULL); - else - queue_work(rpciod_workqueue, &xprt->task_cleanup); + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d40ff50887aa..e18596146013 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -807,6 +807,9 @@ static void xs_reset_transport(struct sock_xprt *transport) * * This is used when all requests are complete; ie, no DRC state remains * on the server we want to save. + * + * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with + * xs_reset_transport() zeroing the socket from underneath a writer. 
*/ static void xs_close(struct rpc_xprt *xprt) { @@ -824,6 +827,14 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_tcp_close(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) + xs_close(xprt); + else + xs_tcp_shutdown(xprt); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -1772,6 +1783,15 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { + default: + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: @@ -1782,10 +1802,6 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt_clear_connecting(xprt); return; } - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); out_eagain: status = -EAGAIN; out: @@ -1994,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_shutdown, + .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; -- cgit v1.2.3-71-gd317 From e67c85626cd02e306da1b4195bfaf68d61050796 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 8 Mar 2009 23:13:32 +0800 Subject: Revert driver core: move platform_data into platform_device This reverts commit 006f4571a15fae3a0575f2a0f9e9b63b3d1012f8: This patch moves platform_data from struct device into struct platform_device, based on the two ideas: 1. Now all platform_driver is registered by platform_driver_register, which makes probe()/release()/... 
of platform_driver passed parameter of platform_device *, so platform driver can get platform_data from platform_device; 2. Other kind of devices do not need to use platform_data, we can decrease size of device if moving it to platform_device. Taking into consideration of thousands of files to be fixed and they can't be finished in one night(maybe it will take a long time), so we keep platform_data in device to allow two kind of cases coexist until all platform devices pass its platform data from platform_device->platform_data. All patches to do this kind of conversion are welcome. As we don't really want to do it, it was a bad idea. Cc: David Brownell Cc: Ming Lei Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 3 --- include/linux/device.h | 9 ++------- include/linux/platform_device.h | 1 - 3 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index d1d0ee431926..8b4708e06244 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -217,7 +217,6 @@ int platform_device_add_data(struct platform_device *pdev, const void *data, if (d) { memcpy(d, data, size); pdev->dev.platform_data = d; - pdev->platform_data = d; } return d ? 
0 : -ENOMEM; } @@ -247,8 +246,6 @@ int platform_device_add(struct platform_device *pdev) else dev_set_name(&pdev->dev, pdev->name); - pdev->platform_data = pdev->dev.platform_data; - for (i = 0; i < pdev->num_resources; i++) { struct resource *p, *r = &pdev->resource[i]; diff --git a/include/linux/device.h b/include/linux/device.h index 6a69caaac18a..5d5c197bad45 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -384,13 +384,8 @@ struct device { struct device_driver *driver; /* which driver has allocated this device */ void *driver_data; /* data private to the driver */ - - void *platform_data; /* We will remove platform_data - field if all platform devices - pass its platform specific data - from platform_device->platform_data, - other kind of devices should not - use platform_data. */ + void *platform_data; /* Platform specific data, device + core doesn't touch it */ struct dev_pm_info power; #ifdef CONFIG_NUMA diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 72736fd8223c..b67bb5d7b221 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -20,7 +20,6 @@ struct platform_device { struct device dev; u32 num_resources; struct resource * resource; - void *platform_data; struct platform_device_id *id_entry; }; -- cgit v1.2.3-71-gd317 From edcc37a0478836b4a51eafb1bcec6a52708f681d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 May 2009 06:00:05 -0400 Subject: Always lookup priv_root on reiserfs mount and keep it ... even if it's a negative dentry. That way we can set ->d_op on root before anyone could race with us. Simplify d_compare(), while we are at it. 
Signed-off-by: Al Viro --- fs/reiserfs/super.c | 6 ++- fs/reiserfs/xattr.c | 86 ++++++++++++++++++------------------------ include/linux/reiserfs_xattr.h | 1 + 3 files changed, 41 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0ae6486d9046..d444fe0013a4 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1842,7 +1842,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) goto error; } - if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + if ((errval = reiserfs_lookup_privroot(s)) || + (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error; @@ -1855,7 +1856,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) reiserfs_info(s, "using 3.5.x disk format\n"); } - if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + if ((errval = reiserfs_lookup_privroot(s)) || + (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 31a3dbb120e1..2891f789f545 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -903,16 +903,19 @@ static int create_privroot(struct dentry *dentry) WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); err = xattr_mkdir(inode, dentry, 0700); - if (err) { - dput(dentry); - dentry = NULL; + if (err || !dentry->d_inode) { + reiserfs_warning(dentry->d_sb, "jdm-20006", + "xattrs/ACLs enabled and couldn't " + "find/create .reiserfs_priv. 
" + "Failing mount."); + return -EOPNOTSUPP; } - if (dentry && dentry->d_inode) - reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " - "storage.\n", PRIVROOT_NAME); + dentry->d_inode->i_flags |= S_PRIVATE; + reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " + "storage.\n", PRIVROOT_NAME); - return err; + return 0; } static int xattr_mount_check(struct super_block *s) @@ -944,11 +947,9 @@ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) { struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; - if (name->len == priv_root->d_name.len && - name->hash == priv_root->d_name.hash && - !memcmp(name->name, priv_root->d_name.name, name->len)) { + if (container_of(q1, struct dentry, d_name) == priv_root) return -ENOENT; - } else if (q1->len == name->len && + if (q1->len == name->len && !memcmp(q1->name, name->name, name->len)) return 0; return 1; @@ -958,6 +959,27 @@ static const struct dentry_operations xattr_lookup_poison_ops = { .d_compare = xattr_lookup_poison, }; +int reiserfs_lookup_privroot(struct super_block *s) +{ + struct dentry *dentry; + int err = 0; + + /* If we don't have the privroot located yet - go find it */ + mutex_lock(&s->s_root->d_inode->i_mutex); + dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, + strlen(PRIVROOT_NAME)); + if (!IS_ERR(dentry)) { + REISERFS_SB(s)->priv_root = dentry; + s->s_root->d_op = &xattr_lookup_poison_ops; + if (dentry->d_inode) + dentry->d_inode->i_flags |= S_PRIVATE; + } else + err = PTR_ERR(dentry); + mutex_unlock(&s->s_root->d_inode->i_mutex); + + return err; +} + /* We need to take a copy of the mount flags since things like * MS_RDONLY don't get set until *after* we're called. 
* mount_flags != mount_options */ @@ -969,48 +991,12 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) err = xattr_mount_check(s); if (err) goto error; -#endif - /* If we don't have the privroot located yet - go find it */ - if (!REISERFS_SB(s)->priv_root) { - struct dentry *dentry; - mutex_lock_nested(&s->s_root->d_inode->i_mutex, I_MUTEX_CHILD); - dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, - strlen(PRIVROOT_NAME)); - if (!IS_ERR(dentry)) { -#ifdef CONFIG_REISERFS_FS_XATTR - if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) - err = create_privroot(dentry); -#endif - if (!dentry->d_inode) { - dput(dentry); - dentry = NULL; - } - } else - err = PTR_ERR(dentry); + if (!REISERFS_SB(s)->priv_root->d_inode && !(mount_flags & MS_RDONLY)) { + mutex_lock(&s->s_root->d_inode->i_mutex); + err = create_privroot(REISERFS_SB(s)->priv_root); mutex_unlock(&s->s_root->d_inode->i_mutex); - - if (!err && dentry) { - s->s_root->d_op = &xattr_lookup_poison_ops; - dentry->d_inode->i_flags |= S_PRIVATE; - REISERFS_SB(s)->priv_root = dentry; -#ifdef CONFIG_REISERFS_FS_XATTR - /* xattrs are unavailable */ - } else if (!(mount_flags & MS_RDONLY)) { - /* If we're read-only it just means that the dir - * hasn't been created. Not an error -- just no - * xattrs on the fs. We'll check again if we - * go read-write */ - reiserfs_warning(s, "jdm-20006", - "xattrs/ACLs enabled and couldn't " - "find/create .reiserfs_priv. 
" - "Failing mount."); - err = -EOPNOTSUPP; -#endif - } } - -#ifdef CONFIG_REISERFS_FS_XATTR if (!err) s->s_xattr = reiserfs_xattr_handlers; diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index dcae01e63e40..fea1a8e65bef 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -38,6 +38,7 @@ struct nameidata; int reiserfs_xattr_register_handlers(void) __init; void reiserfs_xattr_unregister_handlers(void); int reiserfs_xattr_init(struct super_block *sb, int mount_flags); +int reiserfs_lookup_privroot(struct super_block *sb); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); -- cgit v1.2.3-71-gd317 From ab17c4f02156c4f75d7fa43a5aa2a7f942d47201 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:15 -0400 Subject: reiserfs: fixup xattr_root caching The xattr_root caching was broken from my previous patch set. It wouldn't cause corruption, but could cause decreased performance due to allocating a larger chunk of the journal (~ 27 blocks) than it would actually use. This patch loads the xattr root dentry at xattr initialization and creates it on-demand. Since we're using the cached dentry, there's no point in keeping lookup_or_create_dir around, so that's removed. Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/xattr.c | 73 ++++++++++++++++++++++++++---------------- include/linux/reiserfs_fs_sb.h | 2 +- include/linux/reiserfs_xattr.h | 2 +- 3 files changed, 48 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 2891f789f545..c77984473db9 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -113,36 +113,28 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) #define xattr_may_create(flags) (!flags || flags & XATTR_CREATE) -/* Returns and possibly creates the xattr dir. 
*/ -static struct dentry *lookup_or_create_dir(struct dentry *parent, - const char *name, int flags) +static struct dentry *open_xa_root(struct super_block *sb, int flags) { - struct dentry *dentry; - BUG_ON(!parent); + struct dentry *privroot = REISERFS_SB(sb)->priv_root; + struct dentry *xaroot; + if (!privroot->d_inode) + return ERR_PTR(-ENODATA); - mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_XATTR); - dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry) && !dentry->d_inode) { - int err = -ENODATA; + mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); + xaroot = dget(REISERFS_SB(sb)->xattr_root); + if (!xaroot->d_inode) { + int err = -ENODATA; if (xattr_may_create(flags)) - err = xattr_mkdir(parent->d_inode, dentry, 0700); - + err = xattr_mkdir(privroot->d_inode, xaroot, 0700); if (err) { - dput(dentry); - dentry = ERR_PTR(err); + dput(xaroot); + xaroot = ERR_PTR(err); } } - mutex_unlock(&parent->d_inode->i_mutex); - return dentry; -} -static struct dentry *open_xa_root(struct super_block *sb, int flags) -{ - struct dentry *privroot = REISERFS_SB(sb)->priv_root; - if (!privroot) - return ERR_PTR(-ENODATA); - return lookup_or_create_dir(privroot, XAROOT_NAME, flags); + mutex_unlock(&privroot->d_inode->i_mutex); + return xaroot; } static struct dentry *open_xa_dir(const struct inode *inode, int flags) @@ -158,10 +150,22 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) le32_to_cpu(INODE_PKEY(inode)->k_objectid), inode->i_generation); - xadir = lookup_or_create_dir(xaroot, namebuf, flags); + mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR); + + xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); + if (!IS_ERR(xadir) && !xadir->d_inode) { + int err = -ENODATA; + if (xattr_may_create(flags)) + err = xattr_mkdir(xaroot->d_inode, xadir, 0700); + if (err) { + dput(xadir); + xadir = ERR_PTR(err); + } + } + + mutex_unlock(&xaroot->d_inode->i_mutex); dput(xaroot); return xadir; - } /* The 
following are side effects of other operations that aren't explicitly @@ -986,19 +990,33 @@ int reiserfs_lookup_privroot(struct super_block *s) int reiserfs_xattr_init(struct super_block *s, int mount_flags) { int err = 0; + struct dentry *privroot = REISERFS_SB(s)->priv_root; #ifdef CONFIG_REISERFS_FS_XATTR err = xattr_mount_check(s); if (err) goto error; - if (!REISERFS_SB(s)->priv_root->d_inode && !(mount_flags & MS_RDONLY)) { + if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { mutex_lock(&s->s_root->d_inode->i_mutex); err = create_privroot(REISERFS_SB(s)->priv_root); mutex_unlock(&s->s_root->d_inode->i_mutex); } - if (!err) + + if (privroot->d_inode) { s->s_xattr = reiserfs_xattr_handlers; + mutex_lock(&privroot->d_inode->i_mutex); + if (!REISERFS_SB(s)->xattr_root) { + struct dentry *dentry; + dentry = lookup_one_len(XAROOT_NAME, privroot, + strlen(XAROOT_NAME)); + if (!IS_ERR(dentry)) + REISERFS_SB(s)->xattr_root = dentry; + else + err = PTR_ERR(dentry); + } + mutex_unlock(&privroot->d_inode->i_mutex); + } error: if (err) { @@ -1008,11 +1026,12 @@ error: #endif /* The super_block MS_POSIXACL must mirror the (no)acl mount option. 
*/ - s->s_flags = s->s_flags & ~MS_POSIXACL; #ifdef CONFIG_REISERFS_FS_POSIX_ACL if (reiserfs_posixacl(s)) s->s_flags |= MS_POSIXACL; + else #endif + s->s_flags &= ~MS_POSIXACL; return err; } diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 6b361d23a499..8651640868a1 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -402,7 +402,7 @@ struct reiserfs_sb_info { int reserved_blocks; /* amount of blocks reserved for further allocations */ spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ struct dentry *priv_root; /* root of /.reiserfs_priv */ - struct dentry *xattr_root; /* root of /.reiserfs_priv/.xa */ + struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ int j_errno; #ifdef CONFIG_QUOTA char *s_qf_names[MAXQUOTAS]; diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index fea1a8e65bef..cdedc01036e4 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -98,7 +98,7 @@ static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode) if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) { nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - if (REISERFS_SB(inode->i_sb)->xattr_root == NULL) + if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode) nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); } -- cgit v1.2.3-71-gd317 From 677c9b2e393a0cd203bd54e9c18b012b2c73305a Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:17 -0400 Subject: reiserfs: remove privroot hiding in lookup With Al Viro's patch to move privroot lookup to fs mount, there's no need to have special code to hide the privroot in reiserfs_lookup. I've also cleaned up the privroot hiding in reiserfs_readdir_dentry and removed the last user of reiserfs_xattrs(). 
Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/dir.c | 24 +++++++++++++----------- fs/reiserfs/namei.c | 17 ++--------------- fs/reiserfs/xattr.c | 2 +- include/linux/reiserfs_fs_sb.h | 1 - 4 files changed, 16 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 67a80d7e59e2..45ee3d357c70 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -41,6 +41,18 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, #define store_ih(where,what) copy_item_head (where, what) +static inline bool is_privroot_deh(struct dentry *dir, + struct reiserfs_de_head *deh) +{ + int ret = 0; +#ifdef CONFIG_REISERFS_FS_XATTR + struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; + ret = (dir == dir->d_parent && privroot->d_inode && + deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); +#endif + return ret; +} + int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, filldir_t filldir, loff_t *pos) { @@ -138,18 +150,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, } /* Ignore the .reiserfs_priv entry */ - if (reiserfs_xattrs(inode->i_sb) && - !old_format_only(inode->i_sb) && - dentry == inode->i_sb->s_root && - REISERFS_SB(inode->i_sb)->priv_root && - REISERFS_SB(inode->i_sb)->priv_root->d_inode - && deh_objectid(deh) == - le32_to_cpu(INODE_PKEY - (REISERFS_SB(inode->i_sb)-> - priv_root->d_inode)-> - k_objectid)) { + if (is_privroot_deh(dentry, deh)) continue; - } d_off = deh_offset(deh); *pos = d_off; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index efd4d720718e..271579128634 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -338,21 +338,8 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, &path_to_entry, &de); pathrelse(&path_to_entry); if (retval == NAME_FOUND) { - /* Hide the .reiserfs_priv directory */ - if (reiserfs_xattrs(dir->i_sb) && - !old_format_only(dir->i_sb) && - 
REISERFS_SB(dir->i_sb)->priv_root && - REISERFS_SB(dir->i_sb)->priv_root->d_inode && - de.de_objectid == - le32_to_cpu(INODE_PKEY - (REISERFS_SB(dir->i_sb)->priv_root->d_inode)-> - k_objectid)) { - reiserfs_write_unlock(dir->i_sb); - return ERR_PTR(-EACCES); - } - - inode = - reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); + inode = reiserfs_iget(dir->i_sb, + (struct cpu_key *)&(de.de_dir_id)); if (!inode || IS_ERR(inode)) { reiserfs_write_unlock(dir->i_sb); return ERR_PTR(-EACCES); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index c77984473db9..2237e10c7c7c 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -841,7 +841,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) if (!dentry->d_inode) return -EINVAL; - if (!reiserfs_xattrs(dentry->d_sb) || + if (!dentry->d_sb->s_xattr || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) return -EOPNOTSUPP; diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 8651640868a1..6473650c28f1 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -488,7 +488,6 @@ enum reiserfs_mount_options { #define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG)) #define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED)) #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) -#define reiserfs_xattrs(s) ((s)->s_xattr != NULL) #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) -- cgit v1.2.3-71-gd317 From 74dbbdd7fdc11763f4698d2f3e684cf4446951e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:07:50 -0400 Subject: New helper: deactivate_locked_super() Does equivalent of 
up_write(&s->s_umount); deactivate_super(s); However, it does not unlock it until it's all over. As the result, it's safe to use to dispose of new superblock on ->get_sb() failure exits - nobody will see the sucker until it's all over. Equivalent using up_write/deactivate_super is safe for that purpose if superblock is either safe to use or has NULL ->s_root when we unlock. Normally filesystems take the required precautions, but a) we do have bugs in that area in some of them. b) up_write/deactivate_super sequence is extremely common, so the helper makes sense anyway. Signed-off-by: Al Viro --- fs/super.c | 46 ++++++++++++++++++++++++++++++++++------------ include/linux/fs.h | 1 + 2 files changed, 35 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 786fe7d72790..a9dc4c33ef4d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -207,6 +207,34 @@ void deactivate_super(struct super_block *s) EXPORT_SYMBOL(deactivate_super); +/** + * deactivate_locked_super - drop an active reference to superblock + * @s: superblock to deactivate + * + * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that + * it does not unlock it until it's all over. As the result, it's safe to + * use to dispose of new superblock on ->get_sb() failure exits - nobody + * will see the sucker until it's all over. Equivalent using up_write + + * deactivate_super is safe for that purpose only if superblock is either + * safe to use or has NULL ->s_root when we unlock. 
+ */ +void deactivate_locked_super(struct super_block *s) +{ + struct file_system_type *fs = s->s_type; + if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { + s->s_count -= S_BIAS-1; + spin_unlock(&sb_lock); + vfs_dq_off(s, 0); + fs->kill_sb(s); + put_filesystem(fs); + put_super(s); + } else { + up_write(&s->s_umount); + } +} + +EXPORT_SYMBOL(deactivate_locked_super); + /** * grab_super - acquire an active reference * @s: reference we are trying to make active @@ -797,8 +825,7 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, sb->s_flags = flags; err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; } @@ -854,8 +881,7 @@ int get_sb_bdev(struct file_system_type *fs_type, if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -EBUSY; goto error_bdev; } @@ -870,8 +896,7 @@ int get_sb_bdev(struct file_system_type *fs_type, sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto error; } @@ -921,8 +946,7 @@ int get_sb_nodev(struct file_system_type *fs_type, error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } s->s_flags |= MS_ACTIVE; @@ -952,8 +976,7 @@ int get_sb_single(struct file_system_type *fs_type, s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 
1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } s->s_flags |= MS_ACTIVE; @@ -1006,8 +1029,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void return mnt; out_sb: dput(mnt->mnt_root); - up_write(&mnt->mnt_sb->s_umount); - deactivate_super(mnt->mnt_sb); + deactivate_locked_super(mnt->mnt_sb); out_free_secdata: free_secdata(secdata); out_mnt: diff --git a/include/linux/fs.h b/include/linux/fs.h index 5bed436f4353..11484d08042c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1775,6 +1775,7 @@ void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); +void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), -- cgit v1.2.3-71-gd317 From db6c1fbb92eeb4cb52c6133e0c533602f49fc4bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Apr 2009 18:07:08 +0200 Subject: romfs: cleanup romfs_fs.h There's no kernel-only content in it anymore, so move it to header-y and remove the superfluous #ifdef __KERNEL__. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/Kbuild | 2 +- include/linux/romfs_fs.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index ca9b9b9bd331..3f0eaa397ef5 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -138,6 +138,7 @@ header-y += qnxtypes.h header-y += radeonfb.h header-y += raw.h header-y += resource.h +header-y += romfs_fs.h header-y += rose.h header-y += serial_reg.h header-y += smbno.h @@ -314,7 +315,6 @@ unifdef-y += irqnr.h unifdef-y += reboot.h unifdef-y += reiserfs_fs.h unifdef-y += reiserfs_xattr.h -unifdef-y += romfs_fs.h unifdef-y += route.h unifdef-y += rtc.h unifdef-y += rtnetlink.h diff --git a/include/linux/romfs_fs.h b/include/linux/romfs_fs.h index e20bbf9eb365..c490fbc43fe2 100644 --- a/include/linux/romfs_fs.h +++ b/include/linux/romfs_fs.h @@ -53,9 +53,4 @@ struct romfs_inode { #define ROMFH_PAD (ROMFH_SIZE-1) #define ROMFH_MASK (~ROMFH_PAD) -#ifdef __KERNEL__ - -/* Not much now */ - -#endif /* __KERNEL__ */ #endif -- cgit v1.2.3-71-gd317 From 6e8341a11eb21826b7192d0bb88cb5b44900a9af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2009 11:16:22 -0400 Subject: Switch open_exec() and sys_uselib() to do_open_filp() ... 
and make path_lookup_open() static Signed-off-by: Al Viro --- fs/exec.c | 72 ++++++++++++++++++--------------------------------- fs/namei.c | 13 +++++----- fs/open.c | 2 +- include/linux/fs.h | 2 +- include/linux/namei.h | 1 - 5 files changed, 34 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 41ae8e0de72d..895823d0149d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -105,36 +105,28 @@ static inline void put_binfmt(struct linux_binfmt * fmt) SYSCALL_DEFINE1(uselib, const char __user *, library) { struct file *file; - struct nameidata nd; char *tmp = getname(library); int error = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - error = path_lookup_open(AT_FDCWD, tmp, - LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - putname(tmp); - } - if (error) + if (IS_ERR(tmp)) + goto out; + + file = do_filp_open(AT_FDCWD, tmp, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_READ | MAY_EXEC | MAY_OPEN); + putname(tmp); + error = PTR_ERR(file); + if (IS_ERR(file)) goto out; error = -EINVAL; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) goto exit; error = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto exit; - - error = may_open(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN, 0); - if (error) + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; - fsnotify_open(file->f_path.dentry); error = -ENOEXEC; @@ -156,13 +148,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) } read_unlock(&binfmt_lock); } +exit: fput(file); out: return error; -exit: - release_open_intent(&nd); - path_put(&nd.path); - goto out; } #ifdef CONFIG_MMU @@ -657,44 +646,33 @@ EXPORT_SYMBOL(setup_arg_pages); struct file *open_exec(const char *name) { - struct nameidata nd; struct file *file; int err; - err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - if 
(err) + file = do_filp_open(AT_FDCWD, name, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_EXEC | MAY_OPEN); + if (IS_ERR(file)) goto out; err = -EACCES; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) - goto out_path_put; - - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto out_path_put; - - err = may_open(&nd.path, MAY_EXEC | MAY_OPEN, 0); - if (err) - goto out_path_put; + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) + goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - if (IS_ERR(file)) - return file; + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + goto exit; fsnotify_open(file->f_path.dentry); err = deny_write_access(file); - if (err) { - fput(file); - goto out; - } + if (err) + goto exit; +out: return file; - out_path_put: - release_open_intent(&nd); - path_put(&nd.path); - out: +exit: + fput(file); return ERR_PTR(err); } EXPORT_SYMBOL(open_exec); diff --git a/fs/namei.c b/fs/namei.c index 78f253cd2d4f..967c3db92724 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1130,8 +1130,8 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, * @nd: pointer to nameidata * @open_flags: open intent flags */ -int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags) +static int path_lookup_open(int dfd, const char *name, + unsigned int lookup_flags, struct nameidata *nd, int open_flags) { struct file *filp = get_empty_filp(); int err; @@ -1637,18 +1637,19 @@ static int open_will_write_to_fs(int flag, struct inode *inode) * open_to_namei_flags() for more details. 
*/ struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode) + int open_flag, int mode, int acc_mode) { struct file *filp; struct nameidata nd; - int acc_mode, error; + int error; struct path path; struct dentry *dir; int count = 0; int will_write; int flag = open_to_namei_flags(open_flag); - acc_mode = MAY_OPEN | ACC_MODE(flag); + if (!acc_mode) + acc_mode = MAY_OPEN | ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ if (flag & O_TRUNC) @@ -1869,7 +1870,7 @@ do_link: */ struct file *filp_open(const char *filename, int flags, int mode) { - return do_filp_open(AT_FDCWD, filename, flags, mode); + return do_filp_open(AT_FDCWD, filename, flags, mode, 0); } EXPORT_SYMBOL(filp_open); diff --git a/fs/open.c b/fs/open.c index 377eb25b6abf..bdfbf03615a4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1033,7 +1033,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp, flags, mode); + struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); diff --git a/include/linux/fs.h b/include/linux/fs.h index 11484d08042c..ed788426f464 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2118,7 +2118,7 @@ extern struct file *create_write_pipe(int flags); extern void free_write_pipe(struct file *); extern struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode); + int open_flag, int mode, int acc_mode); extern int may_open(struct path *, int, int); extern int kernel_read(struct file *, unsigned long, char *, unsigned long); diff --git a/include/linux/namei.h b/include/linux/namei.h index fc2e03579877..518098fe63af 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -69,7 +69,6 @@ extern int path_lookup(const char *, unsigned, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct 
vfsmount *, const char *, unsigned int, struct nameidata *); -extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags); extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); -- cgit v1.2.3-71-gd317 From 2a32cebd6cbcc43996c3e2d114fa32ba1e71192a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 May 2009 16:05:57 -0400 Subject: Fix races around the access to ->s_options Put generic_show_options read access to s_options under rcu_read_lock, split save_mount_options() into "we are setting it the first time" (uses in foo_fill_super()) and "we are replacing and freeing the old one", synchronize_rcu() before kfree() in the latter. Signed-off-by: Al Viro --- drivers/isdn/capi/capifs.c | 3 +-- fs/affs/super.c | 3 +-- fs/afs/super.c | 4 ++-- fs/hpfs/super.c | 3 +-- fs/namespace.c | 21 ++++++++++++++++++--- fs/reiserfs/super.c | 3 +-- include/linux/fs.h | 1 + 7 files changed, 25 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index b129409925af..8f9f3b5a3e8c 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -75,8 +75,7 @@ static int capifs_remount(struct super_block *s, int *flags, char *data) } } - kfree(s->s_options); - s->s_options = new_opt; + replace_mount_options(s, new_opt); config.setuid = setuid; config.setgid = setgid; diff --git a/fs/affs/super.c b/fs/affs/super.c index 5ce695e707fe..63f5183f263b 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -507,8 +507,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) kfree(new_opts); return -EINVAL; } - kfree(sb->s_options); - sb->s_options = new_opts; + replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; sbi->s_mode = mode; diff --git a/fs/afs/super.c b/fs/afs/super.c index
2753f16dd315..76828e5f8a39 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -408,17 +408,17 @@ static int afs_get_sb(struct file_system_type *fs_type, deactivate_locked_super(sb); goto error; } - sb->s_options = new_opts; + save_mount_options(sb, new_opts); sb->s_flags |= MS_ACTIVE; } else { _debug("reuse"); - kfree(new_opts); ASSERTCMP(sb->s_flags, &, MS_ACTIVE); } simple_set_mnt(mnt, sb); afs_put_volume(params.volume); afs_put_cell(params.cell); + kfree(new_opts); _leave(" = 0 [%p]", sb); return 0; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index fecf402d7b8a..fc77965be841 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -423,8 +423,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) if (!(*flags & MS_RDONLY)) mark_dirty(s); - kfree(s->s_options); - s->s_options = new_opts; + replace_mount_options(s, new_opts); return 0; diff --git a/fs/namespace.c b/fs/namespace.c index 0d2003fb4377..134d494158d9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -695,12 +695,16 @@ static inline void mangle(struct seq_file *m, const char *s) */ int generic_show_options(struct seq_file *m, struct vfsmount *mnt) { - const char *options = mnt->mnt_sb->s_options; + const char *options; + + rcu_read_lock(); + options = rcu_dereference(mnt->mnt_sb->s_options); if (options != NULL && options[0]) { seq_putc(m, ','); mangle(m, options); } + rcu_read_unlock(); return 0; } @@ -721,11 +725,22 @@ EXPORT_SYMBOL(generic_show_options); */ void save_mount_options(struct super_block *sb, char *options) { - kfree(sb->s_options); - sb->s_options = kstrdup(options, GFP_KERNEL); + BUG_ON(sb->s_options); + rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL)); } EXPORT_SYMBOL(save_mount_options); +void replace_mount_options(struct super_block *sb, char *options) +{ + char *old = sb->s_options; + rcu_assign_pointer(sb->s_options, options); + if (old) { + synchronize_rcu(); + kfree(old); + } +} +EXPORT_SYMBOL(replace_mount_options); + #ifdef 
CONFIG_PROC_FS /* iterator */ static void *m_start(struct seq_file *m, loff_t *pos) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d444fe0013a4..1215a4f50cd2 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1316,8 +1316,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) } out_ok: - kfree(s->s_options); - s->s_options = new_opts; + replace_mount_options(s, new_opts); return 0; out_err: diff --git a/include/linux/fs.h b/include/linux/fs.h index ed788426f464..3b534e527e09 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2368,6 +2368,7 @@ extern void file_update_time(struct file *file); extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); extern void save_mount_options(struct super_block *sb, char *options); +extern void replace_mount_options(struct super_block *sb, char *options); static inline ino_t parent_ino(struct dentry *dentry) { -- cgit v1.2.3-71-gd317 From ecf4667d30dd63fa130e22f8f2da3e6ce003358b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 May 2009 13:19:37 -0700 Subject: syscalls.h add the missing sys_pipe2 declaration In order to build the generic syscall table, we need a declaration for every system call. sys_pipe2 was added without a proper declaration, so add this to syscalls.h now. 
Signed-off-by: Arnd Bergmann Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 40617c1d8976..30520844b8da 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -433,6 +433,7 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg); asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg); #endif +asmlinkage long sys_pipe2(int __user *fildes, int flags); asmlinkage long sys_dup(unsigned int fildes); asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd); asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags); -- cgit v1.2.3-71-gd317 From 4f005dbe5584fe54c9f6d6d4f0acd3fb29be84da Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Thu, 23 Apr 2009 12:31:51 +0200 Subject: ioatdma: fix "ioatdma frees DMA memory with wrong function" as reported by Alexander Beregalov ioatdma 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong function [device address=0x000000007f76f800] [size=2000 bytes] [mapped as single] [unmapped as page] The ioatdma driver was unmapping all regions (either allocated as page or single) using unmap_page. This patch lets dma driver recognize if unmap_single or unmap_page should be used. It introduces two new dma control flags: DMA_COMPL_SRC_UNMAP_SINGLE and DMA_COMPL_DEST_UNMAP_SINGLE. They should be set to indicate dma driver to do dma-unmapping as single (first one for the source, the latter for the destination). If respective flag is not set, the driver assumes dma-unmapping as page.
Signed-off-by: Maciej Sosnowski Reported-by: Alexander Beregalov Tested-by: Alexander Beregalov Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 17 +++++++++++------ drivers/dma/ioat_dma.c | 45 ++++++++++++++++++++++++++++----------------- include/linux/dmaengine.h | 6 ++++++ 3 files changed, 45 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 92438e9dacc3..5a87384ea4ff 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -804,11 +804,14 @@ dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, dma_addr_t dma_dest, dma_src; dma_cookie_t cookie; int cpu; + unsigned long flags; dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE); dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE); - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, - DMA_CTRL_ACK); + flags = DMA_CTRL_ACK | + DMA_COMPL_SRC_UNMAP_SINGLE | + DMA_COMPL_DEST_UNMAP_SINGLE; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); if (!tx) { dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); @@ -850,11 +853,12 @@ dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, dma_addr_t dma_dest, dma_src; dma_cookie_t cookie; int cpu; + unsigned long flags; dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE); dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE); - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, - DMA_CTRL_ACK); + flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); if (!tx) { dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); @@ -898,12 +902,13 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, dma_addr_t dma_dest, dma_src; dma_cookie_t cookie; int cpu; + unsigned long flags; dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE); dma_dest = dma_map_page(dev->dev, 
dest_pg, dest_off, len, DMA_FROM_DEVICE); - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, - DMA_CTRL_ACK); + flags = DMA_CTRL_ACK; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); if (!tx) { dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE); diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index e4fc33c1c32f..1955ee8d6d20 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -1063,22 +1063,31 @@ static void ioat_dma_cleanup_tasklet(unsigned long data) static void ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) { - /* - * yes we are unmapping both _page and _single - * alloc'd regions with unmap_page. Is this - * *really* that bad? - */ - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + pci_unmap_single(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + else + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + } + + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + pci_unmap_single(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + else + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + } } /** @@ -1363,6 +1372,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device) int err = 0; struct completion cmp; unsigned long tmo; + 
unsigned long flags; src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); if (!src) @@ -1392,8 +1402,9 @@ static int ioat_dma_self_test(struct ioatdma_device *device) DMA_TO_DEVICE); dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE; tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, - IOAT_TEST_SIZE, 0); + IOAT_TEST_SIZE, flags); if (!tx) { dev_err(&device->pdev->dev, "Self-test prep failed, disabling\n"); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2e2aa3df170c..ffefba81c818 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -78,12 +78,18 @@ enum dma_transaction_type { * dependency chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) + * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single + * (if not set, do the source dma-unmapping as page) + * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single + * (if not set, do the destination dma-unmapping as page) */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), + DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), + DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), }; /** -- cgit v1.2.3-71-gd317 From cd17cbfda004fe5f406c01b318c6378d9895896f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 May 2009 11:32:24 +0200 Subject: Revert "mm: add /proc controls for pdflush threads" This reverts commit fafd688e4c0c34da0f3de909881117d374e4c7af. Work is progressing to switch away from pdflush as the process backing for flushing out dirty data. So it seems pointless to add more knobs to control pdflush threads. 
The original author of the patch did not have any specific use cases for adding the knobs, so we can easily revert this before 2.6.30 to avoid having to maintain this API forever. Signed-off-by: Jens Axboe --- Documentation/sysctl/vm.txt | 28 ---------------------------- include/linux/writeback.h | 2 -- kernel/sysctl.c | 23 ----------------------- mm/pdflush.c | 31 ++++++++++++------------------- 4 files changed, 12 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index b716d33912d8..c302ddf629a0 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -39,8 +39,6 @@ Currently, these files are in /proc/sys/vm: - nr_hugepages - nr_overcommit_hugepages - nr_pdflush_threads -- nr_pdflush_threads_min -- nr_pdflush_threads_max - nr_trim_pages (only if CONFIG_MMU=n) - numa_zonelist_order - oom_dump_tasks @@ -469,32 +467,6 @@ The default value is 0. ============================================================== -nr_pdflush_threads_min - -This value controls the minimum number of pdflush threads. - -At boot time, the kernel will create and maintain 'nr_pdflush_threads_min' -threads for the kernel's lifetime. - -The default value is 2. The minimum value you can specify is 1, and -the maximum value is the current setting of 'nr_pdflush_threads_max'. - -See 'nr_pdflush_threads_max' below for more information. - -============================================================== - -nr_pdflush_threads_max - -This value controls the maximum number of pdflush threads that can be -created. The pdflush algorithm will create a new pdflush thread (up to -this maximum) if no pdflush threads have been available for >= 1 second. - -The default value is 8. The minimum value you can specify is the -current value of 'nr_pdflush_threads_min' and the -maximum is 1000. 
- -============================================================== - overcommit_memory: This value contains a flag that enables memory overcommitment. diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9c1ed1fb6ddb..93445477f86a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -168,8 +168,6 @@ void writeback_set_ratelimit(void); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl read-only. */ -extern int nr_pdflush_threads_max; /* Global so it can be exported to sysctl */ -extern int nr_pdflush_threads_min; /* Global so it can be exported to sysctl */ #endif /* WRITEBACK_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ea78fa101ad6..b2970d56fb76 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -101,7 +101,6 @@ static int __maybe_unused one = 1; static int __maybe_unused two = 2; static unsigned long one_ul = 1; static int one_hundred = 100; -static int one_thousand = 1000; /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; @@ -1033,28 +1032,6 @@ static struct ctl_table vm_table[] = { .mode = 0444 /* read-only*/, .proc_handler = &proc_dointvec, }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "nr_pdflush_threads_min", - .data = &nr_pdflush_threads_min, - .maxlen = sizeof nr_pdflush_threads_min, - .mode = 0644 /* read-write */, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &nr_pdflush_threads_max, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "nr_pdflush_threads_max", - .data = &nr_pdflush_threads_max, - .maxlen = sizeof nr_pdflush_threads_max, - .mode = 0644 /* read-write */, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &nr_pdflush_threads_min, - .extra2 = &one_thousand, - }, { .ctl_name = VM_SWAPPINESS, .procname = "swappiness", diff --git a/mm/pdflush.c b/mm/pdflush.c index 
f2caf96993f8..235ac440c44e 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -57,14 +57,6 @@ static DEFINE_SPINLOCK(pdflush_lock); */ int nr_pdflush_threads = 0; -/* - * The max/min number of pdflush threads. R/W by sysctl at - * /proc/sys/vm/nr_pdflush_threads_max/min - */ -int nr_pdflush_threads_max __read_mostly = MAX_PDFLUSH_THREADS; -int nr_pdflush_threads_min __read_mostly = MIN_PDFLUSH_THREADS; - - /* * The time at which the pdflush thread pool last went empty */ @@ -76,7 +68,7 @@ static unsigned long last_empty_jifs; * Thread pool management algorithm: * * - The minimum and maximum number of pdflush instances are bound - * by nr_pdflush_threads_min and nr_pdflush_threads_max. + * by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS. * * - If there have been no idle pdflush instances for 1 second, create * a new one. @@ -142,13 +134,14 @@ static int __pdflush(struct pdflush_work *my_work) * To throttle creation, we reset last_empty_jifs. */ if (time_after(jiffies, last_empty_jifs + 1 * HZ)) { - if (list_empty(&pdflush_list) && - nr_pdflush_threads < nr_pdflush_threads_max) { - last_empty_jifs = jiffies; - nr_pdflush_threads++; - spin_unlock_irq(&pdflush_lock); - start_one_pdflush_thread(); - spin_lock_irq(&pdflush_lock); + if (list_empty(&pdflush_list)) { + if (nr_pdflush_threads < MAX_PDFLUSH_THREADS) { + last_empty_jifs = jiffies; + nr_pdflush_threads++; + spin_unlock_irq(&pdflush_lock); + start_one_pdflush_thread(); + spin_lock_irq(&pdflush_lock); + } } } @@ -160,7 +153,7 @@ static int __pdflush(struct pdflush_work *my_work) */ if (list_empty(&pdflush_list)) continue; - if (nr_pdflush_threads <= nr_pdflush_threads_min) + if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS) continue; pdf = list_entry(pdflush_list.prev, struct pdflush_work, list); if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) { @@ -266,9 +259,9 @@ static int __init pdflush_init(void) * Pre-set nr_pdflush_threads... If we fail to create, * the count will be decremented. 
*/ - nr_pdflush_threads = nr_pdflush_threads_min; + nr_pdflush_threads = MIN_PDFLUSH_THREADS; - for (i = 0; i < nr_pdflush_threads_min; i++) + for (i = 0; i < MIN_PDFLUSH_THREADS; i++) start_one_pdflush_thread(); return 0; } -- cgit v1.2.3-71-gd317 From 4bca3286433585b5f1c3e7d8ac37a2f4b3def9ca Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 15 May 2009 00:40:35 -0400 Subject: libata: Media rotation rate and form factor heuristics This patch provides new heuristics for parsing both the form factor and media rotation rate ATA IDENTIFY words. The reported ATA version must be 7 or greater and the device must return values defined as valid in the standard. Only then are the characteristics reported to SCSI via the VPD B1 page. This seems like a reasonable compromise to me considering that we have been shipping several kernel releases that key off the rotation rate bit without any version checking whatsoever. With no complaints so far. Signed-off-by: Martin K. Petersen Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 11 ++++++----- include/linux/ata.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d1718a1f278a..342316064e9f 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2142,13 +2142,14 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf) static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf) { + int form_factor = ata_id_form_factor(args->id); + int media_rotation_rate = ata_id_rotation_rate(args->id); + rbuf[1] = 0xb1; rbuf[3] = 0x3c; - if (ata_id_major_version(args->id) > 7) { - rbuf[4] = args->id[217] >> 8; - rbuf[5] = args->id[217]; - rbuf[7] = args->id[168] & 0xf; - } + rbuf[4] = media_rotation_rate >> 8; + rbuf[5] = media_rotation_rate; + rbuf[7] = form_factor; return 0; } diff --git a/include/linux/ata.h b/include/linux/ata.h
index cb79b7a208e1..915da43edee1 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -730,6 +730,34 @@ static inline int ata_id_has_unload(const u16 *id) return 0; } +static inline int ata_id_form_factor(const u16 *id) +{ + u16 val = id[168]; + + if (ata_id_major_version(id) < 7 || val == 0 || val == 0xffff) + return 0; + + val &= 0xf; + + if (val > 5) + return 0; + + return val; +} + +static inline int ata_id_rotation_rate(const u16 *id) +{ + u16 val = id[217]; + + if (ata_id_major_version(id) < 7 || val == 0 || val == 0xffff) + return 0; + + if (val > 1 && val < 0x401) + return 0; + + return val; +} + static inline int ata_id_has_trim(const u16 *id) { if (ata_id_major_version(id) >= 7 && -- cgit v1.2.3-71-gd317