From 7bf570dc8dcf76df2a9f583bef2da96d4289ed0d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 20:52:51 +0100 Subject: Security: Make secctx_to_secid() take const secdata Make secctx_to_secid() take constant secdata. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/security.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index adb09d893ae0..50737c70e78e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1481,7 +1481,7 @@ struct security_operations { int (*getprocattr) (struct task_struct *p, char *name, char **value); int (*setprocattr) (struct task_struct *p, char *name, void *value, size_t size); int (*secid_to_secctx) (u32 secid, char **secdata, u32 *seclen); - int (*secctx_to_secid) (char *secdata, u32 seclen, u32 *secid); + int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid); void (*release_secctx) (char *secdata, u32 seclen); #ifdef CONFIG_SECURITY_NETWORK @@ -1730,7 +1730,7 @@ int security_setprocattr(struct task_struct *p, char *name, void *value, size_t int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_netlink_recv(struct sk_buff *skb, int cap); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); -int security_secctx_to_secid(char *secdata, u32 seclen, u32 *secid); +int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); void security_release_secctx(char *secdata, u32 seclen); #else /* CONFIG_SECURITY */ @@ -2449,7 +2449,7 @@ static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *secle return -EOPNOTSUPP; } -static inline int security_secctx_to_secid(char *secdata, +static inline int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) { -- cgit v1.2.3-71-gd317 From 25f2ea9fc8c7ec34d351cef7dade2e8046e49ed1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 
Apr 2008 20:54:28 +0100 Subject: Security: Typecast CAP_*_SET macros Cast the CAP_*_SET macros to be of kernel_cap_t type to avoid compiler warnings. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/capability.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index eaab759b1460..f4ea0dd9a618 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -365,12 +365,12 @@ typedef struct kernel_cap_struct { # error Fix up hand-coded capability macro initializers #else /* HAND-CODED capability initializers */ -# define CAP_EMPTY_SET {{ 0, 0 }} -# define CAP_FULL_SET {{ ~0, ~0 }} -# define CAP_INIT_EFF_SET {{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }} -# define CAP_FS_SET {{ CAP_FS_MASK_B0, CAP_FS_MASK_B1 } } -# define CAP_NFSD_SET {{ CAP_FS_MASK_B0|CAP_TO_MASK(CAP_SYS_RESOURCE), \ - CAP_FS_MASK_B1 } } +# define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) +# define CAP_FULL_SET ((kernel_cap_t){{ ~0, ~0 }}) +# define CAP_INIT_EFF_SET ((kernel_cap_t){{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }}) +# define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0, CAP_FS_MASK_B1 } }) +# define CAP_NFSD_SET ((kernel_cap_t){{ CAP_FS_MASK_B0|CAP_TO_MASK(CAP_SYS_RESOURCE), \ + CAP_FS_MASK_B1 } }) #endif /* _LINUX_CAPABILITY_U32S != 2 */ -- cgit v1.2.3-71-gd317 From e463c7b197dbe64b8a99b0612c65f286937e5bf1 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 29 Apr 2008 13:46:50 -0700 Subject: mlx4_core: Add a way to set the "collapsed" CQ flag Extend the mlx4_cq_alloc() API with a way to set the "collapsed" flag for the CQ being created. 
Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/net/mlx4/cq.c | 4 +++- include/linux/mlx4/device.h | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index e3dddfc687f9..2f199c5c4a72 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -221,7 +221,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector } err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq); + cq->db.dma, &cq->mcq, 0); if (err) goto err_dbmap; diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c index 6fda0af9d0a6..95e87a2f8896 100644 --- a/drivers/net/mlx4/cq.c +++ b/drivers/net/mlx4/cq.c @@ -188,7 +188,8 @@ int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq, EXPORT_SYMBOL_GPL(mlx4_cq_resize); int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, - struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq) + struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, + int collapsed) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; @@ -224,6 +225,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, cq_context = mailbox->buf; memset(cq_context, 0, sizeof *cq_context); + cq_context->flags = cpu_to_be32(!!collapsed << 18); cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); cq_context->comp_eqn = priv->eq_table.eq[MLX4_EQ_COMP].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9fa1a8002ce2..a744383d16e9 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -382,7 +382,8 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, int size); int mlx4_cq_alloc(struct mlx4_dev *dev, 
int nent, struct mlx4_mtt *mtt, - struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq); + struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, + int collapsed); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp); -- cgit v1.2.3-71-gd317 From 306f39f8f2ecf896ae761748843b148b90d3494d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 29 Apr 2008 23:11:38 +0200 Subject: i2c: Drop unused RTC driver IDs The x1205, pcf8563 and isl1208 RTC drivers have been converted to new-style i2c drivers, so they no longer use I2C driver IDs. Signed-off-by: Jean Delvare Cc: Alessandro Zummo --- include/linux/i2c-id.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 32eb8bbe4831..580acc93903e 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -79,12 +79,9 @@ #define I2C_DRIVERID_UPD64031A 79 /* upd64031a video processor */ #define I2C_DRIVERID_SAA717X 80 /* saa717x video encoder */ #define I2C_DRIVERID_DS1672 81 /* Dallas/Maxim DS1672 RTC */ -#define I2C_DRIVERID_X1205 82 /* Xicor/Intersil X1205 RTC */ -#define I2C_DRIVERID_PCF8563 83 /* Philips PCF8563 RTC */ #define I2C_DRIVERID_BT866 85 /* Conexant bt866 video encoder */ #define I2C_DRIVERID_KS0127 86 /* Samsung ks0127 video decoder */ #define I2C_DRIVERID_TLV320AIC23B 87 /* TI TLV320AIC23B audio codec */ -#define I2C_DRIVERID_ISL1208 88 /* Intersil ISL1208 RTC */ #define I2C_DRIVERID_WM8731 89 /* Wolfson WM8731 audio codec */ #define I2C_DRIVERID_WM8750 90 /* Wolfson WM8750 audio codec */ #define I2C_DRIVERID_WM8753 91 /* Wolfson WM8753 audio codec */ -- cgit v1.2.3-71-gd317 From d2653e92732bd3911feff6bee5e23dbf959381db Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 29 Apr 2008 23:11:39 +0200 Subject: i2c: Add support for device alias names Based on earlier work by Jon Smirl and Jochen Friedrich. 
This patch allows new-style i2c chip drivers to have alias names using the official kernel aliasing system and MODULE_DEVICE_TABLE(). At this point, the old i2c driver binding scheme (driver_name/type) is still supported. Signed-off-by: Jean Delvare Cc: Jochen Friedrich Cc: Jon Smirl Cc: Kay Sievers --- Documentation/i2c/writing-clients | 3 +- drivers/gpio/pca953x.c | 3 +- drivers/gpio/pcf857x.c | 3 +- drivers/hwmon/f75375s.c | 8 +++-- drivers/i2c/chips/ds1682.c | 3 +- drivers/i2c/chips/menelaus.c | 3 +- drivers/i2c/chips/tps65010.c | 3 +- drivers/i2c/chips/tsl2550.c | 3 +- drivers/i2c/i2c-core.c | 51 ++++++++++++++++++++++++------ drivers/media/video/cs5345.c | 3 +- drivers/media/video/cs53l32a.c | 3 +- drivers/media/video/cx25840/cx25840-core.c | 3 +- drivers/media/video/m52790.c | 3 +- drivers/media/video/msp3400-driver.c | 2 +- drivers/media/video/mt9m001.c | 3 +- drivers/media/video/mt9v022.c | 3 +- drivers/media/video/saa7115.c | 3 +- drivers/media/video/saa7127.c | 3 +- drivers/media/video/saa717x.c | 3 +- drivers/media/video/tcm825x.c | 3 +- drivers/media/video/tlv320aic23b.c | 3 +- drivers/media/video/tuner-core.c | 3 +- drivers/media/video/tvaudio.c | 2 +- drivers/media/video/upd64031a.c | 3 +- drivers/media/video/upd64083.c | 3 +- drivers/media/video/v4l2-common.c | 5 +-- drivers/media/video/vp27smpx.c | 3 +- drivers/media/video/wm8739.c | 3 +- drivers/media/video/wm8775.c | 3 +- drivers/rtc/rtc-ds1307.c | 3 +- drivers/rtc/rtc-ds1374.c | 3 +- drivers/rtc/rtc-isl1208.c | 2 +- drivers/rtc/rtc-m41t80.c | 3 +- drivers/rtc/rtc-pcf8563.c | 3 +- drivers/rtc/rtc-rs5c372.c | 3 +- drivers/rtc/rtc-s35390a.c | 3 +- drivers/rtc/rtc-x1205.c | 3 +- include/linux/i2c.h | 5 ++- include/linux/mod_devicetable.h | 11 +++++++ include/media/v4l2-common.h | 4 ++- include/media/v4l2-i2c-drv-legacy.h | 2 +- include/media/v4l2-i2c-drv.h | 2 +- scripts/mod/file2alias.c | 13 ++++++++ 43 files changed, 146 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git 
a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index bfb0a5520817..ee75cbace28d 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -164,7 +164,8 @@ I2C device drivers using this binding model work just like any other kind of driver in Linux: they provide a probe() method to bind to those devices, and a remove() method to unbind. - static int foo_probe(struct i2c_client *client); + static int foo_probe(struct i2c_client *client, + const struct i2c_device_id *id); static int foo_remove(struct i2c_client *client); Remember that the i2c_driver does not create those client handles. The diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c index e0e0af536108..2670519236e5 100644 --- a/drivers/gpio/pca953x.c +++ b/drivers/gpio/pca953x.c @@ -192,7 +192,8 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) gc->owner = THIS_MODULE; } -static int __devinit pca953x_probe(struct i2c_client *client) +static int __devinit pca953x_probe(struct i2c_client *client, + const struct i2c_device_id *did) { struct pca953x_platform_data *pdata; struct pca953x_chip *chip; diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c index 1106aa15ac79..8856870dd738 100644 --- a/drivers/gpio/pcf857x.c +++ b/drivers/gpio/pcf857x.c @@ -142,7 +142,8 @@ static void pcf857x_set16(struct gpio_chip *chip, unsigned offset, int value) /*-------------------------------------------------------------------------*/ -static int pcf857x_probe(struct i2c_client *client) +static int pcf857x_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct pcf857x_platform_data *pdata; struct pcf857x *gpio; diff --git a/drivers/hwmon/f75375s.c b/drivers/hwmon/f75375s.c index 1464338e4e11..1f63bab05522 100644 --- a/drivers/hwmon/f75375s.c +++ b/drivers/hwmon/f75375s.c @@ -117,7 +117,8 @@ struct f75375_data { static int f75375_attach_adapter(struct i2c_adapter *adapter); static int f75375_detect(struct 
i2c_adapter *adapter, int address, int kind); static int f75375_detach_client(struct i2c_client *client); -static int f75375_probe(struct i2c_client *client); +static int f75375_probe(struct i2c_client *client, + const struct i2c_device_id *id); static int f75375_remove(struct i2c_client *client); static struct i2c_driver f75375_legacy_driver = { @@ -628,7 +629,8 @@ static void f75375_init(struct i2c_client *client, struct f75375_data *data, } -static int f75375_probe(struct i2c_client *client) +static int f75375_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct f75375_data *data = i2c_get_clientdata(client); struct f75375s_platform_data *f75375s_pdata = client->dev.platform_data; @@ -748,7 +750,7 @@ static int f75375_detect(struct i2c_adapter *adapter, int address, int kind) if ((err = i2c_attach_client(client))) goto exit_free; - if ((err = f75375_probe(client)) < 0) + if ((err = f75375_probe(client, NULL)) < 0) goto exit_detach; return 0; diff --git a/drivers/i2c/chips/ds1682.c b/drivers/i2c/chips/ds1682.c index 9e94542c18a2..3070821030e4 100644 --- a/drivers/i2c/chips/ds1682.c +++ b/drivers/i2c/chips/ds1682.c @@ -200,7 +200,8 @@ static struct bin_attribute ds1682_eeprom_attr = { /* * Called when a ds1682 device is matched with this driver */ -static int ds1682_probe(struct i2c_client *client) +static int ds1682_probe(struct i2c_client *client, + const struct i2c_device_id *id) { int rc; diff --git a/drivers/i2c/chips/menelaus.c b/drivers/i2c/chips/menelaus.c index 2dea0123a958..3b8ba7e75843 100644 --- a/drivers/i2c/chips/menelaus.c +++ b/drivers/i2c/chips/menelaus.c @@ -1149,7 +1149,8 @@ static inline void menelaus_rtc_init(struct menelaus_chip *m) static struct i2c_driver menelaus_i2c_driver; -static int menelaus_probe(struct i2c_client *client) +static int menelaus_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct menelaus_chip *menelaus; int rev = 0, val; diff --git a/drivers/i2c/chips/tps65010.c 
b/drivers/i2c/chips/tps65010.c index feabd12c081c..6ab3619a49de 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c @@ -532,7 +532,8 @@ static int __exit tps65010_remove(struct i2c_client *client) return 0; } -static int tps65010_probe(struct i2c_client *client) +static int tps65010_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct tps65010 *tps; int status; diff --git a/drivers/i2c/chips/tsl2550.c b/drivers/i2c/chips/tsl2550.c index a10fd2791a69..59c2c662cc45 100644 --- a/drivers/i2c/chips/tsl2550.c +++ b/drivers/i2c/chips/tsl2550.c @@ -364,7 +364,8 @@ static int tsl2550_init_client(struct i2c_client *client) */ static struct i2c_driver tsl2550_driver; -static int __devinit tsl2550_probe(struct i2c_client *client) +static int __devinit tsl2550_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); struct tsl2550_data *data; diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 6c7fa8d53c0e..26384daccb96 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -48,6 +48,17 @@ static DEFINE_IDR(i2c_adapter_idr); /* ------------------------------------------------------------------------- */ +static const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id, + const struct i2c_client *client) +{ + while (id->name[0]) { + if (strcmp(client->name, id->name) == 0) + return id; + id++; + } + return NULL; +} + static int i2c_device_match(struct device *dev, struct device_driver *drv) { struct i2c_client *client = to_i2c_client(dev); @@ -59,6 +70,10 @@ static int i2c_device_match(struct device *dev, struct device_driver *drv) if (!is_newstyle_driver(driver)) return 0; + /* match on an id table if there is one */ + if (driver->id_table) + return i2c_match_id(driver->id_table, client) != NULL; + /* new style drivers use the same kind of driver matching policy * as platform devices or SPI: compare device and 
driver IDs. */ @@ -73,11 +88,17 @@ static int i2c_device_uevent(struct device *dev, struct kobj_uevent_env *env) struct i2c_client *client = to_i2c_client(dev); /* by definition, legacy drivers can't hotplug */ - if (dev->driver || !client->driver_name) + if (dev->driver) return 0; - if (add_uevent_var(env, "MODALIAS=%s", client->driver_name)) - return -ENOMEM; + if (client->driver_name[0]) { + if (add_uevent_var(env, "MODALIAS=%s", client->driver_name)) + return -ENOMEM; + } else { + if (add_uevent_var(env, "MODALIAS=%s%s", + I2C_MODULE_PREFIX, client->name)) + return -ENOMEM; + } dev_dbg(dev, "uevent\n"); return 0; } @@ -90,13 +111,19 @@ static int i2c_device_probe(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct i2c_driver *driver = to_i2c_driver(dev->driver); + const struct i2c_device_id *id; int status; if (!driver->probe) return -ENODEV; client->driver = driver; dev_dbg(dev, "probe\n"); - status = driver->probe(client); + + if (driver->id_table) + id = i2c_match_id(driver->id_table, client); + else + id = NULL; + status = driver->probe(client, id); if (status) client->driver = NULL; return status; @@ -179,9 +206,9 @@ static ssize_t show_client_name(struct device *dev, struct device_attribute *att static ssize_t show_modalias(struct device *dev, struct device_attribute *attr, char *buf) { struct i2c_client *client = to_i2c_client(dev); - return client->driver_name + return client->driver_name[0] ? 
sprintf(buf, "%s\n", client->driver_name) - : 0; + : sprintf(buf, "%s%s\n", I2C_MODULE_PREFIX, client->name); } static struct device_attribute i2c_dev_attrs[] = { @@ -300,15 +327,21 @@ void i2c_unregister_device(struct i2c_client *client) EXPORT_SYMBOL_GPL(i2c_unregister_device); -static int dummy_nop(struct i2c_client *client) +static int dummy_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + return 0; +} + +static int dummy_remove(struct i2c_client *client) { return 0; } static struct i2c_driver dummy_driver = { .driver.name = "dummy", - .probe = dummy_nop, - .remove = dummy_nop, + .probe = dummy_probe, + .remove = dummy_remove, }; /** diff --git a/drivers/media/video/cs5345.c b/drivers/media/video/cs5345.c index fae469ce16f5..2a429f9e32cd 100644 --- a/drivers/media/video/cs5345.c +++ b/drivers/media/video/cs5345.c @@ -142,7 +142,8 @@ static int cs5345_command(struct i2c_client *client, unsigned cmd, void *arg) /* ----------------------------------------------------------------------- */ -static int cs5345_probe(struct i2c_client *client) +static int cs5345_probe(struct i2c_client *client, + const struct i2c_device_id *id) { /* Check if the adapter supports the needed features */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) diff --git a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c index f41bfde045fe..2dfd0afc62db 100644 --- a/drivers/media/video/cs53l32a.c +++ b/drivers/media/video/cs53l32a.c @@ -135,7 +135,8 @@ static int cs53l32a_command(struct i2c_client *client, unsigned cmd, void *arg) * concerning the addresses: i2c wants 7 bit (without the r/w bit), so '>>1' */ -static int cs53l32a_probe(struct i2c_client *client) +static int cs53l32a_probe(struct i2c_client *client, + const struct i2c_device_id *id) { int i; diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c index 7fde678b2c4a..88823810497c 100644 --- 
a/drivers/media/video/cx25840/cx25840-core.c +++ b/drivers/media/video/cx25840/cx25840-core.c @@ -1209,7 +1209,8 @@ static int cx25840_command(struct i2c_client *client, unsigned int cmd, /* ----------------------------------------------------------------------- */ -static int cx25840_probe(struct i2c_client *client) +static int cx25840_probe(struct i2c_client *client, + const struct i2c_device_id *did) { struct cx25840_state *state; u32 id; diff --git a/drivers/media/video/m52790.c b/drivers/media/video/m52790.c index d4bf14c284ef..5b9dfa2c51b4 100644 --- a/drivers/media/video/m52790.c +++ b/drivers/media/video/m52790.c @@ -126,7 +126,8 @@ static int m52790_command(struct i2c_client *client, unsigned int cmd, /* i2c implementation */ -static int m52790_probe(struct i2c_client *client) +static int m52790_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct m52790_state *state; diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c index b73c740f7fb2..e6273162e123 100644 --- a/drivers/media/video/msp3400-driver.c +++ b/drivers/media/video/msp3400-driver.c @@ -805,7 +805,7 @@ static int msp_resume(struct i2c_client *client) /* ----------------------------------------------------------------------- */ -static int msp_probe(struct i2c_client *client) +static int msp_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct msp_state *state; int (*thread_func)(void *data) = NULL; diff --git a/drivers/media/video/mt9m001.c b/drivers/media/video/mt9m001.c index 3fb5f63df1e6..26cb27604e04 100644 --- a/drivers/media/video/mt9m001.c +++ b/drivers/media/video/mt9m001.c @@ -620,7 +620,8 @@ static void mt9m001_video_remove(struct soc_camera_device *icd) soc_camera_video_stop(&mt9m001->icd); } -static int mt9m001_probe(struct i2c_client *client) +static int mt9m001_probe(struct i2c_client *client, + const struct i2c_device_id *did) { struct mt9m001 *mt9m001; struct soc_camera_device *icd; diff --git 
a/drivers/media/video/mt9v022.c b/drivers/media/video/mt9v022.c index d4b9e2744343..7b1dd7ede9d0 100644 --- a/drivers/media/video/mt9v022.c +++ b/drivers/media/video/mt9v022.c @@ -745,7 +745,8 @@ static void mt9v022_video_remove(struct soc_camera_device *icd) soc_camera_video_stop(&mt9v022->icd); } -static int mt9v022_probe(struct i2c_client *client) +static int mt9v022_probe(struct i2c_client *client, + const struct i2c_device_id *did) { struct mt9v022 *mt9v022; struct soc_camera_device *icd; diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c index 416d05d4a969..e684108637ad 100644 --- a/drivers/media/video/saa7115.c +++ b/drivers/media/video/saa7115.c @@ -1450,7 +1450,8 @@ static int saa7115_command(struct i2c_client *client, unsigned int cmd, void *ar /* ----------------------------------------------------------------------- */ -static int saa7115_probe(struct i2c_client *client) +static int saa7115_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct saa711x_state *state; int i; diff --git a/drivers/media/video/saa7127.c b/drivers/media/video/saa7127.c index 06c88db656b4..e750cd65c1c3 100644 --- a/drivers/media/video/saa7127.c +++ b/drivers/media/video/saa7127.c @@ -661,7 +661,8 @@ static int saa7127_command(struct i2c_client *client, /* ----------------------------------------------------------------------- */ -static int saa7127_probe(struct i2c_client *client) +static int saa7127_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct saa7127_state *state; struct v4l2_sliced_vbi_data vbi = { 0, 0, 0, 0 }; /* set to disabled */ diff --git a/drivers/media/video/saa717x.c b/drivers/media/video/saa717x.c index 53c5edbcf7ea..72c4081feff5 100644 --- a/drivers/media/video/saa717x.c +++ b/drivers/media/video/saa717x.c @@ -1418,7 +1418,8 @@ static int saa717x_command(struct i2c_client *client, unsigned cmd, void *arg) /* i2c implementation */ /* 
----------------------------------------------------------------------- */ -static int saa717x_probe(struct i2c_client *client) +static int saa717x_probe(struct i2c_client *client, + const struct i2c_device_id *did) { struct saa717x_state *decoder; u8 id = 0; diff --git a/drivers/media/video/tcm825x.c b/drivers/media/video/tcm825x.c index 6943b447a1bd..e57a64605778 100644 --- a/drivers/media/video/tcm825x.c +++ b/drivers/media/video/tcm825x.c @@ -840,7 +840,8 @@ static struct v4l2_int_device tcm825x_int_device = { }, }; -static int tcm825x_probe(struct i2c_client *client) +static int tcm825x_probe(struct i2c_client *client, + const struct i2c_device_id *did) { struct tcm825x_sensor *sensor = &tcm825x; int rval; diff --git a/drivers/media/video/tlv320aic23b.c b/drivers/media/video/tlv320aic23b.c index dc7b9c220b90..f1db54202dea 100644 --- a/drivers/media/video/tlv320aic23b.c +++ b/drivers/media/video/tlv320aic23b.c @@ -125,7 +125,8 @@ static int tlv320aic23b_command(struct i2c_client *client, * concerning the addresses: i2c wants 7 bit (without the r/w bit), so '>>1' */ -static int tlv320aic23b_probe(struct i2c_client *client) +static int tlv320aic23b_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct tlv320aic23b_state *state; diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index 2b72e10e6b9f..2a2748238c78 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -1073,7 +1073,8 @@ static void tuner_lookup(struct i2c_adapter *adap, /* During client attach, set_type is called by adapter's attach_inform callback. set_type must then be completed by tuner_probe. 
*/ -static int tuner_probe(struct i2c_client *client) +static int tuner_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct tuner *t; struct tuner *radio; diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index f29a2cd0f2f2..6f9945b04e1f 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -1461,7 +1461,7 @@ static struct CHIPDESC chiplist[] = { /* ---------------------------------------------------------------------- */ /* i2c registration */ -static int chip_probe(struct i2c_client *client) +static int chip_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct CHIPSTATE *chip; struct CHIPDESC *desc; diff --git a/drivers/media/video/upd64031a.c b/drivers/media/video/upd64031a.c index bd201397a2ac..93bfd19dec7d 100644 --- a/drivers/media/video/upd64031a.c +++ b/drivers/media/video/upd64031a.c @@ -195,7 +195,8 @@ static int upd64031a_command(struct i2c_client *client, unsigned cmd, void *arg) /* i2c implementation */ -static int upd64031a_probe(struct i2c_client *client) +static int upd64031a_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct upd64031a_state *state; int i; diff --git a/drivers/media/video/upd64083.c b/drivers/media/video/upd64083.c index 2d9a88f70c85..9ab712a56ce0 100644 --- a/drivers/media/video/upd64083.c +++ b/drivers/media/video/upd64083.c @@ -172,7 +172,8 @@ static int upd64083_command(struct i2c_client *client, unsigned cmd, void *arg) /* i2c implementation */ -static int upd64083_probe(struct i2c_client *client) +static int upd64083_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct upd64083_state *state; int i; diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c index 7cc42c1da457..e9dd996fd5df 100644 --- a/drivers/media/video/v4l2-common.c +++ b/drivers/media/video/v4l2-common.c @@ -710,7 +710,8 @@ EXPORT_SYMBOL(v4l2_chip_ident_i2c_client); /* Helper function for 
I2C legacy drivers */ int v4l2_i2c_attach(struct i2c_adapter *adapter, int address, struct i2c_driver *driver, - const char *name, int (*probe)(struct i2c_client *)) + const char *name, + int (*probe)(struct i2c_client *, const struct i2c_device_id *)) { struct i2c_client *client; int err; @@ -724,7 +725,7 @@ int v4l2_i2c_attach(struct i2c_adapter *adapter, int address, struct i2c_driver client->driver = driver; strlcpy(client->name, name, sizeof(client->name)); - err = probe(client); + err = probe(client, NULL); if (err == 0) { i2c_attach_client(client); } else { diff --git a/drivers/media/video/vp27smpx.c b/drivers/media/video/vp27smpx.c index 282c81403c97..fac0deba24af 100644 --- a/drivers/media/video/vp27smpx.c +++ b/drivers/media/video/vp27smpx.c @@ -121,7 +121,8 @@ static int vp27smpx_command(struct i2c_client *client, unsigned cmd, void *arg) * concerning the addresses: i2c wants 7 bit (without the r/w bit), so '>>1' */ -static int vp27smpx_probe(struct i2c_client *client) +static int vp27smpx_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct vp27smpx_state *state; diff --git a/drivers/media/video/wm8739.c b/drivers/media/video/wm8739.c index 31795b4f8b63..0f8ed8461fba 100644 --- a/drivers/media/video/wm8739.c +++ b/drivers/media/video/wm8739.c @@ -261,7 +261,8 @@ static int wm8739_command(struct i2c_client *client, unsigned cmd, void *arg) /* i2c implementation */ -static int wm8739_probe(struct i2c_client *client) +static int wm8739_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct wm8739_state *state; diff --git a/drivers/media/video/wm8775.c b/drivers/media/video/wm8775.c index 869f9e7946b6..67a409e60c46 100644 --- a/drivers/media/video/wm8775.c +++ b/drivers/media/video/wm8775.c @@ -159,7 +159,8 @@ static int wm8775_command(struct i2c_client *client, unsigned cmd, void *arg) * concerning the addresses: i2c wants 7 bit (without the r/w bit), so '>>1' */ -static int wm8775_probe(struct i2c_client 
*client) +static int wm8775_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct wm8775_state *state; diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index f389a28720d2..67ba8ae3217c 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -326,7 +326,8 @@ static struct bin_attribute nvram = { static struct i2c_driver ds1307_driver; -static int __devinit ds1307_probe(struct i2c_client *client) +static int __devinit ds1307_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct ds1307 *ds1307; int err = -ENODEV; diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c index 45bda186befc..104dcfd5d9a8 100644 --- a/drivers/rtc/rtc-ds1374.c +++ b/drivers/rtc/rtc-ds1374.c @@ -355,7 +355,8 @@ static const struct rtc_class_ops ds1374_rtc_ops = { .ioctl = ds1374_ioctl, }; -static int ds1374_probe(struct i2c_client *client) +static int ds1374_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct ds1374 *ds1374; int ret; diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c index fb15e3fb4ce2..d75d8faeead0 100644 --- a/drivers/rtc/rtc-isl1208.c +++ b/drivers/rtc/rtc-isl1208.c @@ -490,7 +490,7 @@ isl1208_sysfs_unregister(struct device *dev) } static int -isl1208_probe(struct i2c_client *client) +isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id) { int rc = 0; struct rtc_device *rtc; diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 1cb33cac1237..2ee0d070095a 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -756,7 +756,8 @@ static struct notifier_block wdt_notifier = { * ***************************************************************************** */ -static int m41t80_probe(struct i2c_client *client) +static int m41t80_probe(struct i2c_client *client, + const struct i2c_device_id *id) { int i, rc = 0; struct rtc_device *rtc = NULL; diff --git a/drivers/rtc/rtc-pcf8563.c 
b/drivers/rtc/rtc-pcf8563.c index a41681d26eba..7b3c31db0fc0 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -246,7 +246,8 @@ static const struct rtc_class_ops pcf8563_rtc_ops = { .set_time = pcf8563_rtc_set_time, }; -static int pcf8563_probe(struct i2c_client *client) +static int pcf8563_probe(struct i2c_client *client, + const struct i2c_device_id *id) { struct pcf8563 *pcf8563; diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index 7e63074708eb..47db289bb0a3 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -494,7 +494,8 @@ static void rs5c_sysfs_unregister(struct device *dev) static struct i2c_driver rs5c372_driver; -static int rs5c372_probe(struct i2c_client *client) +static int rs5c372_probe(struct i2c_client *client, + const struct i2c_device_id *id) { int err = 0; struct rs5c372 *rs5c372; diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index e8abc90c32c5..ab0c6d221404 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -195,7 +195,8 @@ static const struct rtc_class_ops s35390a_rtc_ops = { static struct i2c_driver s35390a_driver; -static int s35390a_probe(struct i2c_client *client) +static int s35390a_probe(struct i2c_client *client, + const struct i2c_device_id *id) { int err; unsigned int i; diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index 095282f63523..b792ad4dcaa9 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -494,7 +494,8 @@ static void x1205_sysfs_unregister(struct device *dev) } -static int x1205_probe(struct i2c_client *client) +static int x1205_probe(struct i2c_client *client, + const struct i2c_device_id *id) { int err = 0; unsigned char sr; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 365e0df3646b..89cb34d5b0ba 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -126,7 +126,7 @@ struct i2c_driver { * With the driver model, device enumeration is NEVER done by drivers; * 
it's done by infrastructure. (NEW STYLE DRIVERS ONLY) */ - int (*probe)(struct i2c_client *); + int (*probe)(struct i2c_client *, const struct i2c_device_id *); int (*remove)(struct i2c_client *); /* driver model interfaces that don't relate to enumeration */ @@ -140,11 +140,10 @@ struct i2c_driver { int (*command)(struct i2c_client *client,unsigned int cmd, void *arg); struct device_driver driver; + const struct i2c_device_id *id_table; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) -#define I2C_NAME_SIZE 20 - /** * struct i2c_client - represent an I2C slave device * @flags: I2C_CLIENT_TEN indicates the device uses a ten bit chip address; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 139d49d2f078..d73eceaa7afb 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -368,4 +368,15 @@ struct virtio_device_id { }; #define VIRTIO_DEV_ANY_ID 0xffffffff +/* i2c */ + +#define I2C_NAME_SIZE 20 +#define I2C_MODULE_PREFIX "i2c:" + +struct i2c_device_id { + char name[I2C_NAME_SIZE]; + kernel_ulong_t driver_data; /* Data private to the driver */ +}; + + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index 316a58453134..020d05758bd8 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -107,9 +107,11 @@ int v4l2_chip_match_host(u32 id_type, u32 chip_id); struct i2c_driver; struct i2c_adapter; struct i2c_client; +struct i2c_device_id; int v4l2_i2c_attach(struct i2c_adapter *adapter, int address, struct i2c_driver *driver, - const char *name, int (*probe)(struct i2c_client *)); + const char *name, + int (*probe)(struct i2c_client *, const struct i2c_device_id *)); /* ------------------------------------------------------------------------- */ diff --git a/include/media/v4l2-i2c-drv-legacy.h b/include/media/v4l2-i2c-drv-legacy.h index e7645578fc22..347b6f8beb23 100644 --- 
a/include/media/v4l2-i2c-drv-legacy.h +++ b/include/media/v4l2-i2c-drv-legacy.h @@ -25,7 +25,7 @@ struct v4l2_i2c_driver_data { const char * const name; int driverid; int (*command)(struct i2c_client *client, unsigned int cmd, void *arg); - int (*probe)(struct i2c_client *client); + int (*probe)(struct i2c_client *client, const struct i2c_device_id *id); int (*remove)(struct i2c_client *client); int (*suspend)(struct i2c_client *client, pm_message_t state); int (*resume)(struct i2c_client *client); diff --git a/include/media/v4l2-i2c-drv.h b/include/media/v4l2-i2c-drv.h index 9e4bab276915..7b6f06be7950 100644 --- a/include/media/v4l2-i2c-drv.h +++ b/include/media/v4l2-i2c-drv.h @@ -30,7 +30,7 @@ struct v4l2_i2c_driver_data { const char * const name; int driverid; int (*command)(struct i2c_client *client, unsigned int cmd, void *arg); - int (*probe)(struct i2c_client *client); + int (*probe)(struct i2c_client *client, const struct i2c_device_id *id); int (*remove)(struct i2c_client *client); int (*suspend)(struct i2c_client *client, pm_message_t state); int (*resume)(struct i2c_client *client); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 769b69db89c1..e04c4218cb52 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -576,6 +576,15 @@ static int do_virtio_entry(const char *filename, struct virtio_device_id *id, return 1; } +/* Looks like: i2c:S */ +static int do_i2c_entry(const char *filename, struct i2c_device_id *id, + char *alias) +{ + sprintf(alias, I2C_MODULE_PREFIX "%s", id->name); + + return 1; +} + /* Ignore any prefix, eg. 
v850 prepends _ */ static inline int sym_is(const char *symbol, const char *name) { @@ -704,6 +713,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, do_table(symval, sym->st_size, sizeof(struct virtio_device_id), "virtio", do_virtio_entry, mod); + else if (sym_is(symname, "__mod_i2c_device_table")) + do_table(symval, sym->st_size, + sizeof(struct i2c_device_id), "i2c", + do_i2c_entry, mod); free(zeros); } -- cgit v1.2.3-71-gd317 From 3760f736716f74bdc62a4ba5406934338da93eb2 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 29 Apr 2008 23:11:40 +0200 Subject: i2c: Convert most new-style drivers to use module aliasing Based on earlier work by Jon Smirl and Jochen Friedrich. Update most new-style i2c drivers to use standard module aliasing instead of the old driver_name/type driver matching scheme. I've left the video drivers apart (except for SoC camera drivers) as they're a bit more difficult to deal with, they'll have their own patch later. Signed-off-by: Jean Delvare Cc: Jon Smirl Cc: Jochen Friedrich --- arch/arm/mach-at91/board-csb337.c | 3 +- arch/arm/mach-at91/board-dk.c | 3 +- arch/arm/mach-at91/board-eb9200.c | 3 +- arch/arm/mach-iop32x/em7210.c | 3 +- arch/arm/mach-iop32x/glantank.c | 4 +- arch/arm/mach-iop32x/n2100.c | 4 +- arch/arm/mach-ixp4xx/dsmg600-setup.c | 2 +- arch/arm/mach-ixp4xx/nas100d-setup.c | 2 +- arch/arm/mach-ixp4xx/nslu2-setup.c | 2 +- arch/arm/mach-omap1/board-h2.c | 2 - arch/arm/mach-omap1/board-h3.c | 3 +- arch/arm/mach-omap1/board-osk.c | 1 - arch/arm/mach-orion5x/db88f5281-setup.c | 4 +- arch/arm/mach-orion5x/dns323-setup.c | 7 +-- arch/arm/mach-orion5x/kurobox_pro-setup.c | 4 +- arch/arm/mach-orion5x/rd88f5182-setup.c | 4 +- arch/arm/mach-orion5x/ts209-setup.c | 3 +- arch/arm/mach-pxa/pcm990-baseboard.c | 5 +- arch/blackfin/mach-bf533/boards/stamp.c | 3 -- arch/blackfin/mach-bf537/boards/stamp.c | 3 -- arch/blackfin/mach-bf548/boards/ezkit.c | 2 - arch/powerpc/sysdev/fsl_soc.c | 27 +++++------
arch/sh/boards/renesas/migor/setup.c | 3 +- arch/sh/boards/renesas/r7780rp/setup.c | 3 +- drivers/gpio/pca953x.c | 23 ++------- drivers/gpio/pcf857x.c | 33 +++++++------ drivers/hwmon/f75375s.c | 23 +++++---- drivers/i2c/busses/i2c-taos-evm.c | 3 +- drivers/i2c/chips/ds1682.c | 7 +++ drivers/i2c/chips/menelaus.c | 7 +++ drivers/i2c/chips/tps65010.c | 29 +++++------- drivers/i2c/chips/tsl2550.c | 7 +++ drivers/media/video/mt9m001.c | 7 +++ drivers/media/video/mt9v022.c | 7 +++ drivers/rtc/rtc-ds1307.c | 63 ++++++++++--------------- drivers/rtc/rtc-ds1374.c | 7 +++ drivers/rtc/rtc-isl1208.c | 7 +++ drivers/rtc/rtc-m41t80.c | 78 ++++++++----------------------- drivers/rtc/rtc-pcf8563.c | 7 +++ drivers/rtc/rtc-rs5c372.c | 24 +++++----- drivers/rtc/rtc-s35390a.c | 7 +++ drivers/rtc/rtc-x1205.c | 7 +++ include/linux/i2c.h | 12 ++--- 43 files changed, 211 insertions(+), 247 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-at91/board-csb337.c b/arch/arm/mach-at91/board-csb337.c index 26fea4dcc3a0..81f1ebb4e964 100644 --- a/arch/arm/mach-at91/board-csb337.c +++ b/arch/arm/mach-at91/board-csb337.c @@ -79,8 +79,7 @@ static struct at91_udc_data __initdata csb337_udc_data = { static struct i2c_board_info __initdata csb337_i2c_devices[] = { { - I2C_BOARD_INFO("rtc-ds1307", 0x68), - .type = "ds1307", + I2C_BOARD_INFO("ds1307", 0x68), }, }; diff --git a/arch/arm/mach-at91/board-dk.c b/arch/arm/mach-at91/board-dk.c index 0a897efeba8e..c1a813c7169b 100644 --- a/arch/arm/mach-at91/board-dk.c +++ b/arch/arm/mach-at91/board-dk.c @@ -132,8 +132,7 @@ static struct i2c_board_info __initdata dk_i2c_devices[] = { I2C_BOARD_INFO("x9429", 0x28), }, { - I2C_BOARD_INFO("at24c", 0x50), - .type = "24c1024", + I2C_BOARD_INFO("24c1024", 0x50), } }; diff --git a/arch/arm/mach-at91/board-eb9200.c b/arch/arm/mach-at91/board-eb9200.c index b7b79bb9d6c4..af1a1d8ecc30 100644 --- a/arch/arm/mach-at91/board-eb9200.c +++ b/arch/arm/mach-at91/board-eb9200.c @@ -93,8 +93,7 @@ static struct 
at91_mmc_data __initdata eb9200_mmc_data = { static struct i2c_board_info __initdata eb9200_i2c_devices[] = { { - I2C_BOARD_INFO("at24c", 0x50), - .type = "24c512", + I2C_BOARD_INFO("24c512", 0x50), }, }; diff --git a/arch/arm/mach-iop32x/em7210.c b/arch/arm/mach-iop32x/em7210.c index c947152f9a3c..4877597c8758 100644 --- a/arch/arm/mach-iop32x/em7210.c +++ b/arch/arm/mach-iop32x/em7210.c @@ -50,8 +50,7 @@ static struct sys_timer em7210_timer = { */ static struct i2c_board_info __initdata em7210_i2c_devices[] = { { - I2C_BOARD_INFO("rtc-rs5c372", 0x32), - .type = "rs5c372a", + I2C_BOARD_INFO("rs5c372a", 0x32), }, }; diff --git a/arch/arm/mach-iop32x/glantank.c b/arch/arm/mach-iop32x/glantank.c index d2a7b04f1cb0..d4fca75ce542 100644 --- a/arch/arm/mach-iop32x/glantank.c +++ b/arch/arm/mach-iop32x/glantank.c @@ -176,12 +176,10 @@ static struct f75375s_platform_data glantank_f75375s = { static struct i2c_board_info __initdata glantank_i2c_devices[] = { { - I2C_BOARD_INFO("rtc-rs5c372", 0x32), - .type = "rs5c372a", + I2C_BOARD_INFO("rs5c372a", 0x32), }, { I2C_BOARD_INFO("f75375", 0x2e), - .type = "f75375", .platform_data = &glantank_f75375s, }, }; diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c index bc91d6e66bc4..2741063bf361 100644 --- a/arch/arm/mach-iop32x/n2100.c +++ b/arch/arm/mach-iop32x/n2100.c @@ -208,12 +208,10 @@ static struct f75375s_platform_data n2100_f75375s = { static struct i2c_board_info __initdata n2100_i2c_devices[] = { { - I2C_BOARD_INFO("rtc-rs5c372", 0x32), - .type = "rs5c372b", + I2C_BOARD_INFO("rs5c372b", 0x32), }, { I2C_BOARD_INFO("f75375", 0x2e), - .type = "f75375", .platform_data = &n2100_f75375s, }, }; diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c index 8cb07437a807..a51bfa6978b6 100644 --- a/arch/arm/mach-ixp4xx/dsmg600-setup.c +++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c @@ -65,7 +65,7 @@ static struct platform_device dsmg600_i2c_gpio = { static struct i2c_board_info 
__initdata dsmg600_i2c_board_info [] = { { - I2C_BOARD_INFO("rtc-pcf8563", 0x51), + I2C_BOARD_INFO("pcf8563", 0x51), }, }; diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c index 159e1c4f1eda..84b5e62a9c0a 100644 --- a/arch/arm/mach-ixp4xx/nas100d-setup.c +++ b/arch/arm/mach-ixp4xx/nas100d-setup.c @@ -54,7 +54,7 @@ static struct platform_device nas100d_flash = { static struct i2c_board_info __initdata nas100d_i2c_board_info [] = { { - I2C_BOARD_INFO("rtc-pcf8563", 0x51), + I2C_BOARD_INFO("pcf8563", 0x51), }, }; diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c index d9a182895a0f..a48a6655b887 100644 --- a/arch/arm/mach-ixp4xx/nslu2-setup.c +++ b/arch/arm/mach-ixp4xx/nslu2-setup.c @@ -57,7 +57,7 @@ static struct i2c_gpio_platform_data nslu2_i2c_gpio_data = { static struct i2c_board_info __initdata nslu2_i2c_board_info [] = { { - I2C_BOARD_INFO("rtc-x1205", 0x6f), + I2C_BOARD_INFO("x1205", 0x6f), }, }; diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c index 507987720015..4b444fdaafea 100644 --- a/arch/arm/mach-omap1/board-h2.c +++ b/arch/arm/mach-omap1/board-h2.c @@ -351,11 +351,9 @@ static void __init h2_init_smc91x(void) static struct i2c_board_info __initdata h2_i2c_board_info[] = { { I2C_BOARD_INFO("tps65010", 0x48), - .type = "tps65010", .irq = OMAP_GPIO_IRQ(58), }, { I2C_BOARD_INFO("isp1301_omap", 0x2d), - .type = "isp1301_omap", .irq = OMAP_GPIO_IRQ(2), }, }; diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c index c3ef1ee5f77b..7fbaa8d648cd 100644 --- a/arch/arm/mach-omap1/board-h3.c +++ b/arch/arm/mach-omap1/board-h3.c @@ -473,8 +473,7 @@ static struct omap_board_config_kernel h3_config[] __initdata = { static struct i2c_board_info __initdata h3_i2c_board_info[] = { { - I2C_BOARD_INFO("tps65010", 0x48), - .type = "tps65013", + I2C_BOARD_INFO("tps65013", 0x48), /* .irq = OMAP_GPIO_IRQ(??), */ }, }; diff --git 
a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index 4f9baba7d893..a66505f58b15 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -254,7 +254,6 @@ static struct tps65010_board tps_board = { static struct i2c_board_info __initdata osk_i2c_board_info[] = { { I2C_BOARD_INFO("tps65010", 0x48), - .type = "tps65010", .irq = OMAP_GPIO_IRQ(OMAP_MPUIO(1)), .platform_data = &tps_board, diff --git a/arch/arm/mach-orion5x/db88f5281-setup.c b/arch/arm/mach-orion5x/db88f5281-setup.c index 872aed372327..ea3141e3e3c0 100644 --- a/arch/arm/mach-orion5x/db88f5281-setup.c +++ b/arch/arm/mach-orion5x/db88f5281-setup.c @@ -292,9 +292,7 @@ static struct mv643xx_eth_platform_data db88f5281_eth_data = { * RTC DS1339 on I2C bus ****************************************************************************/ static struct i2c_board_info __initdata db88f5281_i2c_rtc = { - .driver_name = "rtc-ds1307", - .type = "ds1339", - .addr = 0x68, + I2C_BOARD_INFO("ds1339", 0x68), }; /***************************************************************************** diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c index d67790ef236e..058a525c2ab6 100644 --- a/arch/arm/mach-orion5x/dns323-setup.c +++ b/arch/arm/mach-orion5x/dns323-setup.c @@ -220,19 +220,16 @@ static struct platform_device *dns323_plat_devices[] __initdata = { static struct i2c_board_info __initdata dns323_i2c_devices[] = { { I2C_BOARD_INFO("g760a", 0x3e), - .type = "g760a", }, #if 0 /* this entry requires the new-style driver model lm75 driver, * for the meantime "insmod lm75.ko force_lm75=0,0x48" is needed */ { - I2C_BOARD_INFO("lm75", 0x48), - .type = "g751", + I2C_BOARD_INFO("g751", 0x48), }, #endif { - I2C_BOARD_INFO("rtc-m41t80", 0x68), - .type = "m41t80", + I2C_BOARD_INFO("m41t80", 0x68), } }; diff --git a/arch/arm/mach-orion5x/kurobox_pro-setup.c b/arch/arm/mach-orion5x/kurobox_pro-setup.c index 91413455beba..707db4be74a6 100644 --- 
a/arch/arm/mach-orion5x/kurobox_pro-setup.c +++ b/arch/arm/mach-orion5x/kurobox_pro-setup.c @@ -162,9 +162,7 @@ static struct mv643xx_eth_platform_data kurobox_pro_eth_data = { * RTC 5C372a on I2C bus ****************************************************************************/ static struct i2c_board_info __initdata kurobox_pro_i2c_rtc = { - .driver_name = "rtc-rs5c372", - .type = "rs5c372a", - .addr = 0x32, + I2C_BOARD_INFO("rs5c372a", 0x32), }; /***************************************************************************** diff --git a/arch/arm/mach-orion5x/rd88f5182-setup.c b/arch/arm/mach-orion5x/rd88f5182-setup.c index 37e8b2dc3ed5..7082fe8f83b1 100644 --- a/arch/arm/mach-orion5x/rd88f5182-setup.c +++ b/arch/arm/mach-orion5x/rd88f5182-setup.c @@ -224,9 +224,7 @@ static struct mv643xx_eth_platform_data rd88f5182_eth_data = { * RTC DS1338 on I2C bus ****************************************************************************/ static struct i2c_board_info __initdata rd88f5182_i2c_rtc = { - .driver_name = "rtc-ds1307", - .type = "ds1338", - .addr = 0x68, + I2C_BOARD_INFO("ds1338", 0x68), }; /***************************************************************************** diff --git a/arch/arm/mach-orion5x/ts209-setup.c b/arch/arm/mach-orion5x/ts209-setup.c index fd43863a86f6..6f93668b0ed5 100644 --- a/arch/arm/mach-orion5x/ts209-setup.c +++ b/arch/arm/mach-orion5x/ts209-setup.c @@ -276,8 +276,7 @@ static void __init ts209_find_mac_addr(void) #define TS209_RTC_GPIO 3 static struct i2c_board_info __initdata qnap_ts209_i2c_rtc = { - .driver_name = "rtc-s35390a", - .addr = 0x30, + I2C_BOARD_INFO("s35390a", 0x30), .irq = 0, }; diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c index e6be9d0aeccf..49d951db0f3d 100644 --- a/arch/arm/mach-pxa/pcm990-baseboard.c +++ b/arch/arm/mach-pxa/pcm990-baseboard.c @@ -320,16 +320,13 @@ static struct soc_camera_link iclink[] = { static struct i2c_board_info __initdata pcm990_i2c_devices[] = { { /* 
Must initialize before the camera(s) */ - I2C_BOARD_INFO("pca953x", 0x41), - .type = "pca9536", + I2C_BOARD_INFO("pca9536", 0x41), .platform_data = &pca9536_data, }, { I2C_BOARD_INFO("mt9v022", 0x48), - .type = "mt9v022", .platform_data = &iclink[0], /* With extender */ }, { I2C_BOARD_INFO("mt9m001", 0x5d), - .type = "mt9m001", .platform_data = &iclink[0], /* With extender */ }, }; diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c index fddce32901a2..024f418ae543 100644 --- a/arch/blackfin/mach-bf533/boards/stamp.c +++ b/arch/blackfin/mach-bf533/boards/stamp.c @@ -499,20 +499,17 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = { #if defined(CONFIG_JOYSTICK_AD7142) || defined(CONFIG_JOYSTICK_AD7142_MODULE) { I2C_BOARD_INFO("ad7142_joystick", 0x2C), - .type = "ad7142_joystick", .irq = 39, }, #endif #if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE) { I2C_BOARD_INFO("pcf8574_lcd", 0x22), - .type = "pcf8574_lcd", }, #endif #if defined(CONFIG_TWI_KEYPAD) || defined(CONFIG_TWI_KEYPAD_MODULE) { I2C_BOARD_INFO("pcf8574_keypad", 0x27), - .type = "pcf8574_keypad", .irq = 39, }, #endif diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c index 0cec14b1ef5c..d3727b7c2d7d 100644 --- a/arch/blackfin/mach-bf537/boards/stamp.c +++ b/arch/blackfin/mach-bf537/boards/stamp.c @@ -751,20 +751,17 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = { #if defined(CONFIG_JOYSTICK_AD7142) || defined(CONFIG_JOYSTICK_AD7142_MODULE) { I2C_BOARD_INFO("ad7142_joystick", 0x2C), - .type = "ad7142_joystick", .irq = 55, }, #endif #if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE) { I2C_BOARD_INFO("pcf8574_lcd", 0x22), - .type = "pcf8574_lcd", }, #endif #if defined(CONFIG_TWI_KEYPAD) || defined(CONFIG_TWI_KEYPAD_MODULE) { I2C_BOARD_INFO("pcf8574_keypad", 0x27), - .type = "pcf8574_keypad", .irq = 72, }, #endif diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c 
b/arch/blackfin/mach-bf548/boards/ezkit.c index 231dfbd3bc1f..b00f68ac6bc9 100644 --- a/arch/blackfin/mach-bf548/boards/ezkit.c +++ b/arch/blackfin/mach-bf548/boards/ezkit.c @@ -641,13 +641,11 @@ static struct i2c_board_info __initdata bfin_i2c_board_info1[] = { #if defined(CONFIG_TWI_LCD) || defined(CONFIG_TWI_LCD_MODULE) { I2C_BOARD_INFO("pcf8574_lcd", 0x22), - .type = "pcf8574_lcd", }, #endif #if defined(CONFIG_TWI_KEYPAD) || defined(CONFIG_TWI_KEYPAD_MODULE) { I2C_BOARD_INFO("pcf8574_keypad", 0x27), - .type = "pcf8574_keypad", .irq = 212, }, #endif diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 7b45670c7af3..324c01b70ddd 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -418,22 +418,21 @@ arch_initcall(gfar_of_init); #include struct i2c_driver_device { char *of_device; - char *i2c_driver; char *i2c_type; }; static struct i2c_driver_device i2c_devices[] __initdata = { - {"ricoh,rs5c372a", "rtc-rs5c372", "rs5c372a",}, - {"ricoh,rs5c372b", "rtc-rs5c372", "rs5c372b",}, - {"ricoh,rv5c386", "rtc-rs5c372", "rv5c386",}, - {"ricoh,rv5c387a", "rtc-rs5c372", "rv5c387a",}, - {"dallas,ds1307", "rtc-ds1307", "ds1307",}, - {"dallas,ds1337", "rtc-ds1307", "ds1337",}, - {"dallas,ds1338", "rtc-ds1307", "ds1338",}, - {"dallas,ds1339", "rtc-ds1307", "ds1339",}, - {"dallas,ds1340", "rtc-ds1307", "ds1340",}, - {"stm,m41t00", "rtc-ds1307", "m41t00"}, - {"dallas,ds1374", "rtc-ds1374", "rtc-ds1374",}, + {"ricoh,rs5c372a", "rs5c372a"}, + {"ricoh,rs5c372b", "rs5c372b"}, + {"ricoh,rv5c386", "rv5c386"}, + {"ricoh,rv5c387a", "rv5c387a"}, + {"dallas,ds1307", "ds1307"}, + {"dallas,ds1337", "ds1337"}, + {"dallas,ds1338", "ds1338"}, + {"dallas,ds1339", "ds1339"}, + {"dallas,ds1340", "ds1340"}, + {"stm,m41t00", "m41t00"}, + {"dallas,ds1374", "rtc-ds1374"}, }; static int __init of_find_i2c_driver(struct device_node *node, @@ -444,9 +443,7 @@ static int __init of_find_i2c_driver(struct device_node *node, for (i = 0; i < 
ARRAY_SIZE(i2c_devices); i++) { if (!of_device_is_compatible(node, i2c_devices[i].of_device)) continue; - if (strlcpy(info->driver_name, i2c_devices[i].i2c_driver, - KOBJ_NAME_LEN) >= KOBJ_NAME_LEN || - strlcpy(info->type, i2c_devices[i].i2c_type, + if (strlcpy(info->type, i2c_devices[i].i2c_type, I2C_NAME_SIZE) >= I2C_NAME_SIZE) return -ENOMEM; return 0; diff --git a/arch/sh/boards/renesas/migor/setup.c b/arch/sh/boards/renesas/migor/setup.c index 00d52a20d8a5..e7c150d49702 100644 --- a/arch/sh/boards/renesas/migor/setup.c +++ b/arch/sh/boards/renesas/migor/setup.c @@ -199,8 +199,7 @@ static struct platform_device *migor_devices[] __initdata = { static struct i2c_board_info __initdata migor_i2c_devices[] = { { - I2C_BOARD_INFO("rtc-rs5c372", 0x32), - .type = "rs5c372b", + I2C_BOARD_INFO("rs5c372b", 0x32), }, { I2C_BOARD_INFO("migor_ts", 0x51), diff --git a/arch/sh/boards/renesas/r7780rp/setup.c b/arch/sh/boards/renesas/r7780rp/setup.c index a5c5e9236501..ac0a96522e45 100644 --- a/arch/sh/boards/renesas/r7780rp/setup.c +++ b/arch/sh/boards/renesas/r7780rp/setup.c @@ -199,8 +199,7 @@ static struct platform_device smbus_device = { static struct i2c_board_info __initdata highlander_i2c_devices[] = { { - I2C_BOARD_INFO("rtc-rs5c372", 0x32), - .type = "r2025sd", + I2C_BOARD_INFO("r2025sd", 0x32), }, }; diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c index 2670519236e5..5a99e81d2784 100644 --- a/drivers/gpio/pca953x.c +++ b/drivers/gpio/pca953x.c @@ -23,13 +23,7 @@ #define PCA953X_INVERT 2 #define PCA953X_DIRECTION 3 -/* This is temporary - in 2.6.26 i2c_driver_data should replace it. 
*/ -struct pca953x_desc { - char name[I2C_NAME_SIZE]; - unsigned long driver_data; -}; - -static const struct pca953x_desc pca953x_descs[] = { +static const struct i2c_device_id pca953x_id[] = { { "pca9534", 8, }, { "pca9535", 16, }, { "pca9536", 4, }, @@ -37,7 +31,9 @@ static const struct pca953x_desc pca953x_descs[] = { { "pca9538", 8, }, { "pca9539", 16, }, /* REVISIT several pca955x parts should work here too */ + { } }; +MODULE_DEVICE_TABLE(i2c, pca953x_id); struct pca953x_chip { unsigned gpio_start; @@ -193,26 +189,16 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) } static int __devinit pca953x_probe(struct i2c_client *client, - const struct i2c_device_id *did) + const struct i2c_device_id *id) { struct pca953x_platform_data *pdata; struct pca953x_chip *chip; int ret, i; - const struct pca953x_desc *id = NULL; pdata = client->dev.platform_data; if (pdata == NULL) return -ENODEV; - /* this loop vanishes when we get i2c_device_id */ - for (i = 0; i < ARRAY_SIZE(pca953x_descs); i++) - if (!strcmp(pca953x_descs[i].name, client->name)) { - id = pca953x_descs + i; - break; - } - if (!id) - return -ENODEV; - chip = kzalloc(sizeof(struct pca953x_chip), GFP_KERNEL); if (chip == NULL) return -ENOMEM; @@ -292,6 +278,7 @@ static struct i2c_driver pca953x_driver = { }, .probe = pca953x_probe, .remove = pca953x_remove, + .id_table = pca953x_id, }; static int __init pca953x_init(void) diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c index 8856870dd738..aa6cc8b2a2bc 100644 --- a/drivers/gpio/pcf857x.c +++ b/drivers/gpio/pcf857x.c @@ -26,6 +26,21 @@ #include +static const struct i2c_device_id pcf857x_id[] = { + { "pcf8574", 8 }, + { "pca8574", 8 }, + { "pca9670", 8 }, + { "pca9672", 8 }, + { "pca9674", 8 }, + { "pcf8575", 16 }, + { "pca8575", 16 }, + { "pca9671", 16 }, + { "pca9673", 16 }, + { "pca9675", 16 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, pcf857x_id); + /* * The pcf857x, pca857x, and pca967x chips only expose one read and one * 
write register. Writing a "one" bit (to match the reset state) lets @@ -173,13 +188,8 @@ static int pcf857x_probe(struct i2c_client *client, * * NOTE: we don't distinguish here between *4 and *4a parts. */ - if (strcmp(client->name, "pcf8574") == 0 - || strcmp(client->name, "pca8574") == 0 - || strcmp(client->name, "pca9670") == 0 - || strcmp(client->name, "pca9672") == 0 - || strcmp(client->name, "pca9674") == 0 - ) { - gpio->chip.ngpio = 8; + gpio->chip.ngpio = id->driver_data; + if (gpio->chip.ngpio == 8) { gpio->chip.direction_input = pcf857x_input8; gpio->chip.get = pcf857x_get8; gpio->chip.direction_output = pcf857x_output8; @@ -199,13 +209,7 @@ static int pcf857x_probe(struct i2c_client *client, * * NOTE: we don't distinguish here between '75 and '75c parts. */ - } else if (strcmp(client->name, "pcf8575") == 0 - || strcmp(client->name, "pca8575") == 0 - || strcmp(client->name, "pca9671") == 0 - || strcmp(client->name, "pca9673") == 0 - || strcmp(client->name, "pca9675") == 0 - ) { - gpio->chip.ngpio = 16; + } else if (gpio->chip.ngpio == 16) { gpio->chip.direction_input = pcf857x_input16; gpio->chip.get = pcf857x_get16; gpio->chip.direction_output = pcf857x_output16; @@ -314,6 +318,7 @@ static struct i2c_driver pcf857x_driver = { }, .probe = pcf857x_probe, .remove = pcf857x_remove, + .id_table = pcf857x_id, }; static int __init pcf857x_init(void) diff --git a/drivers/hwmon/f75375s.c b/drivers/hwmon/f75375s.c index 1f63bab05522..dc1f30e432ea 100644 --- a/drivers/hwmon/f75375s.c +++ b/drivers/hwmon/f75375s.c @@ -129,12 +129,20 @@ static struct i2c_driver f75375_legacy_driver = { .detach_client = f75375_detach_client, }; +static const struct i2c_device_id f75375_id[] = { + { "f75373", f75373 }, + { "f75375", f75375 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, f75375_id); + static struct i2c_driver f75375_driver = { .driver = { .name = "f75375", }, .probe = f75375_probe, .remove = f75375_remove, + .id_table = f75375_id, }; static inline int f75375_read8(struct 
i2c_client *client, u8 reg) @@ -645,15 +653,7 @@ static int f75375_probe(struct i2c_client *client, i2c_set_clientdata(client, data); data->client = client; mutex_init(&data->update_lock); - - if (strcmp(client->name, "f75375") == 0) - data->kind = f75375; - else if (strcmp(client->name, "f75373") == 0) - data->kind = f75373; - else { - dev_err(&client->dev, "Unsupported device: %s\n", client->name); - return -ENODEV; - } + data->kind = id->driver_data; if ((err = sysfs_create_group(&client->dev.kobj, &f75375_group))) goto exit_free; @@ -714,6 +714,7 @@ static int f75375_detect(struct i2c_adapter *adapter, int address, int kind) u8 version = 0; int err = 0; const char *name = ""; + struct i2c_device_id id; if (!(client = kzalloc(sizeof(*client), GFP_KERNEL))) { err = -ENOMEM; @@ -750,7 +751,9 @@ static int f75375_detect(struct i2c_adapter *adapter, int address, int kind) if ((err = i2c_attach_client(client))) goto exit_free; - if ((err = f75375_probe(client, NULL)) < 0) + strlcpy(id.name, name, I2C_NAME_SIZE); + id.driver_data = kind; + if ((err = f75375_probe(client, &id)) < 0) goto exit_detach; return 0; diff --git a/drivers/i2c/busses/i2c-taos-evm.c b/drivers/i2c/busses/i2c-taos-evm.c index 1b0cfd5472fd..de9db49e54d9 100644 --- a/drivers/i2c/busses/i2c-taos-evm.c +++ b/drivers/i2c/busses/i2c-taos-evm.c @@ -51,7 +51,6 @@ struct taos_data { /* TAOS TSL2550 EVM */ static struct i2c_board_info tsl2550_info = { I2C_BOARD_INFO("tsl2550", 0x39), - .type = "tsl2550", }; /* Instantiate i2c devices based on the adapter name */ @@ -59,7 +58,7 @@ static struct i2c_client *taos_instantiate_device(struct i2c_adapter *adapter) { if (!strncmp(adapter->name, "TAOS TSL2550 EVM", 16)) { dev_info(&adapter->dev, "Instantiating device %s at 0x%02x\n", - tsl2550_info.driver_name, tsl2550_info.addr); + tsl2550_info.type, tsl2550_info.addr); return i2c_new_device(adapter, &tsl2550_info); } diff --git a/drivers/i2c/chips/ds1682.c b/drivers/i2c/chips/ds1682.c index 
3070821030e4..23be4d42cb02 100644 --- a/drivers/i2c/chips/ds1682.c +++ b/drivers/i2c/chips/ds1682.c @@ -235,12 +235,19 @@ static int ds1682_remove(struct i2c_client *client) return 0; } +static const struct i2c_device_id ds1682_id[] = { + { "ds1682", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ds1682_id); + static struct i2c_driver ds1682_driver = { .driver = { .name = "ds1682", }, .probe = ds1682_probe, .remove = ds1682_remove, + .id_table = ds1682_id, }; static int __init ds1682_init(void) diff --git a/drivers/i2c/chips/menelaus.c b/drivers/i2c/chips/menelaus.c index 3b8ba7e75843..b36db1797c11 100644 --- a/drivers/i2c/chips/menelaus.c +++ b/drivers/i2c/chips/menelaus.c @@ -1243,12 +1243,19 @@ static int __exit menelaus_remove(struct i2c_client *client) return 0; } +static const struct i2c_device_id menelaus_id[] = { + { "menelaus", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, menelaus_id); + static struct i2c_driver menelaus_i2c_driver = { .driver = { .name = DRIVER_NAME, }, .probe = menelaus_probe, .remove = __exit_p(menelaus_remove), + .id_table = menelaus_id, }; static int __init menelaus_init(void) diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c index 6ab3619a49de..85949685191b 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c @@ -64,7 +64,6 @@ static struct i2c_driver tps65010_driver; * as part of board setup by a bootloader. 
*/ enum tps_model { - TPS_UNKNOWN = 0, TPS65010, TPS65011, TPS65012, @@ -554,20 +553,7 @@ static int tps65010_probe(struct i2c_client *client, mutex_init(&tps->lock); INIT_DELAYED_WORK(&tps->work, tps65010_work); tps->client = client; - - if (strcmp(client->name, "tps65010") == 0) - tps->model = TPS65010; - else if (strcmp(client->name, "tps65011") == 0) - tps->model = TPS65011; - else if (strcmp(client->name, "tps65012") == 0) - tps->model = TPS65012; - else if (strcmp(client->name, "tps65013") == 0) - tps->model = TPS65013; - else { - dev_warn(&client->dev, "unknown chip '%s'\n", client->name); - status = -ENODEV; - goto fail1; - } + tps->model = id->driver_data; /* the IRQ is active low, but many gpio lines can't support that * so this driver uses falling-edge triggers instead. @@ -596,9 +582,6 @@ static int tps65010_probe(struct i2c_client *client, case TPS65012: tps->por = 1; break; - case TPS_UNKNOWN: - printk(KERN_WARNING "%s: unknown TPS chip\n", DRIVER_NAME); - break; /* else CHGCONFIG.POR is replaced by AUA, enabling a WAIT mode */ } tps->chgconf = i2c_smbus_read_byte_data(client, TPS_CHGCONFIG); @@ -685,12 +668,22 @@ fail1: return status; } +static const struct i2c_device_id tps65010_id[] = { + { "tps65010", TPS65010 }, + { "tps65011", TPS65011 }, + { "tps65012", TPS65012 }, + { "tps65013", TPS65013 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, tps65010_id); + static struct i2c_driver tps65010_driver = { .driver = { .name = "tps65010", }, .probe = tps65010_probe, .remove = __exit_p(tps65010_remove), + .id_table = tps65010_id, }; /*-------------------------------------------------------------------------*/ diff --git a/drivers/i2c/chips/tsl2550.c b/drivers/i2c/chips/tsl2550.c index 59c2c662cc45..1a9cc135219f 100644 --- a/drivers/i2c/chips/tsl2550.c +++ b/drivers/i2c/chips/tsl2550.c @@ -452,6 +452,12 @@ static int tsl2550_resume(struct i2c_client *client) #endif /* CONFIG_PM */ +static const struct i2c_device_id tsl2550_id[] = { + { "tsl2550", 0 }, + { } +}; 
+MODULE_DEVICE_TABLE(i2c, tsl2550_id); + static struct i2c_driver tsl2550_driver = { .driver = { .name = TSL2550_DRV_NAME, @@ -461,6 +467,7 @@ static struct i2c_driver tsl2550_driver = { .resume = tsl2550_resume, .probe = tsl2550_probe, .remove = __devexit_p(tsl2550_remove), + .id_table = tsl2550_id, }; static int __init tsl2550_init(void) diff --git a/drivers/media/video/mt9m001.c b/drivers/media/video/mt9m001.c index 26cb27604e04..ba09826ddf48 100644 --- a/drivers/media/video/mt9m001.c +++ b/drivers/media/video/mt9m001.c @@ -697,12 +697,19 @@ static int mt9m001_remove(struct i2c_client *client) return 0; } +static const struct i2c_device_id mt9m001_id[] = { + { "mt9m001", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, mt9m001_id); + static struct i2c_driver mt9m001_i2c_driver = { .driver = { .name = "mt9m001", }, .probe = mt9m001_probe, .remove = mt9m001_remove, + .id_table = mt9m001_id, }; static int __init mt9m001_mod_init(void) diff --git a/drivers/media/video/mt9v022.c b/drivers/media/video/mt9v022.c index 7b1dd7ede9d0..7b223691ce96 100644 --- a/drivers/media/video/mt9v022.c +++ b/drivers/media/video/mt9v022.c @@ -819,12 +819,19 @@ static int mt9v022_remove(struct i2c_client *client) return 0; } +static const struct i2c_device_id mt9v022_id[] = { + { "mt9v022", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, mt9v022_id); + static struct i2c_driver mt9v022_i2c_driver = { .driver = { .name = "mt9v022", }, .probe = mt9v022_probe, .remove = mt9v022_remove, + .id_table = mt9v022_id, }; static int __init mt9v022_mod_init(void) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 67ba8ae3217c..bbf97e65202a 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -99,45 +99,38 @@ struct ds1307 { }; struct chip_desc { - char name[9]; unsigned nvram56:1; unsigned alarm:1; - enum ds_type type; }; -static const struct chip_desc chips[] = { { - .name = "ds1307", - .type = ds_1307, +static const struct chip_desc chips[] = { +[ds_1307] = { .nvram56 = 1, 
-}, { - .name = "ds1337", - .type = ds_1337, +}, +[ds_1337] = { .alarm = 1, -}, { - .name = "ds1338", - .type = ds_1338, +}, +[ds_1338] = { .nvram56 = 1, -}, { - .name = "ds1339", - .type = ds_1339, +}, +[ds_1339] = { .alarm = 1, -}, { - .name = "ds1340", - .type = ds_1340, -}, { - .name = "m41t00", - .type = m41t00, +}, +[ds_1340] = { +}, +[m41t00] = { }, }; -static inline const struct chip_desc *find_chip(const char *s) -{ - unsigned i; - - for (i = 0; i < ARRAY_SIZE(chips); i++) - if (strnicmp(s, chips[i].name, sizeof chips[i].name) == 0) - return &chips[i]; - return NULL; -} +static const struct i2c_device_id ds1307_id[] = { + { "ds1307", ds_1307 }, + { "ds1337", ds_1337 }, + { "ds1338", ds_1338 }, + { "ds1339", ds_1339 }, + { "ds1340", ds_1340 }, + { "m41t00", m41t00 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ds1307_id); static int ds1307_get_time(struct device *dev, struct rtc_time *t) { @@ -332,16 +325,9 @@ static int __devinit ds1307_probe(struct i2c_client *client, struct ds1307 *ds1307; int err = -ENODEV; int tmp; - const struct chip_desc *chip; + const struct chip_desc *chip = &chips[id->driver_data]; struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); - chip = find_chip(client->name); - if (!chip) { - dev_err(&client->dev, "unknown chip type '%s'\n", - client->name); - return -ENODEV; - } - if (!i2c_check_functionality(adapter, I2C_FUNC_I2C | I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) return -EIO; @@ -362,7 +348,7 @@ static int __devinit ds1307_probe(struct i2c_client *client, ds1307->msg[1].len = sizeof(ds1307->regs); ds1307->msg[1].buf = ds1307->regs; - ds1307->type = chip->type; + ds1307->type = id->driver_data; switch (ds1307->type) { case ds_1337: @@ -551,6 +537,7 @@ static struct i2c_driver ds1307_driver = { }, .probe = ds1307_probe, .remove = __devexit_p(ds1307_remove), + .id_table = ds1307_id, }; static int __init ds1307_init(void) diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c index 104dcfd5d9a8..fa2d2f8b3f4d 100644 --- 
a/drivers/rtc/rtc-ds1374.c +++ b/drivers/rtc/rtc-ds1374.c @@ -41,6 +41,12 @@ #define DS1374_REG_SR_AF 0x01 /* Alarm Flag */ #define DS1374_REG_TCR 0x09 /* Trickle Charge */ +static const struct i2c_device_id ds1374_id[] = { + { "rtc-ds1374", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ds1374_id); + struct ds1374 { struct i2c_client *client; struct rtc_device *rtc; @@ -430,6 +436,7 @@ static struct i2c_driver ds1374_driver = { }, .probe = ds1374_probe, .remove = __devexit_p(ds1374_remove), + .id_table = ds1374_id, }; static int __init ds1374_init(void) diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c index d75d8faeead0..fbb90b1e4098 100644 --- a/drivers/rtc/rtc-isl1208.c +++ b/drivers/rtc/rtc-isl1208.c @@ -545,12 +545,19 @@ isl1208_remove(struct i2c_client *client) return 0; } +static const struct i2c_device_id isl1208_id[] = { + { "isl1208", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, isl1208_id); + static struct i2c_driver isl1208_driver = { .driver = { .name = "rtc-isl1208", }, .probe = isl1208_probe, .remove = isl1208_remove, + .id_table = isl1208_id, }; static int __init diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 2ee0d070095a..316bfaa80872 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -60,48 +60,21 @@ #define DRV_VERSION "0.05" -struct m41t80_chip_info { - const char *name; - u8 features; -}; - -static const struct m41t80_chip_info m41t80_chip_info_tbl[] = { - { - .name = "m41t80", - .features = 0, - }, - { - .name = "m41t81", - .features = M41T80_FEATURE_HT, - }, - { - .name = "m41t81s", - .features = M41T80_FEATURE_HT | M41T80_FEATURE_BL, - }, - { - .name = "m41t82", - .features = M41T80_FEATURE_HT | M41T80_FEATURE_BL, - }, - { - .name = "m41t83", - .features = M41T80_FEATURE_HT | M41T80_FEATURE_BL, - }, - { - .name = "m41st84", - .features = M41T80_FEATURE_HT | M41T80_FEATURE_BL, - }, - { - .name = "m41st85", - .features = M41T80_FEATURE_HT | M41T80_FEATURE_BL, - }, - { - .name = "m41st87", - 
.features = M41T80_FEATURE_HT | M41T80_FEATURE_BL, - }, +static const struct i2c_device_id m41t80_id[] = { + { "m41t80", 0 }, + { "m41t81", M41T80_FEATURE_HT }, + { "m41t81s", M41T80_FEATURE_HT | M41T80_FEATURE_BL }, + { "m41t82", M41T80_FEATURE_HT | M41T80_FEATURE_BL }, + { "m41t83", M41T80_FEATURE_HT | M41T80_FEATURE_BL }, + { "m41st84", M41T80_FEATURE_HT | M41T80_FEATURE_BL }, + { "m41st85", M41T80_FEATURE_HT | M41T80_FEATURE_BL }, + { "m41st87", M41T80_FEATURE_HT | M41T80_FEATURE_BL }, + { } }; +MODULE_DEVICE_TABLE(i2c, m41t80_id); struct m41t80_data { - const struct m41t80_chip_info *chip; + u8 features; struct rtc_device *rtc; }; @@ -208,7 +181,7 @@ static int m41t80_rtc_proc(struct device *dev, struct seq_file *seq) struct m41t80_data *clientdata = i2c_get_clientdata(client); u8 reg; - if (clientdata->chip->features & M41T80_FEATURE_BL) { + if (clientdata->features & M41T80_FEATURE_BL) { reg = i2c_smbus_read_byte_data(client, M41T80_REG_FLAGS); seq_printf(seq, "battery\t\t: %s\n", (reg & M41T80_FLAGS_BATT_LOW) ? 
"exhausted" : "ok"); @@ -759,10 +732,9 @@ static struct notifier_block wdt_notifier = { static int m41t80_probe(struct i2c_client *client, const struct i2c_device_id *id) { - int i, rc = 0; + int rc = 0; struct rtc_device *rtc = NULL; struct rtc_time tm; - const struct m41t80_chip_info *chip; struct m41t80_data *clientdata = NULL; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C @@ -774,19 +746,6 @@ static int m41t80_probe(struct i2c_client *client, dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); - chip = NULL; - for (i = 0; i < ARRAY_SIZE(m41t80_chip_info_tbl); i++) { - if (!strcmp(m41t80_chip_info_tbl[i].name, client->name)) { - chip = &m41t80_chip_info_tbl[i]; - break; - } - } - if (!chip) { - dev_err(&client->dev, "%s is not supported\n", client->name); - rc = -ENODEV; - goto exit; - } - clientdata = kzalloc(sizeof(*clientdata), GFP_KERNEL); if (!clientdata) { rc = -ENOMEM; @@ -802,7 +761,7 @@ static int m41t80_probe(struct i2c_client *client, } clientdata->rtc = rtc; - clientdata->chip = chip; + clientdata->features = id->driver_data; i2c_set_clientdata(client, clientdata); /* Make sure HT (Halt Update) bit is cleared */ @@ -811,7 +770,7 @@ static int m41t80_probe(struct i2c_client *client, goto ht_err; if (rc & M41T80_ALHOUR_HT) { - if (chip->features & M41T80_FEATURE_HT) { + if (clientdata->features & M41T80_FEATURE_HT) { m41t80_get_datetime(client, &tm); dev_info(&client->dev, "HT bit was set!\n"); dev_info(&client->dev, @@ -843,7 +802,7 @@ static int m41t80_probe(struct i2c_client *client, goto exit; #ifdef CONFIG_RTC_DRV_M41T80_WDT - if (chip->features & M41T80_FEATURE_HT) { + if (clientdata->features & M41T80_FEATURE_HT) { rc = misc_register(&wdt_dev); if (rc) goto exit; @@ -879,7 +838,7 @@ static int m41t80_remove(struct i2c_client *client) struct rtc_device *rtc = clientdata->rtc; #ifdef CONFIG_RTC_DRV_M41T80_WDT - if (clientdata->chip->features & M41T80_FEATURE_HT) { + if (clientdata->features & M41T80_FEATURE_HT) { 
misc_deregister(&wdt_dev); unregister_reboot_notifier(&wdt_notifier); } @@ -897,6 +856,7 @@ static struct i2c_driver m41t80_driver = { }, .probe = m41t80_probe, .remove = m41t80_remove, + .id_table = m41t80_id, }; static int __init m41t80_rtc_init(void) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 7b3c31db0fc0..0fc4c3630780 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -300,12 +300,19 @@ static int pcf8563_remove(struct i2c_client *client) return 0; } +static const struct i2c_device_id pcf8563_id[] = { + { "pcf8563", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, pcf8563_id); + static struct i2c_driver pcf8563_driver = { .driver = { .name = "rtc-pcf8563", }, .probe = pcf8563_probe, .remove = pcf8563_remove, + .id_table = pcf8563_id, }; static int __init pcf8563_init(void) diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c index 47db289bb0a3..56caf6b2c3e5 100644 --- a/drivers/rtc/rtc-rs5c372.c +++ b/drivers/rtc/rtc-rs5c372.c @@ -69,6 +69,15 @@ enum rtc_type { rtc_rv5c387a, }; +static const struct i2c_device_id rs5c372_id[] = { + { "rs5c372a", rtc_rs5c372a }, + { "rs5c372b", rtc_rs5c372b }, + { "rv5c386", rtc_rv5c386 }, + { "rv5c387a", rtc_rv5c387a }, + { } +}; +MODULE_DEVICE_TABLE(i2c, rs5c372_id); + /* REVISIT: this assumes that: * - we're in the 21st century, so it's safe to ignore the century * bit for rv5c38[67] (REG_MONTH bit 7); @@ -515,6 +524,7 @@ static int rs5c372_probe(struct i2c_client *client, rs5c372->client = client; i2c_set_clientdata(client, rs5c372); + rs5c372->type = id->driver_data; /* we read registers 0x0f then 0x00-0x0f; skip the first one */ rs5c372->regs = &rs5c372->buf[1]; @@ -523,19 +533,6 @@ static int rs5c372_probe(struct i2c_client *client, if (err < 0) goto exit_kfree; - if (strcmp(client->name, "rs5c372a") == 0) - rs5c372->type = rtc_rs5c372a; - else if (strcmp(client->name, "rs5c372b") == 0) - rs5c372->type = rtc_rs5c372b; - else if (strcmp(client->name, "rv5c386") == 
0) - rs5c372->type = rtc_rv5c386; - else if (strcmp(client->name, "rv5c387a") == 0) - rs5c372->type = rtc_rv5c387a; - else { - rs5c372->type = rtc_rs5c372b; - dev_warn(&client->dev, "assuming rs5c372b\n"); - } - /* clock may be set for am/pm or 24 hr time */ switch (rs5c372->type) { case rtc_rs5c372a: @@ -652,6 +649,7 @@ static struct i2c_driver rs5c372_driver = { }, .probe = rs5c372_probe, .remove = rs5c372_remove, + .id_table = rs5c372_id, }; static __init int rs5c372_init(void) diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index ab0c6d221404..29f47bacfc77 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -34,6 +34,12 @@ #define S35390A_FLAG_RESET 0x80 #define S35390A_FLAG_TEST 0x01 +static const struct i2c_device_id s35390a_id[] = { + { "s35390a", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, s35390a_id); + struct s35390a { struct i2c_client *client[8]; struct rtc_device *rtc; @@ -297,6 +303,7 @@ static struct i2c_driver s35390a_driver = { }, .probe = s35390a_probe, .remove = s35390a_remove, + .id_table = s35390a_id, }; static int __init s35390a_rtc_init(void) diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index b792ad4dcaa9..eaf55945f21b 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -553,12 +553,19 @@ static int x1205_remove(struct i2c_client *client) return 0; } +static const struct i2c_device_id x1205_id[] = { + { "x1205", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, x1205_id); + static struct i2c_driver x1205_driver = { .driver = { .name = "rtc-x1205", }, .probe = x1205_probe, .remove = x1205_remove, + .id_table = x1205_id, }; static int __init x1205_init(void) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 89cb34d5b0ba..cb63da5c2139 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -229,17 +229,17 @@ struct i2c_board_info { }; /** - * I2C_BOARD_INFO - macro used to list an i2c device and its driver - * @driver: identifies the driver to use with the device + 
* I2C_BOARD_INFO - macro used to list an i2c device and its address + * @dev_type: identifies the device type * @dev_addr: the device's address on the bus. * * This macro initializes essential fields of a struct i2c_board_info, * declaring what has been provided on a particular board. Optional - * fields (such as the chip type, its associated irq, or device-specific - * platform_data) are provided using conventional syntax. + * fields (such as associated irq, or device-specific platform_data) + * are provided using conventional syntax. */ -#define I2C_BOARD_INFO(driver,dev_addr) \ - .driver_name = (driver), .addr = (dev_addr) +#define I2C_BOARD_INFO(dev_type,dev_addr) \ + .type = (dev_type), .addr = (dev_addr) /* Add-on boards should register/unregister their devices; e.g. a board -- cgit v1.2.3-71-gd317 From f7e989301b6c232dec5489e94ee7741c85cb11ba Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Tue, 29 Apr 2008 17:47:34 -0400 Subject: [libata] linux/libata.h: reorganize ata_device struct members a bit Put the big stuff at the end, to prepare for upcoming changes (and also hopefully achieve nicer packing of remaining members). 
Signed-off-by: Jeff Garzik --- include/linux/libata.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 395a523d8c30..d1dfe872ee30 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -548,11 +548,6 @@ struct ata_device { u64 n_sectors; /* size of device, if ATA */ unsigned int class; /* ATA_DEV_xxx */ - union { - u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */ - u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ - }; - u8 pio_mode; u8 dma_mode; u8 xfer_mode; @@ -574,8 +569,13 @@ struct ata_device { u16 sectors; /* Number of sectors per track */ /* error history */ - struct ata_ering ering; int spdn_cnt; + struct ata_ering ering; + + union { + u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */ + u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ + }; }; /* Offset into struct ata_device. Fields above it are maintained -- cgit v1.2.3-71-gd317 From 267e4db9ac28a09973476e7ec2cb6807e609d35a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Apr 2008 08:11:12 -0400 Subject: ext4: Fix race between migration and mmap write Fail migrate if we allocated new blocks via mmap write. If we write to holes in the file via mmap, we end up allocating new blocks. This block allocation happens without taking inode->i_mutex. Since migrate is protected by i_mutex and migrate expects that no new blocks get allocated during migrate, fail migrate if new blocks get allocated. We can't take inode->i_mutex in the mmap write path because that would result in a locking order violation between i_mutex and mmap_sem. Also adding a separate rw_semaphore for protection is really high overhead for a rare operation such as migrate.
Signed-off-by: Aneesh Kumar K.V Acked-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 13 ++++++++++++- fs/ext4/migrate.c | 39 ++++++++++++++++++++++++++++++++++----- include/linux/ext4_fs.h | 1 + 3 files changed, 47 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8fab233cb05f..24a2604dde7b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, } else { retval = ext4_get_blocks_handle(handle, inode, block, max_blocks, bh, create, extend_disksize); + + if (retval > 0 && buffer_new(bh)) { + /* + * We allocated new blocks which will result in + * i_data's format changing. Force the migrate + * to fail by clearing migrate flags + */ + EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & + ~EXT4_EXT_MIGRATE; + } } up_write((&EXT4_I(inode)->i_data_sem)); return retval; @@ -2976,7 +2986,8 @@ static int ext4_do_update_inode(handle_t *handle, if (ext4_inode_blocks_set(handle, raw_inode, ei)) goto out_brelse; raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags); + /* clear the migrate flag in the raw_inode */ + raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE); if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != cpu_to_le32(EXT4_OS_HURD)) raw_inode->i_file_acl_high = diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 5c1e27de7755..9b4fb07d192c 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) } static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, - struct inode *tmp_inode) + struct inode *tmp_inode) { int retval; __le32 i_data[3]; @@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, * i_data field of the original inode */ retval = ext4_journal_extend(handle, 1); - if (retval != 
0) { + if (retval) { retval = ext4_journal_restart(handle, 1); if (retval) goto err_out; @@ -350,6 +350,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; down_write(&EXT4_I(inode)->i_data_sem); + /* + * if EXT4_EXT_MIGRATE is cleared a block allocation + * happened after we started the migrate. We need to + * fail the migrate + */ + if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { + retval = -EAGAIN; + up_write(&EXT4_I(inode)->i_data_sem); + goto err_out; + } else + EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & + ~EXT4_EXT_MIGRATE; /* * We have the extent map build with the tmp inode. * Now copy the i_data across @@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, * switch the inode format to prevent read. */ mutex_lock(&(inode->i_mutex)); + /* + * Even though we take i_mutex we can still cause block allocation + * via mmap write to holes. If we have allocated new blocks we fail + * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. + * The flag is updated with i_data_sem held to prevent racing with + * block allocation. + */ + down_read((&EXT4_I(inode)->i_data_sem)); + EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; + up_read((&EXT4_I(inode)->i_data_sem)); + handle = ext4_journal_start(inode, 1); ei = EXT4_I(inode); @@ -559,9 +582,15 @@ err_out: * tmp_inode */ free_ext_block(handle, tmp_inode); - else - retval = ext4_ext_swap_inode_data(handle, inode, - tmp_inode); + else { + retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode); + if (retval) + /* + * if we fail to swap inode data free the extent + * details of the tmp inode + */ + free_ext_block(handle, tmp_inode); + } /* We mark the tmp_inode dirty via ext4_ext_tree_init. 
*/ if (ext4_journal_extend(handle, 1) != 0) diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 250032548597..105337ca9ed0 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -231,6 +231,7 @@ struct ext4_group_desc #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ -- cgit v1.2.3-71-gd317 From 5cdd7b2d7716a7ed7d6dc7588e2d015f04d46640 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 29 Apr 2008 22:03:54 -0400 Subject: Convert ext4 to use unlocked_ioctl I checked ext4_ioctl and it looked largely safe to be used without BKL. So convert it over to unlocked_ioctl. Signed-off-by: Andi Kleen Signed-off-by: Theodore Ts'o --- fs/ext4/dir.c | 2 +- fs/ext4/file.c | 2 +- fs/ext4/ioctl.c | 12 +++--------- include/linux/ext4_fs.h | 3 +-- 4 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 2c23bade9aa6..88c97f7312be 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = ext4_readdir, /* we take BKL.
needed?*/ - .ioctl = ext4_ioctl, /* BKL held */ + .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ac35ec58db55..20507a24506a 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = { .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext4_file_write, - .ioctl = ext4_ioctl, + .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 25b13ede8086..ce937fe432a0 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -18,9 +18,9 @@ #include #include -int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; unsigned short rsv_window_size; @@ -277,9 +277,6 @@ setversion_out: #ifdef CONFIG_COMPAT long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT4_IOC32_GETFLAGS: @@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) default: return -ENOIOCTLCMD; } - lock_kernel(); - ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; + return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 105337ca9ed0..33bc88568c54 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -1050,8 +1050,7 @@ extern int ext4_block_truncate_page(handle_t *handle, struct page *page, struct address_space *mapping, loff_t from); /* ioctl.c */ 
-extern int ext4_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); +extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); /* migrate.c */ -- cgit v1.2.3-71-gd317 From 418f6e9e5b77443a66f4457bc60f391e4fba8ad8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Apr 2008 08:11:12 -0400 Subject: ext4: remove duplicate include of ext4_fs_i.h header file include/linux/ext4_fs_i.h is included in include/linux/ext4_fs.h twice Signed-off-by: Joe Perches Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- include/linux/ext4_fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 33bc88568c54..1ae0f965f386 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -19,7 +19,6 @@ #include #include #include - #include /* @@ -176,7 +175,6 @@ struct ext4_group_desc #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ #ifdef __KERNEL__ -#include #include #endif /* -- cgit v1.2.3-71-gd317 From 9b98af3217ae6ad979075eb233a5e8a5c82f13ca Mon Sep 17 00:00:00 2001 From: Alek Du Date: Thu, 24 Apr 2008 09:19:44 +0800 Subject: PCI: Add Intel SCH PCI IDs This patch adds Intel SCH chipsets (US15W, US15L, UL11L) PCI IDs, these IDs will be used by following SCH driver patches.
Signed-off-by: Alek Du Signed-off-by: Jesse Barnes --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 70eb3c803d47..e5a53daf17f1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2413,6 +2413,8 @@ #define PCI_DEVICE_ID_INTEL_82443GX_0 0x71a0 #define PCI_DEVICE_ID_INTEL_82443GX_2 0x71a2 #define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601 +#define PCI_DEVICE_ID_INTEL_SCH_LPC 0x8119 +#define PCI_DEVICE_ID_INTEL_SCH_IDE 0x811a #define PCI_DEVICE_ID_INTEL_82454GX 0x84c4 #define PCI_DEVICE_ID_INTEL_82450GX 0x84c5 #define PCI_DEVICE_ID_INTEL_82451NX 0x84ca -- cgit v1.2.3-71-gd317 From fb96c00819c28860fd10137f1c63f7c48dec252b Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sat, 26 Apr 2008 13:46:31 -0400 Subject: [MTD] Delete long-unused jedec.h header file. Signed-off-by: Robert P. J. Day Signed-off-by: David Woodhouse --- include/linux/mtd/jedec.h | 66 ----------------------------------------------- 1 file changed, 66 deletions(-) delete mode 100644 include/linux/mtd/jedec.h (limited to 'include/linux') diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h deleted file mode 100644 index 9006feb218b9..000000000000 --- a/include/linux/mtd/jedec.h +++ /dev/null @@ -1,66 +0,0 @@ - -/* JEDEC Flash Interface. - * This is an older type of interface for self programming flash. It is - * commonly use in older AMD chips and is obsolete compared with CFI. - * It is called JEDEC because the JEDEC association distributes the ID codes - * for the chips. - * - * See the AMD flash databook for information on how to operate the interface. 
- * - * $Id: jedec.h,v 1.4 2005/11/07 11:14:54 gleixner Exp $ - */ - -#ifndef __LINUX_MTD_JEDEC_H__ -#define __LINUX_MTD_JEDEC_H__ - -#include - -#define MAX_JEDEC_CHIPS 16 - -// Listing of all supported chips and their information -struct JEDECTable -{ - __u16 jedec; - char *name; - unsigned long size; - unsigned long sectorsize; - __u32 capabilities; -}; - -// JEDEC being 0 is the end of the chip array -struct jedec_flash_chip -{ - __u16 jedec; - unsigned long size; - unsigned long sectorsize; - - // *(__u8*)(base + (adder << addrshift)) = data << datashift - // Address size = size << addrshift - unsigned long base; // Byte 0 of the flash, will be unaligned - unsigned int datashift; // Useful for 32bit/16bit accesses - unsigned int addrshift; - unsigned long offset; // linerized start. base==offset for unbanked, uninterleaved flash - - __u32 capabilities; - - // These markers are filled in by the flash_chip_scan function - unsigned long start; - unsigned long length; -}; - -struct jedec_private -{ - unsigned long size; // Total size of all the devices - - /* Bank handling. If sum(bank_fill) == size then this is linear flash. - Otherwise the mapping has holes in it. bank_fill may be used to - find the holes, but in the common symetric case - bank_fill[0] == bank_fill[*], thus addresses may be computed - mathmatically. bank_fill must be powers of two */ - unsigned is_banked; - unsigned long bank_fill[MAX_JEDEC_CHIPS]; - - struct jedec_flash_chip chips[MAX_JEDEC_CHIPS]; -}; - -#endif -- cgit v1.2.3-71-gd317 From 7752d5cfe3d11ca0bb9c673ec38bd78ba6578f8e Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Fri, 15 Feb 2008 01:27:20 -0800 Subject: x86: validate against acpi motherboard resources This patch adds validation of the MMCONFIG table against the ACPI reserved motherboard resources. If the MMCONFIG table is found to be reserved in ACPI, we don't bother checking the E820 table.
The PCI Express firmware spec apparently tells BIOS developers that reservation in ACPI is required and E820 reservation is optional, so checking against ACPI first makes sense. Many BIOSes don't reserve the MMCONFIG region in E820 even though it is perfectly functional, the existing check needlessly disables MMCONFIG in these cases. In order to do this, MMCONFIG setup has been split into two phases. If PCI configuration type 1 is not available then MMCONFIG is enabled early as before. Otherwise, it is enabled later after the ACPI interpreter is enabled, since we need to be able to execute control methods in order to check the ACPI reserved resources. Presently this is just triggered off the end of ACPI interpreter initialization. There are a few other behavioral changes here: - Validate all MMCONFIG configurations provided, not just the first one. - Validate the entire required length of each configuration according to the provided ending bus number is reserved, not just the minimum required allocation. - Validate that the area is reserved even if we read it from the chipset directly and not from the MCFG table. This catches the case where the BIOS didn't set the location properly in the chipset and has mapped it over other things it shouldn't have. This also cleans up the MMCONFIG initialization functions so that they simply do nothing if MMCONFIG is not compiled in. Based on an original patch by Rajesh Shah from Intel. 
[akpm@linux-foundation.org: many fixes and cleanups] Signed-off-by: Robert Hancock Signed-off-by: Andi Kleen Cc: Andrew Morton Cc: Greg KH Signed-off-by: Thomas Gleixner Tested-by: Andi Kleen Cc: Rajesh Shah Cc: Jesse Barnes Acked-by: Linus Torvalds Cc: Andi Kleen Cc: Greg KH Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- arch/x86/pci/init.c | 4 +- arch/x86/pci/mmconfig-shared.c | 149 ++++++++++++++++++++++++++++++++++++----- arch/x86/pci/pci.h | 1 - drivers/acpi/bus.c | 2 + include/linux/pci.h | 8 +++ 5 files changed, 143 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 3de9f9ba2da6..2080b04b3bcc 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -11,9 +11,7 @@ static __init int pci_access_init(void) #ifdef CONFIG_PCI_DIRECT type = pci_direct_probe(); #endif -#ifdef CONFIG_PCI_MMCONFIG - pci_mmcfg_init(type); -#endif + pci_mmcfg_early_init(type); if (raw_pci_ops) return 0; #ifdef CONFIG_PCI_BIOS diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 8d54df4dfaad..498e35ee428e 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -173,9 +173,78 @@ static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) pci_mmcfg_resources_inserted = 1; } -static void __init pci_mmcfg_reject_broken(int type) +static acpi_status __init check_mcfg_resource(struct acpi_resource *res, + void *data) +{ + struct resource *mcfg_res = data; + struct acpi_resource_address64 address; + acpi_status status; + + if (res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) { + struct acpi_resource_fixed_memory32 *fixmem32 = + &res->data.fixed_memory32; + if (!fixmem32) + return AE_OK; + if ((mcfg_res->start >= fixmem32->address) && + (mcfg_res->end < (fixmem32->address + + fixmem32->address_length))) { + mcfg_res->flags = 1; + return AE_CTRL_TERMINATE; + } + } + if ((res->type != 
ACPI_RESOURCE_TYPE_ADDRESS32) && + (res->type != ACPI_RESOURCE_TYPE_ADDRESS64)) + return AE_OK; + + status = acpi_resource_to_address64(res, &address); + if (ACPI_FAILURE(status) || + (address.address_length <= 0) || + (address.resource_type != ACPI_MEMORY_RANGE)) + return AE_OK; + + if ((mcfg_res->start >= address.minimum) && + (mcfg_res->end < (address.minimum + address.address_length))) { + mcfg_res->flags = 1; + return AE_CTRL_TERMINATE; + } + return AE_OK; +} + +static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, + void *context, void **rv) +{ + struct resource *mcfg_res = context; + + acpi_walk_resources(handle, METHOD_NAME__CRS, + check_mcfg_resource, context); + + if (mcfg_res->flags) + return AE_CTRL_TERMINATE; + + return AE_OK; +} + +static int __init is_acpi_reserved(unsigned long start, unsigned long end) +{ + struct resource mcfg_res; + + mcfg_res.start = start; + mcfg_res.end = end; + mcfg_res.flags = 0; + + acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL); + + if (!mcfg_res.flags) + acpi_get_devices("PNP0C02", find_mboard_resource, &mcfg_res, + NULL); + + return mcfg_res.flags; +} + +static void __init pci_mmcfg_reject_broken(void) { typeof(pci_mmcfg_config[0]) *cfg; + int i; if ((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || @@ -196,17 +265,37 @@ static void __init pci_mmcfg_reject_broken(int type) goto reject; } - /* - * Only do this check when type 1 works. 
If it doesn't work - * assume we run on a Mac and always use MCFG - */ - if (type == 1 && !e820_all_mapped(cfg->address, - cfg->address + MMCONFIG_APER_MIN, - E820_RESERVED)) { - printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" - " E820-reserved\n", cfg->address); - goto reject; + for (i = 0; i < pci_mmcfg_config_num; i++) { + u32 size = (cfg->end_bus_number + 1) << 20; + cfg = &pci_mmcfg_config[i]; + printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lu " + "segment %hu buses %u - %u\n", + i, (unsigned long)cfg->address, cfg->pci_segment, + (unsigned int)cfg->start_bus_number, + (unsigned int)cfg->end_bus_number); + if (is_acpi_reserved(cfg->address, cfg->address + size - 1)) { + printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved " + "in ACPI motherboard resources\n", + cfg->address); + } else { + printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" + " reserved in ACPI motherboard resources\n", + cfg->address); + /* Don't try to do this check unless configuration + type 1 is available. */ + if ((pci_probe & PCI_PROBE_CONF1) && + e820_all_mapped(cfg->address, + cfg->address + size - 1, + E820_RESERVED)) + printk(KERN_NOTICE + "PCI: MCFG area at %Lx reserved in " + "E820\n", + cfg->address); + else + goto reject; + } } + return; reject: @@ -216,20 +305,46 @@ reject: pci_mmcfg_config_num = 0; } -void __init pci_mmcfg_init(int type) +void __init pci_mmcfg_early_init(int type) +{ + if ((pci_probe & PCI_PROBE_MMCONF) == 0) + return; + + /* If type 1 access is available, no need to enable MMCONFIG yet, we can + defer until later when the ACPI interpreter is available to better + validate things. 
*/ + if (type == 1) + return; + + acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); + + if ((pci_mmcfg_config_num == 0) || + (pci_mmcfg_config == NULL) || + (pci_mmcfg_config[0].address == 0)) + return; + + if (pci_mmcfg_arch_init()) + pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; +} + +void __init pci_mmcfg_late_init(void) { int known_bridge = 0; + /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) return; - if (type == 1 && pci_mmcfg_check_hostbridge()) - known_bridge = 1; + /* MMCONFIG already enabled */ + if (!(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF)) + return; - if (!known_bridge) { + if ((pci_probe & PCI_PROBE_CONF1) && pci_mmcfg_check_hostbridge()) + known_bridge = 1; + else acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - pci_mmcfg_reject_broken(type); - } + + pci_mmcfg_reject_broken(); if ((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index c4bddaeff619..28b9b72ce7c7 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -97,7 +97,6 @@ extern struct pci_raw_ops pci_direct_conf1; extern int pci_direct_probe(void); extern void pci_direct_init(int type); extern void pci_pcbios_init(void); -extern void pci_mmcfg_init(int type); /* pci-mmconfig.c */ diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 2d1955c11833..a6dbcf4d9ef5 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -35,6 +35,7 @@ #ifdef CONFIG_X86 #include #endif +#include #include #include @@ -784,6 +785,7 @@ static int __init acpi_init(void) result = acpi_bus_init(); if (!result) { + pci_mmcfg_late_init(); if (!(pm_flags & PM_APM)) pm_flags |= PM_ACPI; else { diff --git a/include/linux/pci.h b/include/linux/pci.h index 292491324b01..43a4f9cae67d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1053,5 +1053,13 @@ extern unsigned long pci_cardbus_mem_size; extern int pcibios_add_platform_entries(struct pci_dev *dev); +#ifdef CONFIG_PCI_MMCONFIG +extern void 
__init pci_mmcfg_early_init(int type); +extern void __init pci_mmcfg_late_init(void); +#else +static inline void pci_mmcfg_early_init(int type) { } +static inline void pci_mmcfg_late_init(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.3-71-gd317 From 57741a779070e0b141b6148136b420c8d35ccbce Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 Feb 2008 01:32:50 -0800 Subject: x86_64: set cfg_size for AMD Family 10h in case MMCONFIG reuse pci_cfg_space_size but skip check pci express and pci-x CAP ID. Signed-off-by: Yinghai Lu Cc: Andrew Morton Acked-by: Greg Kroah-Hartman Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/fixup.c | 17 +++++++++++++++++ drivers/pci/probe.c | 11 ++++++++++- include/linux/pci.h | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index a5ef5f551373..b60b2abd480c 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -493,3 +493,20 @@ static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); + +/* + * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config + * have 4096 bytes. Even if the device is capable, that doesn't mean we can + * access it. Maybe we don't have a way to generate extended config space + * accesses. 
So check it + */ +static void fam10h_pci_cfg_space_size(struct pci_dev *dev) +{ + dev->cfg_size = pci_cfg_space_size_ext(dev, 0); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f991359f0c36..a8efdaef1870 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -842,11 +842,14 @@ static void set_pcie_port_type(struct pci_dev *pdev) * reading the dword at 0x100 which must either be 0 or a valid extended * capability header. */ -int pci_cfg_space_size(struct pci_dev *dev) +int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) { int pos; u32 status; + if (!check_exp_pcix) + goto skip; + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); @@ -858,6 +861,7 @@ int pci_cfg_space_size(struct pci_dev *dev) goto fail; } + skip: if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) goto fail; if (status == 0xffffffff) @@ -869,6 +873,11 @@ int pci_cfg_space_size(struct pci_dev *dev) return PCI_CFG_SPACE_SIZE; } +int pci_cfg_space_size(struct pci_dev *dev) +{ + return pci_cfg_space_size_ext(dev, 1); +} + static void pci_release_bus_bridge_dev(struct device *dev) { kfree(dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 43a4f9cae67d..2b8f74522f8f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -666,6 +666,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), void *userdata); +int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned 
check_exp_pcix); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); -- cgit v1.2.3-71-gd317 From bb63b4219976d48ed6d22ac33c18be334fb5a78c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Feb 2008 23:56:50 -0800 Subject: x86 pci: remove checking type for mmconfig probe doesn't need to check if it is type1 or type2, we can use raw_pci_ops directly. also make pci_direct_conf1 static again. anyway is there system with type 2 and mmconf support? Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/pci/direct.c | 8 +++++--- arch/x86/pci/init.c | 11 +++++------ arch/x86/pci/mmconfig-shared.c | 32 +++++++++++++++----------------- include/linux/pci.h | 4 ++-- 4 files changed, 27 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 42f3e4cad179..21d1e0e0d535 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -258,7 +258,8 @@ void __init pci_direct_init(int type) { if (type == 0) return; - printk(KERN_INFO "PCI: Using configuration type %d\n", type); + printk(KERN_INFO "PCI: Using configuration type %d for base access\n", + type); if (type == 1) raw_pci_ops = &pci_direct_conf1; else @@ -275,8 +276,10 @@ int __init pci_direct_probe(void) if (!region) goto type2; - if (pci_check_type1()) + if (pci_check_type1()) { + raw_pci_ops = &pci_direct_conf1; return 1; + } release_resource(region); type2: @@ -290,7 +293,6 @@ int __init pci_direct_probe(void) goto fail2; if (pci_check_type2()) { - printk(KERN_INFO "PCI: Using configuration type 2\n"); raw_pci_ops = &pci_direct_conf2; return 2; } diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 2080b04b3bcc..343c36337e69 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -6,14 +6,13 @@ in the right sequence from here. 
*/ static __init int pci_access_init(void) { - int type __maybe_unused = 0; - #ifdef CONFIG_PCI_DIRECT + int type = 0; + type = pci_direct_probe(); #endif - pci_mmcfg_early_init(type); - if (raw_pci_ops) - return 0; + pci_mmcfg_early_init(); + #ifdef CONFIG_PCI_BIOS pci_pcbios_init(); #endif @@ -26,7 +25,7 @@ static __init int pci_access_init(void) #ifdef CONFIG_PCI_DIRECT pci_direct_init(type); #endif - if (!raw_pci_ops) + if (!raw_pci_ops && !raw_pci_ext_ops) printk(KERN_ERR "PCI: Fatal: No config space access function found\n"); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 6f68658b519d..bdf62243186a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -28,7 +28,7 @@ static int __initdata pci_mmcfg_resources_inserted; static const char __init *pci_mmcfg_e7520(void) { u32 win; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win); win = win & 0xf000; if(win == 0x0000 || win == 0xf000) @@ -53,7 +53,7 @@ static const char __init *pci_mmcfg_intel_945(void) pci_mmcfg_config_num = 1; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar); + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar); /* Enable bit */ if (!(pciexbar & 1)) @@ -179,6 +179,9 @@ static int __init pci_mmcfg_check_hostbridge(void) int i; const char *name; + if (!raw_pci_ops) + return 0; + pci_mmcfg_config_num = 0; pci_mmcfg_config = NULL; name = NULL; @@ -186,7 +189,7 @@ static int __init pci_mmcfg_check_hostbridge(void) for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { bus = pci_mmcfg_probes[i].bus; devfn = pci_mmcfg_probes[i].devfn; - pci_direct_conf1.read(0, bus, devfn, 0, 4, &l); + raw_pci_ops->read(0, bus, devfn, 0, 4, &l); vendor = l & 0xffff; device = (l >> 16) & 0xffff; @@ -304,7 +307,7 @@ static int __init is_acpi_reserved(unsigned long start, unsigned long end) return mcfg_res.flags; } -static void __init 
pci_mmcfg_reject_broken(int type, int early) +static void __init pci_mmcfg_reject_broken(int early) { typeof(pci_mmcfg_config[0]) *cfg; int i; @@ -342,8 +345,8 @@ static void __init pci_mmcfg_reject_broken(int type, int early) " reserved in ACPI motherboard resources\n", cfg->address); /* Don't try to do this check unless configuration - type 1 is available. */ - if (type == 1 && e820_all_mapped(cfg->address, + type 1 is available. how about type 2 ?*/ + if (raw_pci_ops && e820_all_mapped(cfg->address, cfg->address + size - 1, E820_RESERVED)) { printk(KERN_NOTICE @@ -368,7 +371,7 @@ reject: static int __initdata known_bridge; -void __init __pci_mmcfg_init(int type, int early) +void __init __pci_mmcfg_init(int early) { /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) @@ -382,14 +385,14 @@ void __init __pci_mmcfg_init(int type, int early) if (known_bridge) return; - if (early && type == 1) { + if (early) { if (pci_mmcfg_check_hostbridge()) known_bridge = 1; } if (!known_bridge) { acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - pci_mmcfg_reject_broken(type, early); + pci_mmcfg_reject_broken(early); } if ((pci_mmcfg_config_num == 0) || @@ -410,19 +413,14 @@ void __init __pci_mmcfg_init(int type, int early) } } -void __init pci_mmcfg_early_init(int type) +void __init pci_mmcfg_early_init(void) { - __pci_mmcfg_init(type, 1); + __pci_mmcfg_init(1); } void __init pci_mmcfg_late_init(void) { - int type = 0; - - if (pci_probe & PCI_PROBE_CONF1) - type = 1; - - __pci_mmcfg_init(type, 0); + __pci_mmcfg_init(0); } static int __init pci_mmcfg_late_insert_resources(void) diff --git a/include/linux/pci.h b/include/linux/pci.h index 2b8f74522f8f..a71954a38932 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1055,10 +1055,10 @@ extern unsigned long pci_cardbus_mem_size; extern int pcibios_add_platform_entries(struct pci_dev *dev); #ifdef CONFIG_PCI_MMCONFIG -extern void __init pci_mmcfg_early_init(int type); +extern void __init 
pci_mmcfg_early_init(void); extern void __init pci_mmcfg_late_init(void); #else -static inline void pci_mmcfg_early_init(int type) { } +static inline void pci_mmcfg_early_init(void) { } static inline void pci_mmcfg_late_init(void) { } #endif -- cgit v1.2.3-71-gd317 From 30a18d6c3f1e774de656ebd8ff219d53e2ba4029 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:21:20 -0800 Subject: x86: multi pci root bus with different io resource range, on 64-bit scan AMD opteron io/mmio routing to make sure every pci root bus get correct resource range. Thus later pci scan could assign correct resource to device with unassigned resource. this can fix a system without _CRS for multi pci root bus. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/Makefile_64 | 2 +- arch/x86/pci/k8-bus_64.c | 404 +++++++++++++++++++++++++++++++++++++++------ drivers/pci/probe.c | 6 + include/asm-x86/topology.h | 3 + include/linux/pci.h | 2 +- 5 files changed, 365 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64 index 7d8c467bf143..8fbd19832cf6 100644 --- a/arch/x86/pci/Makefile_64 +++ b/arch/x86/pci/Makefile_64 @@ -13,5 +13,5 @@ obj-y += legacy.o irq.o common.o early.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o -obj-$(CONFIG_NUMA) += k8-bus_64.o +obj-y += k8-bus_64.o diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index dab38310ee97..5e8a9d105edd 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -7,23 +7,29 @@ /* * This discovers the pcibus <-> node mapping on AMD K8. 
- * - * RED-PEN need to call this again on PCI hotplug - * RED-PEN empty cpus get reported wrong + * also get peer root bus resource for io,mmio */ -#define NODE_ID(dword) ((dword>>4) & 0x07) -#define LDT_BUS_NUMBER_REGISTER_0 0xE0 -#define LDT_BUS_NUMBER_REGISTER_1 0xE4 -#define LDT_BUS_NUMBER_REGISTER_2 0xE8 -#define LDT_BUS_NUMBER_REGISTER_3 0xEC -#define NR_LDT_BUS_NUMBER_REGISTERS 4 -#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) -#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 24) & 0xFF) -#define PCI_DEVICE_ID_K8HTCONFIG 0x1100 -#define PCI_DEVICE_ID_K8_10H_HTCONFIG 0x1200 -#define PCI_DEVICE_ID_K8_11H_HTCONFIG 0x1300 +/* + * sub bus (transparent) will use entres from 3 to store extra from root, + * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? + */ +#define RES_NUM 16 +struct pci_root_info { + char name[12]; + unsigned int res_num; + struct resource res[RES_NUM]; + int bus_min; + int bus_max; + int node; + int link; +}; + +/* 4 at this time, it may become to 32 */ +#define PCI_ROOT_NR 4 +static int pci_root_num; +static struct pci_root_info pci_root_info[PCI_ROOT_NR]; #ifdef CONFIG_NUMA @@ -55,77 +61,375 @@ int get_mp_bus_to_node(int busnum) return node; } - #endif +void set_pci_bus_resources_arch_default(struct pci_bus *b) +{ + int i; + int j; + struct pci_root_info *info; + + if (!pci_root_num) + return; + + for (i = 0; i < pci_root_num; i++) { + if (pci_root_info[i].bus_min == b->number) + break; + } + + if (i == pci_root_num) + return; + + info = &pci_root_info[i]; + for (j = 0; j < info->res_num; j++) { + struct resource *res; + struct resource *root; + + res = &info->res[j]; + b->resource[j] = res; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } +} + +#define RANGE_NUM 16 + +struct res_range { + size_t start; + size_t end; +}; + +static void __init update_range(struct res_range *range, size_t start, + size_t end) +{ + 
int i; + int j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + if (start == range[j].start && end < range[j].end) { + range[j].start = end + 1; + break; + } else if (start == range[j].start && end == range[j].end) { + range[j].start = 0; + range[j].end = 0; + break; + } else if (start > range[j].start && end == range[j].end) { + range[j].end = start - 1; + break; + } else if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + break; + } + } +} + +static void __init update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge) +{ + int i; + struct resource *res; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + if (res->flags != flags) + continue; + if (res->end + 1 == start) { + res->end = end; + return; + } else if (end + 1 == res->start) { + res->start = start; + return; + } + } + +addit: + + /* need to add that */ + if (info->res_num >= RES_NUM) + return; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + info->res_num++; +} + +struct pci_hostbridge_probe { + u32 bus; + u32 slot; + u32 vendor; + u32 device; +}; + +static struct pci_hostbridge_probe pci_probes[] __initdata = { + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1100 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, +}; + /** * early_fill_mp_bus_to_node() * called before pcibios_scan_root and pci_scan_bus * fills the mp_bus_to_cpumask array based according to the LDT Bus Number * Registers found in the K8 northbridge */ -__init 
static int -early_fill_mp_bus_to_node(void) +static int __init early_fill_mp_bus_info(void) { -#ifdef CONFIG_NUMA - int i, j; + int i; + int j; + unsigned bus; unsigned slot; - u32 ldtbus; - u32 id; + int found; int node; - u16 deviceid; - u16 vendorid; - int min_bus; - int max_bus; - - static int lbnr[NR_LDT_BUS_NUMBER_REGISTERS] = { - LDT_BUS_NUMBER_REGISTER_0, - LDT_BUS_NUMBER_REGISTER_1, - LDT_BUS_NUMBER_REGISTER_2, - LDT_BUS_NUMBER_REGISTER_3 - }; + int link; + int def_node; + int def_link; + struct pci_root_info *info; + u32 reg; + struct resource *res; + size_t start; + size_t end; + struct res_range range[RANGE_NUM]; + u64 val; + u32 address; +#ifdef CONFIG_NUMA for (i = 0; i < BUS_NR; i++) mp_bus_to_node[i] = -1; +#endif if (!early_pci_allowed()) return -1; - slot = 0x18; - id = read_pci_config(0, slot, 0, PCI_VENDOR_ID); + found = 0; + for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { + u32 id; + u16 device; + u16 vendor; - vendorid = id & 0xffff; - if (vendorid != PCI_VENDOR_ID_AMD) - goto out; + bus = pci_probes[i].bus; + slot = pci_probes[i].slot; + id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); - deviceid = (id>>16) & 0xffff; - if ((deviceid != PCI_DEVICE_ID_K8HTCONFIG) && - (deviceid != PCI_DEVICE_ID_K8_10H_HTCONFIG) && - (deviceid != PCI_DEVICE_ID_K8_11H_HTCONFIG)) - goto out; + vendor = id & 0xffff; + device = (id>>16) & 0xffff; + if (pci_probes[i].vendor == vendor && + pci_probes[i].device == device) { + found = 1; + break; + } + } + + if (!found) + return 0; - for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { - ldtbus = read_pci_config(0, slot, 1, lbnr[i]); + pci_root_num = 0; + for (i = 0; i < 4; i++) { + int min_bus; + int max_bus; + reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2)); /* Check if that register is enabled for bus range */ - if ((ldtbus & 7) != 3) + if ((reg & 7) != 3) continue; - min_bus = SECONDARY_LDT_BUS_NUMBER(ldtbus); - max_bus = SUBORDINATE_LDT_BUS_NUMBER(ldtbus); - node = NODE_ID(ldtbus); + min_bus = (reg >> 16) & 
0xff; + max_bus = (reg >> 24) & 0xff; + node = (reg >> 4) & 0x07; +#ifdef CONFIG_NUMA for (j = min_bus; j <= max_bus; j++) mp_bus_to_node[j] = (unsigned char) node; +#endif + link = (reg >> 8) & 0x03; + + info = &pci_root_info[pci_root_num]; + info->bus_min = min_bus; + info->bus_max = max_bus; + info->node = node; + info->link = link; + sprintf(info->name, "PCI Bus #%02x", min_bus); + pci_root_num++; } -out: + /* get the default node and link for left over res */ + reg = read_pci_config(bus, slot, 0, 0x60); + def_node = (reg >> 8) & 0x07; + reg = read_pci_config(bus, slot, 0, 0x64); + def_link = (reg >> 8) & 0x03; + + memset(range, 0, sizeof(range)); + range[0].end = 0xffff; + /* io port resource */ + for (i = 0; i < 4; i++) { + reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xfff000; + reg = read_pci_config(bus, slot, 1, 0xc4 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xfff000) | 0xfff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + update_res(info, start, end, IORESOURCE_IO, 0); + update_range(range, start, end); + } + /* add left over io port range to def node/link, [0, 0xffff] */ + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_IO, 1); + } + } + + memset(range, 0, sizeof(range)); + /* 0xfd00000000-0xffffffffff for HT */ + /* 0xfc00000000-0xfcffffffff for Family 10h mmconfig*/ + range[0].end = 0xfbffffffffULL; + + /* need to take out [0, TOM) for RAM*/ + address = MSR_K8_TOP_MEM1; + 
rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); + if (end < (1ULL<<32)) + update_range(range, 0, end - 1); + + /* mmio resource */ + for (i = 0; i < 8; i++) { + reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xffffff00; /* 39:16 on 31:8*/ + start <<= 8; + reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xffffff00); + end <<= 8; + end |= 0xffff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + update_res(info, start, end, IORESOURCE_MEM, 0); + update_range(range, start, end); + } + + /* need to take out [4G, TOM2) for RAM*/ + /* SYS_CFG */ + address = MSR_K8_SYSCFG; + rdmsrl(address, val); + /* TOP_MEM2 is enabled? */ + if (val & (1<<21)) { + /* TOP_MEM2 */ + address = MSR_K8_TOP_MEM2; + rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); + update_range(range, 1ULL<<32, end - 1); + } + + /* + * add left over mmio range to def node/link ? 
+ * that is tricky, just record range in from start_min to 4G + */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_MEM, 1); + } + } + +#ifdef CONFIG_NUMA for (i = 0; i < BUS_NR; i++) { node = mp_bus_to_node[i]; if (node >= 0) printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node); } #endif + + for (i = 0; i < pci_root_num; i++) { + int res_num; + int busnum; + + info = &pci_root_info[i]; + res_num = info->res_num; + busnum = info->bus_min; + printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", + info->bus_min, info->bus_max, info->node, info->link); + for (j = 0; j < res_num; j++) { + res = &info->res[j]; + printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", + busnum, j, + (res->flags & IORESOURCE_IO)?"io port":"mmio", + res->start, res->end); + } + } + return 0; } -postcore_initcall(early_fill_mp_bus_to_node); +postcore_initcall(early_fill_mp_bus_info); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index a40043bd3257..4a55bf380957 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1088,6 +1088,10 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) return max; } +void __attribute__((weak)) set_pci_bus_resources_arch_default(struct pci_bus *b) +{ +} + struct pci_bus * pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata) { @@ -1147,6 +1151,8 @@ struct pci_bus * pci_create_bus(struct device *parent, b->resource[0] = &ioport_resource; b->resource[1] = &iomem_resource; + set_pci_bus_resources_arch_default(b); + return b; dev_create_file_err: diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 4793ae745a78..0e6d6b03affe 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ 
-193,6 +193,9 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif +struct pci_bus; +void set_pci_bus_resources_arch_default(struct pci_bus *b); + #ifdef CONFIG_SMP #define mc_capable() (boot_cpu_data.x86_max_cores > 1) #define smt_capable() (smp_num_siblings > 1) diff --git a/include/linux/pci.h b/include/linux/pci.h index a71954a38932..abc998ffb66e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -254,7 +254,7 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev, #define PCI_NUM_RESOURCES 11 #ifndef PCI_BUS_NUM_RESOURCES -#define PCI_BUS_NUM_RESOURCES 8 +#define PCI_BUS_NUM_RESOURCES 16 #endif #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ -- cgit v1.2.3-71-gd317 From cbf9bd603ab1fc4d2ecb1c6a4b7bd1cc50a7e82a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:21:06 -0800 Subject: acpi: get boot_cpu_id as early for k8_scan_nodes [mingo@elte.hu: split from "x86_64: get boot_cpu_id as early for k8_scan_nodes] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 70 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/mm/k8topology_64.c | 38 +++++++++++++++++++++++- include/linux/acpi.h | 5 ++++ 3 files changed, 112 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 977ed5cdeaa3..c49ebcc6c41e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -771,6 +771,32 @@ static void __init acpi_register_lapic_address(unsigned long address) boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } +static int __init early_acpi_parse_madt_lapic_addr_ovr(void) +{ + int count; + + if (!cpu_has_apic) + return -ENODEV; + + /* + * Note that the LAPIC address is obtained from the MADT (32-bit value) + * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). 
+ */ + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); + if (count < 0) { + printk(KERN_ERR PREFIX + "Error parsing LAPIC address override entry\n"); + return count; + } + + acpi_register_lapic_address(acpi_lapic_addr); + + return count; +} + static int __init acpi_parse_madt_lapic_entries(void) { int count; @@ -901,6 +927,33 @@ static inline int acpi_parse_madt_ioapic_entries(void) } #endif /* !CONFIG_X86_IO_APIC */ +static void __init early_acpi_process_madt(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int error; + + if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + + /* + * Parse MADT LAPIC entries + */ + error = early_acpi_parse_madt_lapic_addr_ovr(); + if (!error) { + acpi_lapic = 1; + smp_found_config = 1; + } + if (error == -EINVAL) { + /* + * Dell Precision Workstation 410, 610 come here. + */ + printk(KERN_ERR PREFIX + "Invalid BIOS MADT, disabling ACPI\n"); + disable_acpi(); + } + } +#endif +} + static void __init acpi_process_madt(void) { #ifdef CONFIG_X86_LOCAL_APIC @@ -1233,6 +1286,23 @@ int __init acpi_boot_table_init(void) return 0; } +int __init early_acpi_boot_init(void) +{ + /* + * If acpi_disabled, bail out + * One exception: acpi=ht continues far enough to enumerate LAPICs + */ + if (acpi_disabled && !acpi_ht) + return 1; + + /* + * Process the Multiple APIC Description Table (MADT), if present + */ + early_acpi_process_madt(); + + return 0; +} + int __init acpi_boot_init(void) { /* diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 86808e666f9c..1f476e477844 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -13,12 +13,15 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include static __init int find_northbridge(void) { @@ -44,6 +47,30 @@ static __init int find_northbridge(void) return -1; } +static __init void early_get_boot_cpu_id(void) +{ + /* + * need to get boot_cpu_id so 
can use that to create apicid_to_node + * in k8_scan_nodes() + */ + /* + * Find possible boot-time SMP configuration: + */ + early_find_smp_config(); +#ifdef CONFIG_ACPI + /* + * Read APIC information from ACPI tables. + */ + early_acpi_boot_init(); +#endif + /* + * get boot-time SMP configuration: + */ + if (smp_found_config) + early_get_smp_config(); + early_init_lapic_mapping(); +} + int __init k8_scan_nodes(unsigned long start, unsigned long end) { unsigned long prevbase; @@ -56,6 +83,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) unsigned cores; unsigned bits; int j; + unsigned apicid_base; if (!early_pci_allowed()) return -1; @@ -174,11 +202,19 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) /* use the coreid bits from early_identify_cpu */ bits = boot_cpu_data.x86_coreid_bits; cores = (1<<bits); + apicid_base = 0; + /* need to get boot_cpu_id early for system with apicid lifting */ + early_get_boot_cpu_id(); + if (boot_cpu_physical_apicid > 0) { + printk(KERN_INFO "BSP APIC ID: %02x\n", + boot_cpu_physical_apicid); + apicid_base = boot_cpu_physical_apicid; + } for (i = 0; i < 8; i++) { if (nodes[i].start != nodes[i].end) { nodeid = nodeids[i]; - for (j = 0; j < cores; j++) + for (j = apicid_base; j < cores + apicid_base; j++) apicid_to_node[(nodeid << bits) + j] = i; setup_node_bootmem(i, nodes[i].start, nodes[i].end); } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 2c7e003356ac..41f7ce7edd7a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -79,6 +79,7 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); char * __acpi_map_table (unsigned long phys_addr, unsigned long size); +int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); int acpi_numa_init (void); @@ -235,6 +236,10 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, #else /* CONFIG_ACPI */ +static inline int early_acpi_boot_init(void) +{ + return 0; +} static inline int acpi_boot_init(void) { return 0;
-- cgit v1.2.3-71-gd317 From 39b264641a0c3b5e0e742e2046b49e92d1f3be88 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 14 Apr 2008 19:11:30 +0300 Subject: slub: Store max number of objects in the page struct. Split the inuse field up to be able to store the number of objects in this page in the page struct as well. Necessary if we want to have pages of various orders for a slab. Also avoids touching struct kmem_cache cachelines in __slab_alloc(). Update diagnostic code to check the number of objects and make sure that the number of objects always stays within the bounds of a 16 bit unsigned integer. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/mm_types.h | 5 ++++- mm/slub.c | 54 ++++++++++++++++++++++++++++++------------------ 2 files changed, 38 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index af190ceab971..e0bd2235296b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -42,7 +42,10 @@ struct page { * to show when page is mapped * & limit reverse map searches. 
*/ - unsigned int inuse; /* SLUB: Nr of objects */ + struct { /* SLUB */ + u16 inuse; + u16 objects; + }; }; union { struct { diff --git a/mm/slub.c b/mm/slub.c index 64c2b2bfbd79..6641025c597f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -301,7 +301,7 @@ static inline int check_valid_pointer(struct kmem_cache *s, return 1; base = page_address(page); - if (object < base || object >= base + s->objects * s->size || + if (object < base || object >= base + page->objects * s->size || (object - base) % s->size) { return 0; } @@ -451,8 +451,8 @@ static void print_tracking(struct kmem_cache *s, void *object) static void print_page_info(struct page *page) { - printk(KERN_ERR "INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n", - page, page->inuse, page->freelist, page->flags); + printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n", + page, page->objects, page->inuse, page->freelist, page->flags); } @@ -652,6 +652,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) p + off, POISON_INUSE, s->size - off); } +/* Check the pad bytes at the end of a slab page */ static int slab_pad_check(struct kmem_cache *s, struct page *page) { u8 *start; @@ -664,20 +665,20 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) return 1; start = page_address(page); - end = start + (PAGE_SIZE << s->order); - length = s->objects * s->size; - remainder = end - (start + length); + length = (PAGE_SIZE << s->order); + end = start + length; + remainder = length % s->size; if (!remainder) return 1; - fault = check_bytes(start + length, POISON_INUSE, remainder); + fault = check_bytes(end - remainder, POISON_INUSE, remainder); if (!fault) return 1; while (end > fault && end[-1] == POISON_INUSE) end--; slab_err(s, page, "Padding overwritten. 
0x%p-0x%p", fault, end - 1); - print_section("Padding", start, length); + print_section("Padding", end - remainder, remainder); restore_bytes(s, "slab padding", POISON_INUSE, start, end); return 0; @@ -739,15 +740,24 @@ static int check_object(struct kmem_cache *s, struct page *page, static int check_slab(struct kmem_cache *s, struct page *page) { + int maxobj; + VM_BUG_ON(!irqs_disabled()); if (!PageSlab(page)) { slab_err(s, page, "Not a valid slab page"); return 0; } - if (page->inuse > s->objects) { + + maxobj = (PAGE_SIZE << compound_order(page)) / s->size; + if (page->objects > maxobj) { + slab_err(s, page, "objects %u > max %u", + s->name, page->objects, maxobj); + return 0; + } + if (page->inuse > page->objects) { slab_err(s, page, "inuse %u > max %u", - s->name, page->inuse, s->objects); + s->name, page->inuse, page->objects); return 0; } /* Slab_pad_check fixes things up after itself */ @@ -765,7 +775,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) void *fp = page->freelist; void *object = NULL; - while (fp && nr <= s->objects) { + while (fp && nr <= page->objects) { if (fp == search) return 1; if (!check_valid_pointer(s, page, fp)) { @@ -777,7 +787,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) } else { slab_err(s, page, "Freepointer corrupt"); page->freelist = NULL; - page->inuse = s->objects; + page->inuse = page->objects; slab_fix(s, "Freelist cleared"); return 0; } @@ -788,10 +798,10 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) nr++; } - if (page->inuse != s->objects - nr) { + if (page->inuse != page->objects - nr) { slab_err(s, page, "Wrong object count. 
Counter is %d but " - "counted were %d", page->inuse, s->objects - nr); - page->inuse = s->objects - nr; + "counted were %d", page->inuse, page->objects - nr); + page->inuse = page->objects - nr; slab_fix(s, "Object count adjusted."); } return search == NULL; @@ -910,7 +920,7 @@ bad: * as used avoids touching the remaining objects. */ slab_fix(s, "Marking all objects used"); - page->inuse = s->objects; + page->inuse = page->objects; page->freelist = NULL; } return 0; @@ -1081,6 +1091,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (!page) return NULL; + page->objects = s->objects; mod_zone_page_state(page_zone(page), (s->flags & SLAB_RECLAIM_ACCOUNT) ? NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, @@ -1519,7 +1530,7 @@ load_freelist: goto debug; c->freelist = object[c->offset]; - c->page->inuse = s->objects; + c->page->inuse = c->page->objects; c->page->freelist = NULL; c->node = page_to_nid(c->page); unlock_out: @@ -1818,6 +1829,9 @@ static inline int slab_order(int size, int min_objects, int rem; int min_order = slub_min_order; + if ((PAGE_SIZE << min_order) / size > 65535) + return get_order(size * 65535) - 1; + for (order = max(min_order, fls(min_objects * size - 1) - PAGE_SHIFT); order <= max_order; order++) { @@ -3251,7 +3265,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, return 0; /* Now we know that a valid freelist exists */ - bitmap_zero(map, s->objects); + bitmap_zero(map, page->objects); for_each_free_object(p, s, page->freelist) { set_bit(slab_index(p, s, addr), map); @@ -3528,10 +3542,10 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s, struct page *page, enum track_item alloc) { void *addr = page_address(page); - DECLARE_BITMAP(map, s->objects); + DECLARE_BITMAP(map, page->objects); void *p; - bitmap_zero(map, s->objects); + bitmap_zero(map, page->objects); for_each_free_object(p, s, page->freelist) set_bit(slab_index(p, s, addr), map); -- cgit v1.2.3-71-gd317 From 
834f3d119234b35a1985a2449831d99356637937 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 14 Apr 2008 19:11:31 +0300 Subject: slub: Add kmem_cache_order_objects struct Pack the order and the number of objects into a single word. This saves some memory in the kmem_cache_structure and more importantly allows us to fetch both values atomically. Later the slab orders become runtime configurable and we need to fetch these two items together in order to properly allocate a slab and initialize its objects. Fix the race by fetching the order and the number of objects in one word. [penberg@cs.helsinki.fi: fix memset() page order in new_slab()] Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 12 ++++++-- mm/slub.c | 76 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 61 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 79d59c937fac..4131e5fbd18b 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -52,6 +52,15 @@ struct kmem_cache_node { #endif }; +/* + * Word size structure that can be atomically updated or read and that + * contains both the order and the number of objects that a slab of the + * given order would contain. + */ +struct kmem_cache_order_objects { + unsigned long x; +}; + /* * Slab cache management. */ @@ -61,7 +70,7 @@ struct kmem_cache { int size; /* The size of an object including meta data */ int objsize; /* The size of an object without meta data */ int offset; /* Free pointer offset. 
*/ - int order; /* Current preferred allocation order */ + struct kmem_cache_order_objects oo; /* * Avoid an extra cache line for UP, SMP and for the node local to @@ -70,7 +79,6 @@ struct kmem_cache { struct kmem_cache_node local_node; /* Allocation and freeing of slabs */ - int objects; /* Number of objects in slab */ gfp_t allocflags; /* gfp flags to use on each alloc */ int refcount; /* Refcount for slab cache destroy */ void (*ctor)(struct kmem_cache *, void *); diff --git a/mm/slub.c b/mm/slub.c index 67f7d6068934..0a220df5ed7c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -341,6 +341,26 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) return (p - addr) / s->size; } +static inline struct kmem_cache_order_objects oo_make(int order, + unsigned long size) +{ + struct kmem_cache_order_objects x = { + (order << 16) + (PAGE_SIZE << order) / size + }; + + return x; +} + +static inline int oo_order(struct kmem_cache_order_objects x) +{ + return x.x >> 16; +} + +static inline int oo_objects(struct kmem_cache_order_objects x) +{ + return x.x & ((1 << 16) - 1); +} + #ifdef CONFIG_SLUB_DEBUG /* * Debug settings: @@ -665,7 +685,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) return 1; start = page_address(page); - length = (PAGE_SIZE << s->order); + length = (PAGE_SIZE << compound_order(page)); end = start + length; remainder = length % s->size; if (!remainder) @@ -1090,19 +1110,21 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node) {} static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) { struct page *page; - int pages = 1 << s->order; + struct kmem_cache_order_objects oo = s->oo; + int order = oo_order(oo); + int pages = 1 << order; flags |= s->allocflags; if (node == -1) - page = alloc_pages(flags, s->order); + page = alloc_pages(flags, order); else - page = alloc_pages_node(node, flags, s->order); + page = alloc_pages_node(node, flags, order); if (!page) return NULL; - 
page->objects = s->objects; + page->objects = oo_objects(oo); mod_zone_page_state(page_zone(page), (s->flags & SLAB_RECLAIM_ACCOUNT) ? NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, @@ -1143,7 +1165,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) start = page_address(page); if (unlikely(s->flags & SLAB_POISON)) - memset(start, POISON_INUSE, PAGE_SIZE << s->order); + memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page)); last = start; for_each_object(p, s, start, page->objects) { @@ -1162,7 +1184,8 @@ out: static void __free_slab(struct kmem_cache *s, struct page *page) { - int pages = 1 << s->order; + int order = compound_order(page); + int pages = 1 << order; if (unlikely(SlabDebug(page))) { void *p; @@ -1181,7 +1204,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __ClearPageSlab(page); reset_page_mapcount(page); - __free_pages(page, s->order); + __free_pages(page, order); } static void rcu_free_slab(struct rcu_head *h) @@ -2202,6 +2225,7 @@ static int calculate_sizes(struct kmem_cache *s) unsigned long flags = s->flags; unsigned long size = s->objsize; unsigned long align = s->align; + int order; /* * Round up object size to the next word boundary. We can only @@ -2294,17 +2318,17 @@ static int calculate_sizes(struct kmem_cache *s) * page allocator order 0 allocs so take a reasonably large * order that will allows us a good number of objects. 
*/ - s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER); + order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER); s->flags |= __PAGE_ALLOC_FALLBACK; s->allocflags |= __GFP_NOWARN; } else - s->order = calculate_order(size); + order = calculate_order(size); - if (s->order < 0) + if (order < 0) return 0; s->allocflags = 0; - if (s->order) + if (order) s->allocflags |= __GFP_COMP; if (s->flags & SLAB_CACHE_DMA) @@ -2316,9 +2340,9 @@ static int calculate_sizes(struct kmem_cache *s) /* * Determine the number of objects per slab */ - s->objects = (PAGE_SIZE << s->order) / size; + s->oo = oo_make(order, size); - return !!s->objects; + return !!oo_objects(s->oo); } @@ -2351,7 +2375,7 @@ error: if (flags & SLAB_PANIC) panic("Cannot create slab %s size=%lu realsize=%u " "order=%u offset=%u flags=%lx\n", - s->name, (unsigned long)size, s->size, s->order, + s->name, (unsigned long)size, s->size, oo_order(s->oo), s->offset, flags); return 0; } @@ -2789,8 +2813,9 @@ int kmem_cache_shrink(struct kmem_cache *s) struct kmem_cache_node *n; struct page *page; struct page *t; + int objects = oo_objects(s->oo); struct list_head *slabs_by_inuse = - kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL); + kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL); unsigned long flags; if (!slabs_by_inuse) @@ -2803,7 +2828,7 @@ int kmem_cache_shrink(struct kmem_cache *s) if (!n->nr_partial) continue; - for (i = 0; i < s->objects; i++) + for (i = 0; i < objects; i++) INIT_LIST_HEAD(slabs_by_inuse + i); spin_lock_irqsave(&n->list_lock, flags); @@ -2835,7 +2860,7 @@ int kmem_cache_shrink(struct kmem_cache *s) * Rebuild the partial list with the slabs filled up most * first and the least used slabs at the end. 
*/ - for (i = s->objects - 1; i >= 0; i--) + for (i = objects - 1; i >= 0; i--) list_splice(slabs_by_inuse + i, n->partial.prev); spin_unlock_irqrestore(&n->list_lock, flags); @@ -3351,7 +3376,7 @@ static long validate_slab_cache(struct kmem_cache *s) { int node; unsigned long count = 0; - unsigned long *map = kmalloc(BITS_TO_LONGS(s->objects) * + unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->oo)) * sizeof(unsigned long), GFP_KERNEL); if (!map) @@ -3719,7 +3744,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, - n->nr_partial; if (flags & SO_OBJECTS) - x = full_slabs * s->objects; + x = full_slabs * oo_objects(s->oo); else x = full_slabs; total += x; @@ -3798,13 +3823,13 @@ SLAB_ATTR_RO(object_size); static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) { - return sprintf(buf, "%d\n", s->objects); + return sprintf(buf, "%d\n", oo_objects(s->oo)); } SLAB_ATTR_RO(objs_per_slab); static ssize_t order_show(struct kmem_cache *s, char *buf) { - return sprintf(buf, "%d\n", s->order); + return sprintf(buf, "%d\n", oo_order(s->oo)); } SLAB_ATTR_RO(order); @@ -4451,11 +4476,12 @@ static int s_show(struct seq_file *m, void *p) nr_inuse += count_partial(n); } - nr_objs = nr_slabs * s->objects; - nr_inuse += (nr_slabs - nr_partials) * s->objects; + nr_objs = nr_slabs * oo_objects(s->oo); + nr_inuse += (nr_slabs - nr_partials) * oo_objects(s->oo); seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse, - nr_objs, s->size, s->objects, (1 << s->order)); + nr_objs, s->size, oo_objects(s->oo), + (1 << oo_order(s->oo))); seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0); seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs, 0UL); -- cgit v1.2.3-71-gd317 From 205ab99dd103e3dd5b0964dad8a16dfe2db69b2e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 14 Apr 2008 19:11:40 +0300 Subject: slub: Update statistics handling for variable order slabs Change the statistics to consider that slabs of the same slabcache can have 
different number of objects in them since they may be of different order. Provide a new sysfs field total_objects which shows the total objects that the allocated slabs of a slabcache could hold. Add a max field that holds the largest slab order that was ever used for a slab cache. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- Documentation/vm/slabinfo.c | 27 ++++---- include/linux/slub_def.h | 2 + mm/slub.c | 150 ++++++++++++++++++++++++++++---------------- 3 files changed, 110 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c index 22d7e3e4d60c..d3ce295bffac 100644 --- a/Documentation/vm/slabinfo.c +++ b/Documentation/vm/slabinfo.c @@ -31,7 +31,7 @@ struct slabinfo { int hwcache_align, object_size, objs_per_slab; int sanity_checks, slab_size, store_user, trace; int order, poison, reclaim_account, red_zone; - unsigned long partial, objects, slabs; + unsigned long partial, objects, slabs, objects_partial, objects_total; unsigned long alloc_fastpath, alloc_slowpath; unsigned long free_fastpath, free_slowpath; unsigned long free_frozen, free_add_partial, free_remove_partial; @@ -540,7 +540,8 @@ void slabcache(struct slabinfo *s) return; store_size(size_str, slab_size(s)); - snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs); + snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs, + s->partial, s->cpu_slabs); if (!line++) first_line(); @@ -776,7 +777,6 @@ void totals(void) unsigned long used; unsigned long long wasted; unsigned long long objwaste; - long long objects_in_partial_slabs; unsigned long percentage_partial_slabs; unsigned long percentage_partial_objs; @@ -790,18 +790,11 @@ void totals(void) wasted = size - used; objwaste = s->slab_size - s->object_size; - objects_in_partial_slabs = s->objects - - (s->slabs - s->partial - s ->cpu_slabs) * - s->objs_per_slab; - - if (objects_in_partial_slabs < 0) - objects_in_partial_slabs = 0; - 
percentage_partial_slabs = s->partial * 100 / s->slabs; if (percentage_partial_slabs > 100) percentage_partial_slabs = 100; - percentage_partial_objs = objects_in_partial_slabs * 100 + percentage_partial_objs = s->objects_partial * 100 / s->objects; if (percentage_partial_objs > 100) @@ -823,8 +816,8 @@ void totals(void) min_objects = s->objects; if (used < min_used) min_used = used; - if (objects_in_partial_slabs < min_partobj) - min_partobj = objects_in_partial_slabs; + if (s->objects_partial < min_partobj) + min_partobj = s->objects_partial; if (percentage_partial_slabs < min_ppart) min_ppart = percentage_partial_slabs; if (percentage_partial_objs < min_ppartobj) @@ -848,8 +841,8 @@ void totals(void) max_objects = s->objects; if (used > max_used) max_used = used; - if (objects_in_partial_slabs > max_partobj) - max_partobj = objects_in_partial_slabs; + if (s->objects_partial > max_partobj) + max_partobj = s->objects_partial; if (percentage_partial_slabs > max_ppart) max_ppart = percentage_partial_slabs; if (percentage_partial_objs > max_ppartobj) @@ -864,7 +857,7 @@ void totals(void) total_objects += s->objects; total_used += used; - total_partobj += objects_in_partial_slabs; + total_partobj += s->objects_partial; total_ppart += percentage_partial_slabs; total_ppartobj += percentage_partial_objs; @@ -1160,6 +1153,8 @@ void read_slab_dir(void) slab->hwcache_align = get_obj("hwcache_align"); slab->object_size = get_obj("object_size"); slab->objects = get_obj("objects"); + slab->objects_partial = get_obj("objects_partial"); + slab->objects_total = get_obj("objects_total"); slab->objs_per_slab = get_obj("objs_per_slab"); slab->order = get_obj("order"); slab->partial = get_obj("partial"); diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 4131e5fbd18b..4236b5dee812 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -48,6 +48,7 @@ struct kmem_cache_node { struct list_head partial; #ifdef CONFIG_SLUB_DEBUG atomic_long_t nr_slabs; 
+ atomic_long_t total_objects; struct list_head full; #endif }; @@ -79,6 +80,7 @@ struct kmem_cache { struct kmem_cache_node local_node; /* Allocation and freeing of slabs */ + struct kmem_cache_order_objects max; gfp_t allocflags; /* gfp flags to use on each alloc */ int refcount; /* Refcount for slab cache destroy */ void (*ctor)(struct kmem_cache *, void *); diff --git a/mm/slub.c b/mm/slub.c index 0a220df5ed7c..c8514e93ffdf 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -886,7 +886,7 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node) return atomic_long_read(&n->nr_slabs); } -static inline void inc_slabs_node(struct kmem_cache *s, int node) +static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) { struct kmem_cache_node *n = get_node(s, node); @@ -896,14 +896,17 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node) * dilemma by deferring the increment of the count during * bootstrap (see early_kmem_cache_node_alloc). */ - if (!NUMA_BUILD || n) + if (!NUMA_BUILD || n) { atomic_long_inc(&n->nr_slabs); + atomic_long_add(objects, &n->total_objects); + } } -static inline void dec_slabs_node(struct kmem_cache *s, int node) +static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) { struct kmem_cache_node *n = get_node(s, node); atomic_long_dec(&n->nr_slabs); + atomic_long_sub(objects, &n->total_objects); } /* Object debug checks for alloc/free paths */ @@ -1101,9 +1104,12 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize, static inline unsigned long slabs_node(struct kmem_cache *s, int node) { return 0; } -static inline void inc_slabs_node(struct kmem_cache *s, int node) {} -static inline void dec_slabs_node(struct kmem_cache *s, int node) {} +static inline void inc_slabs_node(struct kmem_cache *s, int node, + int objects) {} +static inline void dec_slabs_node(struct kmem_cache *s, int node, + int objects) {} #endif + /* * Slab allocation and freeing */ @@ -1155,7 
+1161,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) if (!page) goto out; - inc_slabs_node(s, page_to_nid(page)); + inc_slabs_node(s, page_to_nid(page), page->objects); page->slab = s; page->flags |= 1 << PG_slab; if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | @@ -1230,7 +1236,7 @@ static void free_slab(struct kmem_cache *s, struct page *page) static void discard_slab(struct kmem_cache *s, struct page *page) { - dec_slabs_node(s, page_to_nid(page)); + dec_slabs_node(s, page_to_nid(page), page->objects); free_slab(s, page); } @@ -2144,7 +2150,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, init_tracking(kmalloc_caches, n); #endif init_kmem_cache_node(n); - inc_slabs_node(kmalloc_caches, node); + inc_slabs_node(kmalloc_caches, node, page->objects); /* * lockdep requires consistent irq usage for each lock @@ -2341,6 +2347,8 @@ static int calculate_sizes(struct kmem_cache *s) * Determine the number of objects per slab */ s->oo = oo_make(order, size); + if (oo_objects(s->oo) > oo_objects(s->max)) + s->max = s->oo; return !!oo_objects(s->oo); @@ -2813,7 +2821,7 @@ int kmem_cache_shrink(struct kmem_cache *s) struct kmem_cache_node *n; struct page *page; struct page *t; - int objects = oo_objects(s->oo); + int objects = oo_objects(s->max); struct list_head *slabs_by_inuse = kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL); unsigned long flags; @@ -3276,7 +3284,8 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, } #if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO) -static unsigned long count_partial(struct kmem_cache_node *n) +static unsigned long count_partial(struct kmem_cache_node *n, + int (*get_count)(struct page *)) { unsigned long flags; unsigned long x = 0; @@ -3284,10 +3293,25 @@ static unsigned long count_partial(struct kmem_cache_node *n) spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry(page, &n->partial, lru) - x 
+= page->inuse; + x += get_count(page); spin_unlock_irqrestore(&n->list_lock, flags); return x; } + +static int count_inuse(struct page *page) +{ + return page->inuse; +} + +static int count_total(struct page *page) +{ + return page->objects; +} + +static int count_free(struct page *page) +{ + return page->objects - page->inuse; +} #endif #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) @@ -3376,7 +3400,7 @@ static long validate_slab_cache(struct kmem_cache *s) { int node; unsigned long count = 0; - unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->oo)) * + unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * sizeof(unsigned long), GFP_KERNEL); if (!map) @@ -3676,22 +3700,23 @@ static int list_locations(struct kmem_cache *s, char *buf, } enum slab_stat_type { - SL_FULL, - SL_PARTIAL, - SL_CPU, - SL_OBJECTS + SL_ALL, /* All slabs */ + SL_PARTIAL, /* Only partially allocated slabs */ + SL_CPU, /* Only slabs used for cpu caches */ + SL_OBJECTS, /* Determine allocated objects not slabs */ + SL_TOTAL /* Determine object capacity not slabs */ }; -#define SO_FULL (1 << SL_FULL) +#define SO_ALL (1 << SL_ALL) #define SO_PARTIAL (1 << SL_PARTIAL) #define SO_CPU (1 << SL_CPU) #define SO_OBJECTS (1 << SL_OBJECTS) +#define SO_TOTAL (1 << SL_TOTAL) static ssize_t show_slab_objects(struct kmem_cache *s, char *buf, unsigned long flags) { unsigned long total = 0; - int cpu; int node; int x; unsigned long *nodes; @@ -3702,56 +3727,60 @@ static ssize_t show_slab_objects(struct kmem_cache *s, return -ENOMEM; per_cpu = nodes + nr_node_ids; - for_each_possible_cpu(cpu) { - struct page *page; - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + if (flags & SO_CPU) { + int cpu; - if (!c) - continue; + for_each_possible_cpu(cpu) { + struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - page = c->page; - node = c->node; - if (node < 0) - continue; - if (page) { - if (flags & SO_CPU) { - if (flags & SO_OBJECTS) - x = page->inuse; + if (!c || c->node < 0) + continue; + 
+ if (c->page) { + if (flags & SO_TOTAL) + x = c->page->objects; + else if (flags & SO_OBJECTS) + x = c->page->inuse; else x = 1; + total += x; - nodes[node] += x; + nodes[c->node] += x; } - per_cpu[node]++; + per_cpu[c->node]++; } } - for_each_node_state(node, N_NORMAL_MEMORY) { - struct kmem_cache_node *n = get_node(s, node); + if (flags & SO_ALL) { + for_each_node_state(node, N_NORMAL_MEMORY) { + struct kmem_cache_node *n = get_node(s, node); + + if (flags & SO_TOTAL) + x = atomic_long_read(&n->total_objects); + else if (flags & SO_OBJECTS) + x = atomic_long_read(&n->total_objects) - + count_partial(n, count_free); - if (flags & SO_PARTIAL) { - if (flags & SO_OBJECTS) - x = count_partial(n); else - x = n->nr_partial; + x = atomic_long_read(&n->nr_slabs); total += x; nodes[node] += x; } - if (flags & SO_FULL) { - int full_slabs = atomic_long_read(&n->nr_slabs) - - per_cpu[node] - - n->nr_partial; + } else if (flags & SO_PARTIAL) { + for_each_node_state(node, N_NORMAL_MEMORY) { + struct kmem_cache_node *n = get_node(s, node); - if (flags & SO_OBJECTS) - x = full_slabs * oo_objects(s->oo); + if (flags & SO_TOTAL) + x = count_partial(n, count_total); + else if (flags & SO_OBJECTS) + x = count_partial(n, count_inuse); else - x = full_slabs; + x = n->nr_partial; total += x; nodes[node] += x; } } - x = sprintf(buf, "%lu", total); #ifdef CONFIG_NUMA for_each_node_state(node, N_NORMAL_MEMORY) @@ -3852,7 +3881,7 @@ SLAB_ATTR_RO(aliases); static ssize_t slabs_show(struct kmem_cache *s, char *buf) { - return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); + return show_slab_objects(s, buf, SO_ALL); } SLAB_ATTR_RO(slabs); @@ -3870,10 +3899,22 @@ SLAB_ATTR_RO(cpu_slabs); static ssize_t objects_show(struct kmem_cache *s, char *buf) { - return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); + return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS); } SLAB_ATTR_RO(objects); +static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) +{ + return 
show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS); +} +SLAB_ATTR_RO(objects_partial); + +static ssize_t total_objects_show(struct kmem_cache *s, char *buf) +{ + return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); +} +SLAB_ATTR_RO(total_objects); + static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); @@ -4131,6 +4172,8 @@ static struct attribute *slab_attrs[] = { &objs_per_slab_attr.attr, &order_attr.attr, &objects_attr.attr, + &objects_partial_attr.attr, + &total_objects_attr.attr, &slabs_attr.attr, &partial_attr.attr, &cpu_slabs_attr.attr, @@ -4459,7 +4502,8 @@ static int s_show(struct seq_file *m, void *p) unsigned long nr_partials = 0; unsigned long nr_slabs = 0; unsigned long nr_inuse = 0; - unsigned long nr_objs; + unsigned long nr_objs = 0; + unsigned long nr_free = 0; struct kmem_cache *s; int node; @@ -4473,11 +4517,11 @@ static int s_show(struct seq_file *m, void *p) nr_partials += n->nr_partial; nr_slabs += atomic_long_read(&n->nr_slabs); - nr_inuse += count_partial(n); + nr_objs += atomic_long_read(&n->total_objects); + nr_free += count_partial(n, count_free); } - nr_objs = nr_slabs * oo_objects(s->oo); - nr_inuse += (nr_slabs - nr_partials) * oo_objects(s->oo); + nr_inuse = nr_objs - nr_free; seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse, nr_objs, s->size, oo_objects(s->oo), -- cgit v1.2.3-71-gd317 From 65c3376aaca96c66aa76014aaf430398964b68cb Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 14 Apr 2008 19:11:40 +0300 Subject: slub: Fallback to minimal order during slab page allocation If any higher order allocation fails then fall back to the smallest order necessary to contain at least one object. This enables fallback for all allocations to order 0 pages. The fallback will waste more memory (objects will not fit neatly) and the fallback slabs will not be as efficient as larger slabs since they contain fewer objects.
Note that SLAB also depends on order 1 allocations for some slabs that waste too much memory if forced into PAGE_SIZE'd page. SLUB can now deal with failing order 1 allocs which SLAB cannot do. Add a new field min that will contain the objects for the smallest possible order for a slab cache. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 ++ mm/slub.c | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 4236b5dee812..71e43a12ebbb 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -29,6 +29,7 @@ enum stat_item { DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */ DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */ DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */ + ORDER_FALLBACK, /* Number of times fallback was necessary */ NR_SLUB_STAT_ITEMS }; struct kmem_cache_cpu { @@ -81,6 +82,7 @@ struct kmem_cache { /* Allocation and freeing of slabs */ struct kmem_cache_order_objects max; + struct kmem_cache_order_objects min; gfp_t allocflags; /* gfp flags to use on each alloc */ int refcount; /* Refcount for slab cache destroy */ void (*ctor)(struct kmem_cache *, void *); diff --git a/mm/slub.c b/mm/slub.c index c8514e93ffdf..35c22d940ba7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1113,28 +1113,43 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, /* * Slab allocation and freeing */ +static inline struct page *alloc_slab_page(gfp_t flags, int node, + struct kmem_cache_order_objects oo) +{ + int order = oo_order(oo); + + if (node == -1) + return alloc_pages(flags, order); + else + return alloc_pages_node(node, flags, order); +} + static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) { struct page *page; struct kmem_cache_order_objects oo = s->oo; - int order =
oo_order(oo); - int pages = 1 << order; flags |= s->allocflags; - if (node == -1) - page = alloc_pages(flags, order); - else - page = alloc_pages_node(node, flags, order); - - if (!page) - return NULL; + page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node, + oo); + if (unlikely(!page)) { + oo = s->min; + /* + * Allocation may have failed due to fragmentation. + * Try a lower order alloc if possible + */ + page = alloc_slab_page(flags, node, oo); + if (!page) + return NULL; + stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); + } page->objects = oo_objects(oo); mod_zone_page_state(page_zone(page), (s->flags & SLAB_RECLAIM_ACCOUNT) ? NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, - pages); + 1 << oo_order(oo)); return page; } @@ -2347,6 +2362,7 @@ static int calculate_sizes(struct kmem_cache *s) * Determine the number of objects per slab */ s->oo = oo_make(order, size); + s->min = oo_make(get_order(size), size); if (oo_objects(s->oo) > oo_objects(s->max)) s->max = s->oo; @@ -4163,7 +4179,7 @@ STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); - +STAT_ATTR(ORDER_FALLBACK, order_fallback); #endif static struct attribute *slab_attrs[] = { @@ -4216,6 +4232,7 @@ static struct attribute *slab_attrs[] = { &deactivate_to_head_attr.attr, &deactivate_to_tail_attr.attr, &deactivate_remote_frees_attr.attr, + &order_fallback_attr.attr, #endif NULL }; -- cgit v1.2.3-71-gd317 From 2532386f480eefbdd67b48be55fb4fb3e5a6081c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 18 Apr 2008 10:09:25 -0400 Subject: Audit: collect sessionid in netlink messages Previously I added sessionid output to all audit messages where it was available but we still didn't know the sessionid of the sender of netlink messages. This patch adds that information to netlink messages so we can audit who sent netlink messages. 
Signed-off-by: Eric Paris Signed-off-by: Al Viro --- drivers/char/tty_audit.c | 7 +--- include/linux/audit.h | 3 +- include/linux/netlink.h | 1 + include/linux/tty.h | 4 +-- include/net/netlabel.h | 1 + include/net/xfrm.h | 23 +++++++------ kernel/audit.c | 72 ++++++++++++++++++++++----------------- kernel/auditfilter.c | 16 +++++---- net/key/af_key.c | 17 ++++++--- net/netlabel/netlabel_unlabeled.c | 1 + net/netlabel/netlabel_user.c | 4 ++- net/netlabel/netlabel_user.h | 1 + net/netlink/af_netlink.c | 1 + net/xfrm/xfrm_policy.c | 12 ++++--- net/xfrm/xfrm_state.c | 13 ++++--- net/xfrm/xfrm_user.c | 41 +++++++++++++++------- security/smack/smackfs.c | 2 ++ 17 files changed, 132 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index 7722466e052f..9739bbfc8f70 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -151,14 +151,9 @@ void tty_audit_fork(struct signal_struct *sig) /** * tty_audit_push_task - Flush task's pending audit data */ -void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid) +void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid) { struct tty_audit_buf *buf; - /* FIXME I think this is correct. Check against netlink once that is - * I really need to read this code more closely. But that's for - * another patch. 
- */ - unsigned int sessionid = audit_get_sessionid(tsk); spin_lock_irq(&tsk->sighand->siglock); buf = tsk->signal->tty_audit_buf; diff --git a/include/linux/audit.h b/include/linux/audit.h index 4ccb048cae1d..25f6ae30dd4b 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -569,7 +569,8 @@ extern int audit_update_lsm_rules(void); extern int audit_filter_user(struct netlink_skb_parms *cb, int type); extern int audit_filter_type(int type); extern int audit_receive_filter(int type, int pid, int uid, int seq, - void *data, size_t datasz, uid_t loginuid, u32 sid); + void *data, size_t datasz, uid_t loginuid, + u32 sessionid, u32 sid); extern int audit_enabled; #else #define audit_log(c,g,t,f,...) do { ; } while (0) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index fb0713b6ffaf..bec1062a25a1 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -166,6 +166,7 @@ struct netlink_skb_parms __u32 dst_group; kernel_cap_t eff_cap; __u32 loginuid; /* Login (audit) uid */ + __u32 sessionid; /* Session id (audit) */ __u32 sid; /* SELinux security id */ }; diff --git a/include/linux/tty.h b/include/linux/tty.h index dd8e08fe8855..430624504ca0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -351,7 +351,7 @@ extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_push(struct tty_struct *tty); -extern void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid); +extern void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid); extern void tty_audit_opening(void); #else static inline void tty_audit_add_data(struct tty_struct *tty, @@ -367,7 +367,7 @@ static inline void tty_audit_fork(struct signal_struct *sig) static inline void tty_audit_push(struct tty_struct *tty) { } -static inline void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid) +static inline 
void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid) { } static inline void tty_audit_opening(void) diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 5e53a85b5ca1..e4d2d6baa983 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -103,6 +103,7 @@ struct cipso_v4_doi; struct netlbl_audit { u32 secid; uid_t loginuid; + u32 sessionid; }; /* diff --git a/include/net/xfrm.h b/include/net/xfrm.h index baa9f372cfd1..d1350bcccb03 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -597,8 +597,9 @@ struct xfrm_spi_skb_cb { /* Audit Information */ struct xfrm_audit { - u32 loginuid; u32 secid; + uid_t loginuid; + u32 sessionid; }; #ifdef CONFIG_AUDITSYSCALL @@ -616,13 +617,13 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op) return audit_buf; } -static inline void xfrm_audit_helper_usrinfo(u32 auid, u32 secid, +static inline void xfrm_audit_helper_usrinfo(uid_t auid, u32 ses, u32 secid, struct audit_buffer *audit_buf) { char *secctx; u32 secctx_len; - audit_log_format(audit_buf, " auid=%u", auid); + audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses); if (secid != 0 && security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) { audit_log_format(audit_buf, " subj=%s", secctx); @@ -632,13 +633,13 @@ static inline void xfrm_audit_helper_usrinfo(u32 auid, u32 secid, } extern void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - u32 auid, u32 secid); + u32 auid, u32 ses, u32 secid); extern void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - u32 auid, u32 secid); + u32 auid, u32 ses, u32 secid); extern void xfrm_audit_state_add(struct xfrm_state *x, int result, - u32 auid, u32 secid); + u32 auid, u32 ses, u32 secid); extern void xfrm_audit_state_delete(struct xfrm_state *x, int result, - u32 auid, u32 secid); + u32 auid, u32 ses, u32 secid); extern void xfrm_audit_state_replay_overflow(struct xfrm_state *x, struct sk_buff *skb); extern void 
xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family); @@ -647,10 +648,10 @@ extern void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family, extern void xfrm_audit_state_icvfail(struct xfrm_state *x, struct sk_buff *skb, u8 proto); #else -#define xfrm_audit_policy_add(x, r, a, s) do { ; } while (0) -#define xfrm_audit_policy_delete(x, r, a, s) do { ; } while (0) -#define xfrm_audit_state_add(x, r, a, s) do { ; } while (0) -#define xfrm_audit_state_delete(x, r, a, s) do { ; } while (0) +#define xfrm_audit_policy_add(x, r, a, se, s) do { ; } while (0) +#define xfrm_audit_policy_delete(x, r, a, se, s) do { ; } while (0) +#define xfrm_audit_state_add(x, r, a, se, s) do { ; } while (0) +#define xfrm_audit_state_delete(x, r, a, se, s) do { ; } while (0) #define xfrm_audit_state_replay_overflow(x, s) do { ; } while (0) #define xfrm_audit_state_notfound_simple(s, f) do { ; } while (0) #define xfrm_audit_state_notfound(s, f, sp, sq) do { ; } while (0) diff --git a/kernel/audit.c b/kernel/audit.c index a7b16086d36f..ad6d1abfa1d2 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -252,14 +252,15 @@ void audit_log_lost(const char *message) } static int audit_log_config_change(char *function_name, int new, int old, - uid_t loginuid, u32 sid, int allow_changes) + uid_t loginuid, u32 sessionid, u32 sid, + int allow_changes) { struct audit_buffer *ab; int rc = 0; ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); - audit_log_format(ab, "%s=%d old=%d by auid=%u", function_name, new, - old, loginuid); + audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new, + old, loginuid, sessionid); if (sid) { char *ctx = NULL; u32 len; @@ -279,7 +280,8 @@ static int audit_log_config_change(char *function_name, int new, int old, } static int audit_do_config_change(char *function_name, int *to_change, - int new, uid_t loginuid, u32 sid) + int new, uid_t loginuid, u32 sessionid, + u32 sid) { int allow_changes, rc = 0, old = *to_change; @@ -290,8 +292,8 
@@ static int audit_do_config_change(char *function_name, int *to_change, allow_changes = 1; if (audit_enabled != AUDIT_OFF) { - rc = audit_log_config_change(function_name, new, old, - loginuid, sid, allow_changes); + rc = audit_log_config_change(function_name, new, old, loginuid, + sessionid, sid, allow_changes); if (rc) allow_changes = 0; } @@ -305,26 +307,28 @@ static int audit_do_config_change(char *function_name, int *to_change, return rc; } -static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid) +static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sessionid, + u32 sid) { return audit_do_config_change("audit_rate_limit", &audit_rate_limit, - limit, loginuid, sid); + limit, loginuid, sessionid, sid); } -static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid) +static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sessionid, + u32 sid) { return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit, - limit, loginuid, sid); + limit, loginuid, sessionid, sid); } -static int audit_set_enabled(int state, uid_t loginuid, u32 sid) +static int audit_set_enabled(int state, uid_t loginuid, u32 sessionid, u32 sid) { int rc; if (state < AUDIT_OFF || state > AUDIT_LOCKED) return -EINVAL; rc = audit_do_config_change("audit_enabled", &audit_enabled, state, - loginuid, sid); + loginuid, sessionid, sid); if (!rc) audit_ever_enabled |= !!state; @@ -332,7 +336,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sid) return rc; } -static int audit_set_failure(int state, uid_t loginuid, u32 sid) +static int audit_set_failure(int state, uid_t loginuid, u32 sessionid, u32 sid) { if (state != AUDIT_FAIL_SILENT && state != AUDIT_FAIL_PRINTK @@ -340,7 +344,7 @@ static int audit_set_failure(int state, uid_t loginuid, u32 sid) return -EINVAL; return audit_do_config_change("audit_failure", &audit_failure, state, - loginuid, sid); + loginuid, sessionid, sid); } static int kauditd_thread(void *dummy) @@ -385,7 +389,7 
@@ static int kauditd_thread(void *dummy) return 0; } -static int audit_prepare_user_tty(pid_t pid, uid_t loginuid) +static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid) { struct task_struct *tsk; int err; @@ -404,7 +408,7 @@ static int audit_prepare_user_tty(pid_t pid, uid_t loginuid) if (err) goto out; - tty_audit_push_task(tsk, loginuid); + tty_audit_push_task(tsk, loginuid, sessionid); out: read_unlock(&tasklist_lock); return err; @@ -534,7 +538,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) } static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, - u32 pid, u32 uid, uid_t auid, u32 sid) + u32 pid, u32 uid, uid_t auid, u32 ses, + u32 sid) { int rc = 0; char *ctx = NULL; @@ -546,8 +551,8 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, } *ab = audit_log_start(NULL, GFP_KERNEL, msg_type); - audit_log_format(*ab, "user pid=%d uid=%u auid=%u", - pid, uid, auid); + audit_log_format(*ab, "user pid=%d uid=%u auid=%u ses=%u", + pid, uid, auid, ses); if (sid) { rc = security_secid_to_secctx(sid, &ctx, &len); if (rc) @@ -570,6 +575,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; uid_t loginuid; /* loginuid of sender */ + u32 sessionid; struct audit_sig_info *sig_data; char *ctx = NULL; u32 len; @@ -591,6 +597,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) pid = NETLINK_CREDS(skb)->pid; uid = NETLINK_CREDS(skb)->uid; loginuid = NETLINK_CB(skb).loginuid; + sessionid = NETLINK_CB(skb).sessionid; sid = NETLINK_CB(skb).sid; seq = nlh->nlmsg_seq; data = NLMSG_DATA(nlh); @@ -613,12 +620,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) status_get = (struct audit_status *)data; if (status_get->mask & AUDIT_STATUS_ENABLED) { err = audit_set_enabled(status_get->enabled, - loginuid, sid); + loginuid, sessionid, sid); if (err < 0) return err; } 
if (status_get->mask & AUDIT_STATUS_FAILURE) { err = audit_set_failure(status_get->failure, - loginuid, sid); + loginuid, sessionid, sid); if (err < 0) return err; } if (status_get->mask & AUDIT_STATUS_PID) { @@ -627,17 +634,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, audit_pid, loginuid, - sid, 1); + sessionid, sid, 1); audit_pid = new_pid; audit_nlk_pid = NETLINK_CB(skb).pid; } if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) err = audit_set_rate_limit(status_get->rate_limit, - loginuid, sid); + loginuid, sessionid, sid); if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) err = audit_set_backlog_limit(status_get->backlog_limit, - loginuid, sid); + loginuid, sessionid, sid); break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: @@ -649,12 +656,13 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err == 1) { err = 0; if (msg_type == AUDIT_USER_TTY) { - err = audit_prepare_user_tty(pid, loginuid); + err = audit_prepare_user_tty(pid, loginuid, + sessionid); if (err) break; } audit_log_common_recv_msg(&ab, msg_type, pid, uid, - loginuid, sid); + loginuid, sessionid, sid); if (msg_type != AUDIT_USER_TTY) audit_log_format(ab, " msg='%.1024s'", @@ -677,7 +685,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid, - uid, loginuid, sid); + uid, loginuid, sessionid, sid); audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled); @@ -688,7 +696,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_LIST: err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), - loginuid, sid); + loginuid, sessionid, sid); break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: @@ -696,7 +704,7 @@ static int audit_receive_msg(struct 
sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid, - uid, loginuid, sid); + uid, loginuid, sessionid, sid); audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled); @@ -707,13 +715,13 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_LIST_RULES: err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, uid, seq, data, nlmsg_len(nlh), - loginuid, sid); + loginuid, sessionid, sid); break; case AUDIT_TRIM: audit_trim_trees(); audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid, - uid, loginuid, sid); + uid, loginuid, sessionid, sid); audit_log_format(ab, " op=trim res=1"); audit_log_end(ab); @@ -745,7 +753,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) err = audit_tag_tree(old, new); audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid, - uid, loginuid, sid); + uid, loginuid, sessionid, sid); audit_log_format(ab, " op=make_equiv old="); audit_log_untrustedstring(ab, old); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 28fef6bf8534..af3ae91c47b1 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1500,8 +1500,9 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q) } /* Log rule additions and removals */ -static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action, - struct audit_krule *rule, int res) +static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, + char *action, struct audit_krule *rule, + int res) { struct audit_buffer *ab; @@ -1511,7 +1512,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action, ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); if (!ab) return; - audit_log_format(ab, "auid=%u", loginuid); + audit_log_format(ab, "auid=%u ses=%u", loginuid, sessionid); if (sid) { char *ctx = NULL; u32 len; @@ -1543,7 +1544,7 @@ static void audit_log_rule_change(uid_t 
loginuid, u32 sid, char *action, * @sid: SE Linux Security ID of sender */ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, - size_t datasz, uid_t loginuid, u32 sid) + size_t datasz, uid_t loginuid, u32 sessionid, u32 sid) { struct task_struct *tsk; struct audit_netlink_list *dest; @@ -1590,7 +1591,8 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, err = audit_add_rule(entry, &audit_filter_list[entry->rule.listnr]); - audit_log_rule_change(loginuid, sid, "add", &entry->rule, !err); + audit_log_rule_change(loginuid, sessionid, sid, "add", + &entry->rule, !err); if (err) audit_free_rule(entry); @@ -1606,8 +1608,8 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, err = audit_del_rule(entry, &audit_filter_list[entry->rule.listnr]); - audit_log_rule_change(loginuid, sid, "remove", &entry->rule, - !err); + audit_log_rule_change(loginuid, sessionid, sid, "remove", + &entry->rule, !err); audit_free_rule(entry); break; diff --git a/net/key/af_key.c b/net/key/af_key.c index 2403a31fe0f6..9e7236ff6bcc 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1498,7 +1498,8 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, err = xfrm_state_update(x); xfrm_audit_state_add(x, err ? 0 : 1, - audit_get_loginuid(current), 0); + audit_get_loginuid(current), + audit_get_sessionid(current), 0); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -1552,7 +1553,8 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h km_state_notify(x, &c); out: xfrm_audit_state_delete(x, err ? 
0 : 1, - audit_get_loginuid(current), 0); + audit_get_loginuid(current), + audit_get_sessionid(current), 0); xfrm_state_put(x); return err; @@ -1728,6 +1730,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd return -EINVAL; audit_info.loginuid = audit_get_loginuid(current); + audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; err = xfrm_state_flush(proto, &audit_info); if (err) @@ -2324,7 +2327,8 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h hdr->sadb_msg_type != SADB_X_SPDUPDATE); xfrm_audit_policy_add(xp, err ? 0 : 1, - audit_get_loginuid(current), 0); + audit_get_loginuid(current), + audit_get_sessionid(current), 0); if (err) goto out; @@ -2406,7 +2410,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return -ENOENT; xfrm_audit_policy_delete(xp, err ? 0 : 1, - audit_get_loginuid(current), 0); + audit_get_loginuid(current), + audit_get_sessionid(current), 0); if (err) goto out; @@ -2667,7 +2672,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (delete) { xfrm_audit_policy_delete(xp, err ? 0 : 1, - audit_get_loginuid(current), 0); + audit_get_loginuid(current), + audit_get_sessionid(current), 0); if (err) goto out; @@ -2767,6 +2773,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg int err; audit_info.loginuid = audit_get_loginuid(current); + audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; err = xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); if (err) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index d282ad1570a7..0099da5b2591 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1780,6 +1780,7 @@ int __init netlbl_unlabel_defconf(void) * messages so don't worry to much about these values. 
*/ security_task_getsecid(current, &audit_info.secid); audit_info.loginuid = 0; + audit_info.sessionid = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index b17d4203806e..68706b4e3bf8 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -107,7 +107,9 @@ struct audit_buffer *netlbl_audit_start_common(int type, if (audit_buf == NULL) return NULL; - audit_log_format(audit_buf, "netlabel: auid=%u", audit_info->loginuid); + audit_log_format(audit_buf, "netlabel: auid=%u ses=%u", + audit_info->loginuid, + audit_info->sessionid); if (audit_info->secid != 0 && security_secid_to_secctx(audit_info->secid, diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 6d7f4ab46c2b..6caef8b20611 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -51,6 +51,7 @@ static inline void netlbl_netlink_auditinfo(struct sk_buff *skb, { audit_info->secid = NETLINK_CB(skb).sid; audit_info->loginuid = NETLINK_CB(skb).loginuid; + audit_info->sessionid = NETLINK_CB(skb).sessionid; } /* NetLabel NETLINK I/O functions */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 46f3e44bb83a..9b97f8006c9c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1248,6 +1248,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).pid = nlk->pid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).loginuid = audit_get_loginuid(current); + NETLINK_CB(skb).sessionid = audit_get_sessionid(current); security_task_getsecid(current, &(NETLINK_CB(skb).sid)); memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e0c0390613c0..cae9fd815543 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -762,6 +762,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) if 
(err) { xfrm_audit_policy_delete(pol, 0, audit_info->loginuid, + audit_info->sessionid, audit_info->secid); return err; } @@ -777,6 +778,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) if (err) { xfrm_audit_policy_delete(pol, 0, audit_info->loginuid, + audit_info->sessionid, audit_info->secid); return err; } @@ -819,6 +821,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) write_unlock_bh(&xfrm_policy_lock); xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, + audit_info->sessionid, audit_info->secid); xfrm_policy_kill(pol); @@ -841,6 +844,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, + audit_info->sessionid, audit_info->secid); xfrm_policy_kill(pol); killed++; @@ -2472,14 +2476,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - u32 auid, u32 secid) + uid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, secid, audit_buf); + xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); @@ -2487,14 +2491,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - u32 auid, u32 secid) + uid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, secid, audit_buf); + xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); diff --git a/net/xfrm/xfrm_state.c 
b/net/xfrm/xfrm_state.c index 5dcc10b93c86..c3f5f70934ec 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -496,7 +496,8 @@ expired: km_state_expired(x, 1, 0); xfrm_audit_state_delete(x, err ? 0 : 1, - audit_get_loginuid(current), 0); + audit_get_loginuid(current), + audit_get_sessionid(current), 0); out: spin_unlock(&x->lock); @@ -603,6 +604,7 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, audit_info->loginuid, + audit_info->sessionid, audit_info->secid); return err; } @@ -641,6 +643,7 @@ restart: err = xfrm_state_delete(x); xfrm_audit_state_delete(x, err ? 0 : 1, audit_info->loginuid, + audit_info->sessionid, audit_info->secid); xfrm_state_put(x); @@ -2123,14 +2126,14 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, } void xfrm_audit_state_add(struct xfrm_state *x, int result, - u32 auid, u32 secid) + uid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SAD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, secid, audit_buf); + xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); xfrm_audit_helper_sainfo(x, audit_buf); audit_log_format(audit_buf, " res=%u", result); audit_log_end(audit_buf); @@ -2138,14 +2141,14 @@ void xfrm_audit_state_add(struct xfrm_state *x, int result, EXPORT_SYMBOL_GPL(xfrm_audit_state_add); void xfrm_audit_state_delete(struct xfrm_state *x, int result, - u32 auid, u32 secid) + uid_t auid, u32 sessionid, u32 secid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SAD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, secid, audit_buf); + xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); xfrm_audit_helper_sainfo(x, audit_buf); audit_log_format(audit_buf, " res=%u", result); audit_log_end(audit_buf); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 
22a30ae582a2..a1b0fbe3ea35 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -407,6 +407,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; int err; struct km_event c; + uid_t loginuid = NETLINK_CB(skb).loginuid; + u32 sessionid = NETLINK_CB(skb).sessionid; + u32 sid = NETLINK_CB(skb).sid; err = verify_newsa_info(p, attrs); if (err) @@ -422,8 +425,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, else err = xfrm_state_update(x); - xfrm_audit_state_add(x, err ? 0 : 1, NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid); if (err < 0) { x->km.state = XFRM_STATE_DEAD; @@ -478,6 +480,9 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); + uid_t loginuid = NETLINK_CB(skb).loginuid; + u32 sessionid = NETLINK_CB(skb).sessionid; + u32 sid = NETLINK_CB(skb).sid; x = xfrm_user_state_lookup(p, attrs, &err); if (x == NULL) @@ -502,8 +507,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_notify(x, &c); out: - xfrm_audit_state_delete(x, err ? 0 : 1, NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid); xfrm_state_put(x); return err; } @@ -1123,6 +1127,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int err; int excl; + uid_t loginuid = NETLINK_CB(skb).loginuid; + u32 sessionid = NETLINK_CB(skb).sessionid; + u32 sid = NETLINK_CB(skb).sid; err = verify_newpolicy_info(p); if (err) @@ -1141,8 +1148,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); - xfrm_audit_policy_add(xp, err ? 
0 : 1, NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid); if (err) { security_xfrm_policy_free(xp->security); @@ -1371,9 +1377,12 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).pid); } } else { - xfrm_audit_policy_delete(xp, err ? 0 : 1, - NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + uid_t loginuid = NETLINK_CB(skb).loginuid; + u32 sessionid = NETLINK_CB(skb).sessionid; + u32 sid = NETLINK_CB(skb).sid; + + xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid, + sid); if (err != 0) goto out; @@ -1399,6 +1408,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err; audit_info.loginuid = NETLINK_CB(skb).loginuid; + audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; err = xfrm_state_flush(p->proto, &audit_info); if (err) @@ -1546,6 +1556,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return err; audit_info.loginuid = NETLINK_CB(skb).loginuid; + audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; err = xfrm_policy_flush(type, &audit_info); if (err) @@ -1604,9 +1615,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, read_unlock(&xp->lock); err = 0; if (up->hard) { + uid_t loginuid = NETLINK_CB(skb).loginuid; + uid_t sessionid = NETLINK_CB(skb).sessionid; + u32 sid = NETLINK_CB(skb).sid; xfrm_policy_delete(xp, p->dir); - xfrm_audit_policy_delete(xp, 1, NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid); } else { // reset the timers here? 
@@ -1640,9 +1653,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_expired(x, ue->hard, current->pid); if (ue->hard) { + uid_t loginuid = NETLINK_CB(skb).loginuid; + uid_t sessionid = NETLINK_CB(skb).sessionid; + u32 sid = NETLINK_CB(skb).sid; __xfrm_state_delete(x); - xfrm_audit_state_delete(x, 1, NETLINK_CB(skb).loginuid, - NETLINK_CB(skb).sid); + xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid); } err = 0; out: diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 6ba283783b70..5d1bee0fa513 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -324,6 +324,7 @@ void smk_cipso_doi(void) struct netlbl_audit audit_info; audit_info.loginuid = audit_get_loginuid(current); + audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = smack_to_secid(current->security); rc = netlbl_cfg_map_del(NULL, &audit_info); @@ -356,6 +357,7 @@ void smk_unlbl_ambient(char *oldambient) struct netlbl_audit audit_info; audit_info.loginuid = audit_get_loginuid(current); + audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = smack_to_secid(current->security); if (oldambient != NULL) { -- cgit v1.2.3-71-gd317 From b556f8ad58c6e9f8f485c8cef7546e3fc82c382a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 18 Apr 2008 10:12:59 -0400 Subject: Audit: standardize string audit interfaces This patch standardized the string auditing interfaces. No userspace changes will be visible and this is all just cleanup and consistency work.
We have the following string audit interfaces to use: void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len); void audit_log_n_string(struct audit_buffer *ab, const char *buf, size_t n); void audit_log_string(struct audit_buffer *ab, const char *buf); void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string, size_t n); void audit_log_untrustedstring(struct audit_buffer *ab, const char *string); This may be the first step to possibly fixing some of the issues that people have with the string output from the kernel audit system. But we still don't have an agreed upon solution to that problem. Signed-off-by: Eric Paris Signed-off-by: Al Viro --- drivers/char/tty_audit.c | 2 +- include/linux/audit.h | 22 ++++++++++++++-------- kernel/audit.c | 19 +++++++++---------- kernel/auditsc.c | 8 ++++---- security/selinux/avc.c | 2 +- 5 files changed, 29 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index 9739bbfc8f70..caeedd12d494 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -92,7 +92,7 @@ static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid, get_task_comm(name, tsk); audit_log_untrustedstring(ab, name); audit_log_format(ab, " data="); - audit_log_n_untrustedstring(ab, buf->valid, buf->data); + audit_log_n_untrustedstring(ab, buf->data, buf->valid); audit_log_end(ab); } buf->valid = 0; diff --git a/include/linux/audit.h b/include/linux/audit.h index 25f6ae30dd4b..f938335af75e 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -549,16 +549,20 @@ extern void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) 
__attribute__((format(printf,2,3))); extern void audit_log_end(struct audit_buffer *ab); -extern void audit_log_hex(struct audit_buffer *ab, - const unsigned char *buf, - size_t len); extern int audit_string_contains_control(const char *string, size_t len); +extern void audit_log_n_hex(struct audit_buffer *ab, + const unsigned char *buf, + size_t len); +extern void audit_log_n_string(struct audit_buffer *ab, + const char *buf, + size_t n); +#define audit_log_string(a,b) audit_log_n_string(a, b, strlen(b)); +extern void audit_log_n_untrustedstring(struct audit_buffer *ab, + const char *string, + size_t n); extern void audit_log_untrustedstring(struct audit_buffer *ab, const char *string); -extern void audit_log_n_untrustedstring(struct audit_buffer *ab, - size_t n, - const char *string); extern void audit_log_d_path(struct audit_buffer *ab, const char *prefix, struct path *path); @@ -578,9 +582,11 @@ extern int audit_enabled; #define audit_log_vformat(b,f,a) do { ; } while (0) #define audit_log_format(b,f,...) 
do { ; } while (0) #define audit_log_end(b) do { ; } while (0) -#define audit_log_hex(a,b,l) do { ; } while (0) -#define audit_log_untrustedstring(a,s) do { ; } while (0) +#define audit_log_n_hex(a,b,l) do { ; } while (0) +#define audit_log_n_string(a,c,l) do { ; } while (0) +#define audit_log_string(a,c) do { ; } while (0) #define audit_log_n_untrustedstring(a,n,s) do { ; } while (0) +#define audit_log_untrustedstring(a,s) do { ; } while (0) #define audit_log_d_path(b, p, d) do { ; } while (0) #define audit_enabled 0 #endif diff --git a/kernel/audit.c b/kernel/audit.c index 520583d8ca18..5b9ad3dda885 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -757,8 +757,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_log_format(ab, " msg="); size = nlmsg_len(nlh); - audit_log_n_untrustedstring(ab, size, - data); + audit_log_n_untrustedstring(ab, data, size); } audit_set_pid(ab, pid); audit_log_end(ab); @@ -1293,7 +1292,7 @@ void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) * This function will take the passed buf and convert it into a string of * ascii hex digits. The new string is placed onto the skb. */ -void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, +void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len) { int i, avail, new_len; @@ -1329,8 +1328,8 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, * Format a string of no more than slen characters into the audit buffer, * enclosed in quote marks. */ -static void audit_log_n_string(struct audit_buffer *ab, size_t slen, - const char *string) +void audit_log_n_string(struct audit_buffer *ab, const char *string, + size_t slen) { int avail, new_len; unsigned char *ptr; @@ -1386,13 +1385,13 @@ int audit_string_contains_control(const char *string, size_t len) * The caller specifies the number of characters in the string to log, which may * or may not be the entire string. 
*/ -void audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len, - const char *string) +void audit_log_n_untrustedstring(struct audit_buffer *ab, const char *string, + size_t len) { if (audit_string_contains_control(string, len)) - audit_log_hex(ab, string, len); + audit_log_n_hex(ab, string, len); else - audit_log_n_string(ab, len, string); + audit_log_n_string(ab, string, len); } /** @@ -1405,7 +1404,7 @@ void audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len, */ void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) { - audit_log_n_untrustedstring(ab, strlen(string), string); + audit_log_n_untrustedstring(ab, string, strlen(string)); } /* This is a helper-function to print the escaped d_path */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index d7249fcdc442..0072b1d8b258 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1095,7 +1095,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, audit_log_format(*ab, "[%d]", i); audit_log_format(*ab, "="); if (has_cntl) - audit_log_hex(*ab, buf, to_send); + audit_log_n_hex(*ab, buf, to_send); else audit_log_format(*ab, "\"%s\"", buf); audit_log_format(*ab, "\n"); @@ -1307,7 +1307,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts struct audit_aux_data_sockaddr *axs = (void *)aux; audit_log_format(ab, "saddr="); - audit_log_hex(ab, axs->a, axs->len); + audit_log_n_hex(ab, axs->a, axs->len); break; } case AUDIT_FD_PAIR: { @@ -1371,8 +1371,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts default: /* log the name's directory component */ audit_log_format(ab, " name="); - audit_log_n_untrustedstring(ab, n->name_len, - n->name); + audit_log_n_untrustedstring(ab, n->name, + n->name_len); } } else audit_log_format(ab, " name=(null)"); diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 95a8ef4a5073..114b4b4c97b2 100644 --- a/security/selinux/avc.c +++ 
b/security/selinux/avc.c @@ -646,7 +646,7 @@ void avc_audit(u32 ssid, u32 tsid, if (*p) audit_log_untrustedstring(ab, p); else - audit_log_hex(ab, p, len); + audit_log_n_hex(ab, p, len); break; } } -- cgit v1.2.3-71-gd317 From a42da93c8641a0b49405ceb2a2063975c823aa49 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 18 Apr 2008 10:36:22 -0400 Subject: Audit: increase the maximum length of the key field Key lengths were arbitrarily limited to 32 characters. If userspace is going to start using the single kernel key field as multiple virtual key fields (example key=key1,key2,key3,key4) we should give them enough room to work. Signed-off-by: Eric Paris Signed-off-by: Al Viro --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index f938335af75e..dcd5395b400d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -146,7 +146,7 @@ /* Rule structure sizes -- if these change, different AUDIT_ADD and * AUDIT_LIST commands must be implemented. */ #define AUDIT_MAX_FIELDS 64 -#define AUDIT_MAX_KEY_LEN 32 +#define AUDIT_MAX_KEY_LEN 256 #define AUDIT_BITMASK_SIZE 64 #define AUDIT_WORD(nr) ((__u32)((nr)/32)) #define AUDIT_BIT(nr) (1 << ((nr) - AUDIT_WORD(nr)*32)) -- cgit v1.2.3-71-gd317 From 41126226e186d92a45ed664e546abb5204588359 Mon Sep 17 00:00:00 2001 From: Miloslav Trmac Date: Fri, 18 Apr 2008 13:30:14 -0700 Subject: [patch 1/2] audit: let userspace fully control TTY input auditing Remove the code that automatically disables TTY input auditing in processes that open TTYs when they have no other TTY open; this heuristic was intended to automatically handle daemons, but it has false positives (e.g. with sshd) that make it impossible to control TTY input auditing from a PAM module. With this patch, TTY input auditing is controlled from user-space only. 
On the other hand, not even for daemons does it make sense to audit "input" from PTY masters; this data was produced by a program writing to the PTY slave, and does not represent data entered by the user. Signed-off-by: Miloslav Trmac Cc: Al Viro Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- drivers/char/tty_audit.c | 54 ++++-------------------------------------------- drivers/char/tty_io.c | 5 +---- include/linux/tty.h | 5 ----- 3 files changed, 5 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index caeedd12d494..6342b0534f4d 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -233,6 +233,10 @@ void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, if (unlikely(size == 0)) return; + if (tty->driver->type == TTY_DRIVER_TYPE_PTY + && tty->driver->subtype == PTY_TYPE_MASTER) + return; + buf = tty_audit_buf_get(tty); if (!buf) return; @@ -295,53 +299,3 @@ void tty_audit_push(struct tty_struct *tty) tty_audit_buf_put(buf); } } - -/** - * tty_audit_opening - A TTY is being opened. - * - * As a special hack, tasks that close all their TTYs and open new ones - * are assumed to be system daemons (e.g. getty) and auditing is - * automatically disabled for them. - */ -void tty_audit_opening(void) -{ - int disable; - - disable = 1; - spin_lock_irq(&current->sighand->siglock); - if (current->signal->audit_tty == 0) - disable = 0; - spin_unlock_irq(&current->sighand->siglock); - if (!disable) - return; - - task_lock(current); - if (current->files) { - struct fdtable *fdt; - unsigned i; - - /* - * We don't take a ref to the file, so we must hold ->file_lock - * instead.
- */ - spin_lock(&current->files->file_lock); - fdt = files_fdtable(current->files); - for (i = 0; i < fdt->max_fds; i++) { - struct file *filp; - - filp = fcheck_files(current->files, i); - if (filp && is_tty(filp)) { - disable = 0; - break; - } - } - spin_unlock(&current->files->file_lock); - } - task_unlock(current); - if (!disable) - return; - - spin_lock_irq(&current->sighand->siglock); - current->signal->audit_tty = 0; - spin_unlock_irq(&current->sighand->siglock); -} diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 4d3c7018f0c3..afddccf1bb3d 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2755,7 +2755,6 @@ got_driver: __proc_set_tty(current, tty); spin_unlock_irq(&current->sighand->siglock); mutex_unlock(&tty_mutex); - tty_audit_opening(); return 0; } @@ -2818,10 +2817,8 @@ static int ptmx_open(struct inode *inode, struct file *filp) check_tty_count(tty, "tty_open"); retval = ptm_driver->open(tty, filp); - if (!retval) { - tty_audit_opening(); + if (!retval) return 0; - } out1: release_dev(filp); return retval; diff --git a/include/linux/tty.h b/include/linux/tty.h index 430624504ca0..265831ccaa88 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -300,7 +300,6 @@ extern void tty_hangup(struct tty_struct * tty); extern void tty_vhangup(struct tty_struct * tty); extern void tty_unhangup(struct file *filp); extern int tty_hung_up_p(struct file * filp); -extern int is_tty(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); extern void disassociate_ctty(int priv); @@ -352,7 +351,6 @@ extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_push(struct tty_struct *tty); extern void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid); -extern void tty_audit_opening(void); #else static inline void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, size_t size) @@ -370,9 +368,6 @@ static inline void
tty_audit_push(struct tty_struct *tty) static inline void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid) { } -static inline void tty_audit_opening(void) -{ -} #endif /* tty_ioctl.c */ -- cgit v1.2.3-71-gd317 From 8b67dca9420474623709e00d72a066068a502b20 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 28 Apr 2008 04:15:49 -0400 Subject: [PATCH] new predicate - AUDIT_FILETYPE Argument is S_IF... | <index>, where index is normally 0 or 1. Triggers if chosen element of ctx->names[] is present and the mode of object in question matches the upper bits of argument. I.e. for things like "is the argument of that chmod a directory", etc. Signed-off-by: Al Viro --- include/linux/audit.h | 1 + kernel/auditfilter.c | 8 ++++++++ kernel/auditsc.c | 16 ++++++++++++++++ 3 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index dcd5395b400d..63c3bb98558f 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -209,6 +209,7 @@ #define AUDIT_WATCH 105 #define AUDIT_PERM 106 #define AUDIT_DIR 107 +#define AUDIT_FILETYPE 108 #define AUDIT_ARG0 200 #define AUDIT_ARG1 (AUDIT_ARG0+1) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 7c3450d063fe..9435d9392df5 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -478,6 +478,10 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) if (f->val & ~15) goto exit_free; break; + case AUDIT_FILETYPE: + if ((f->val & ~S_IFMT) > S_IFMT) + goto exit_free; + break; case AUDIT_INODE: err = audit_to_inode(&entry->rule, f); if (err) @@ -649,6 +653,10 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, if (f->val & ~15) goto exit_free; break; + case AUDIT_FILETYPE: + if ((f->val & ~S_IFMT) > S_IFMT) + goto exit_free; + break; default: goto exit_free; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 091409996577..c10e7aae04d7 100644 --- a/kernel/auditsc.c +++
b/kernel/auditsc.c @@ -280,6 +280,19 @@ static int audit_match_perm(struct audit_context *ctx, int mask) } } +static int audit_match_filetype(struct audit_context *ctx, int which) +{ + unsigned index = which & ~S_IFMT; + mode_t mode = which & S_IFMT; + if (index >= ctx->name_count) + return 0; + if (ctx->names[index].ino == -1) + return 0; + if ((ctx->names[index].mode ^ mode) & S_IFMT) + return 0; + return 1; +} + /* * We keep a linked list of fixed-sized (31 pointer) arrays of audit_chunk *; * ->first_trees points to its beginning, ->trees - to the current end of data. @@ -589,6 +602,9 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_PERM: result = audit_match_perm(ctx, f->val); break; + case AUDIT_FILETYPE: + result = audit_match_filetype(ctx, f->val); + break; } if (!result) -- cgit v1.2.3-71-gd317 From ee69439cc1dcadbae42ece1caa1ec1786560f7aa Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 28 Apr 2008 12:30:35 -0700 Subject: PCI: don't expose struct pci_vpd to userspace We just need to forward declare it for struct pci_dev, not expose it outside of __KERNEL__. Signed-off-by: Jesse Barnes --- include/linux/pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 292491324b01..7a0770d4c4e2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -20,8 +20,6 @@ /* Include the pci register defines */ #include <linux/pci_regs.h> -struct pci_vpd; - /* * The PCI interface treats multi-function devices as independent * devices. The slot/function address of each device is encoded @@ -131,6 +129,8 @@ struct pci_cap_saved_state { }; struct pcie_link_state; +struct pci_vpd; + /* * The pci_dev structure is used to describe PCI devices. */ -- cgit v1.2.3-71-gd317 From e8628dd06d66f2e3965ec9742029b401d63434f1 Mon Sep 17 00:00:00 2001 From: "Darrick J.
Wong" Date: Fri, 18 Apr 2008 13:31:12 -0700 Subject: [CPUFREQ] expose cpufreq coordination requirements regardless of coordination mechanism Currently, affected_cpus shows which CPUs need to have their frequency coordinated in software. When hardware coordination is in use, the contents of this file appear the same as when no coordination is required. This can lead to some confusion among user-space programs, for example, that do not know that extra coordination is required to force a CPU core to a particular speed to control power consumption. To fix this, create a "related_cpus" attribute that always displays the coordination map regardless of whatever coordination strategy the cpufreq driver uses (sw or hw). If the cpufreq driver does not provide a value, fall back to policy->cpus. Signed-off-by: Darrick J. Wong Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 1 + drivers/cpufreq/cpufreq.c | 29 ++++++++++++++++++++++++----- include/linux/cpufreq.h | 3 ++- 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8db8f73503b3..b0c8208df9fa 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -601,6 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { policy->cpus = perf->shared_cpu_map; } + policy->related_cpus = perf->shared_cpu_map; #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d3575f5ec6d2..7fce038fa57e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -583,15 +583,13 @@ out: i += sprintf(&buf[i], "\n"); return i; } -/** - * show_affected_cpus - show the CPUs affected by each transition - */ -static ssize_t show_affected_cpus(struct cpufreq_policy 
*policy, char *buf) + +static ssize_t show_cpus(cpumask_t mask, char *buf) { ssize_t i = 0; unsigned int cpu; - for_each_cpu_mask(cpu, policy->cpus) { + for_each_cpu_mask(cpu, mask) { if (i) i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); @@ -602,6 +600,25 @@ static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) return i; } +/** + * show_related_cpus - show the CPUs affected by each transition even if + * hw coordination is in use + */ +static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) +{ + if (cpus_empty(policy->related_cpus)) + return show_cpus(policy->cpus, buf); + return show_cpus(policy->related_cpus, buf); +} + +/** + * show_affected_cpus - show the CPUs affected by each transition + */ +static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) +{ + return show_cpus(policy->cpus, buf); +} + static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, const char *buf, size_t count) { @@ -646,6 +663,7 @@ define_one_ro(cpuinfo_max_freq); define_one_ro(scaling_available_governors); define_one_ro(scaling_driver); define_one_ro(scaling_cur_freq); +define_one_ro(related_cpus); define_one_ro(affected_cpus); define_one_rw(scaling_min_freq); define_one_rw(scaling_max_freq); @@ -658,6 +676,7 @@ static struct attribute *default_attrs[] = { &scaling_min_freq.attr, &scaling_max_freq.attr, &affected_cpus.attr, + &related_cpus.attr, &scaling_governor.attr, &scaling_driver.attr, &scaling_available_governors.attr, diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ddd8652fc3f3..a881fd62c447 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -83,7 +83,8 @@ struct cpufreq_real_policy { }; struct cpufreq_policy { - cpumask_t cpus; /* affected CPUs */ + cpumask_t cpus; /* CPUs requiring sw coordination */ + cpumask_t related_cpus; /* CPUs with any coordination */ unsigned int shared_type; /* ANY or ALL affected 
CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of registered CPU */ -- cgit v1.2.3-71-gd317 From 30d221db4439973076953e2ed44344fa92d1d09f Mon Sep 17 00:00:00 2001 From: Alessandro Guido Date: Fri, 18 Apr 2008 13:31:13 -0700 Subject: [CPUFREQ] allow use of the powersave governor as the default one Allow use of the powersave cpufreq governor as the default one for EMBEDDED configs. Signed-off-by: Alessandro Guido Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- drivers/cpufreq/Kconfig | 9 +++++++++ drivers/cpufreq/cpufreq_powersave.c | 8 ++++++-- include/linux/cpufreq.h | 3 +++ 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index c159ae64eeb2..5f076aef74fa 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -69,6 +69,15 @@ config CPU_FREQ_DEFAULT_GOV_PERFORMANCE the frequency statically to the highest frequency supported by the CPU. +config CPU_FREQ_DEFAULT_GOV_POWERSAVE + bool "powersave" + depends on EMBEDDED + select CPU_FREQ_GOV_POWERSAVE + help + Use the CPUFreq governor 'powersave' as default. This sets + the frequency statically to the lowest frequency supported by + the CPU. 
+ config CPU_FREQ_DEFAULT_GOV_USERSPACE bool "userspace" select CPU_FREQ_GOV_USERSPACE diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 13fe06b94b0a..88d2f44fba48 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -35,12 +35,12 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, return 0; } -static struct cpufreq_governor cpufreq_gov_powersave = { +struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .governor = cpufreq_governor_powersave, .owner = THIS_MODULE, }; - +EXPORT_SYMBOL(cpufreq_gov_powersave); static int __init cpufreq_gov_powersave_init(void) { @@ -58,5 +58,9 @@ MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); MODULE_LICENSE("GPL"); +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +fs_initcall(cpufreq_gov_powersave_init); +#else module_init(cpufreq_gov_powersave_init); +#endif module_exit(cpufreq_gov_powersave_exit); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a881fd62c447..e7e91dbfde0f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -308,6 +308,9 @@ extern struct cpufreq_governor cpufreq_gov_performance; #endif #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE) +extern struct cpufreq_governor cpufreq_gov_powersave; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave) #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE) extern struct cpufreq_governor cpufreq_gov_userspace; #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace) -- cgit v1.2.3-71-gd317 From 92d3ab27e8fd23d1a9dc3b69d17b2afb83e5c6f5 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:36 +0200 Subject: falconide/q40ide: add ->atapi_*put_bytes and ->ata_*put_data methods (take 2) * Add ->atapi_{in,out}put_bytes and 
->ata_{in,out}put_data methods to falconide and q40ide host drivers (->ata_* methods are implemented on top of ->atapi_* methods so they also do byte-swapping now). * Cleanup atapi_{in,out}put_bytes(). v2: * Add 'struct request *rq' argument to ->ata_{in,out}put_data methods and don't byte-swap disk fs requests (we shouldn't un-swap fs requests because fs itself is stored byte-swapped on the disk) - this is how things were done before the patch (ideally device mapper should be used instead but it would break existing setups and would have some performance impact). Cc: Geert Uytterhoeven Cc: Michael Schmitz Cc: Roman Zippel Cc: Alan Cox Cc: Richard Zidlicky Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/cris/ide-cris.c | 14 ++++++++------ drivers/ide/ide-io.c | 2 +- drivers/ide/ide-iops.c | 26 +++++++------------------- drivers/ide/ide-probe.c | 2 +- drivers/ide/ide-taskfile.c | 16 +++++++++------- drivers/ide/legacy/falconide.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/ide/legacy/q40ide.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/ide.h | 4 ++-- 8 files changed, 98 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c index 9df26855bc05..aa263df76569 100644 --- a/drivers/ide/cris/ide-cris.c +++ b/drivers/ide/cris/ide-cris.c @@ -673,8 +673,10 @@ cris_ide_inb(unsigned long reg) return (unsigned char)cris_ide_inw(reg); } -static void cris_ide_input_data (ide_drive_t *drive, void *, unsigned int); -static void cris_ide_output_data (ide_drive_t *drive, void *, unsigned int); +static void cris_ide_input_data(ide_drive_t *, struct request *, + void *, unsigned int); +static void cris_ide_output_data(ide_drive_t *, struct request *, + void *, unsigned int); static void cris_atapi_input_bytes(ide_drive_t *drive, void *, unsigned int); static void cris_atapi_output_bytes(ide_drive_t *drive, void *, unsigned int); @@ -900,8 +902,8 @@ cris_atapi_output_bytes (ide_drive_t 
*drive, void *buffer, unsigned int bytecoun /* * This is used for most PIO data transfers *from* the IDE interface */ -static void -cris_ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +static void cris_ide_input_data(ide_drive_t *drive, struct request *rq, + void *buffer, unsigned int wcount) { cris_atapi_input_bytes(drive, buffer, wcount << 2); } @@ -909,8 +911,8 @@ cris_ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) /* * This is used for most PIO data transfers *to* the IDE interface */ -static void -cris_ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +static void cris_ide_output_data(ide_drive_t *drive, struct request *, + void *buffer, unsigned int wcount) { cris_atapi_output_bytes(drive, buffer, wcount << 2); } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 3a2d8930d17f..60078cf307fb 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -422,7 +422,7 @@ static void try_to_flush_leftover_data (ide_drive_t *drive) u32 wcount = (i > 16) ? 
16 : i; i -= wcount; - HWIF(drive)->ata_input_data(drive, buffer, wcount); + drive->hwif->ata_input_data(drive, NULL, buffer, wcount); } } diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 5425d3038ec2..7ec7fa2aef96 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -192,7 +192,8 @@ static void ata_vlb_sync(ide_drive_t *drive, unsigned long port) /* * This is used for most PIO data transfers *from* the IDE interface */ -static void ata_input_data(ide_drive_t *drive, void *buffer, u32 wcount) +static void ata_input_data(ide_drive_t *drive, struct request *rq, + void *buffer, u32 wcount) { ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; @@ -215,7 +216,8 @@ static void ata_input_data(ide_drive_t *drive, void *buffer, u32 wcount) /* * This is used for most PIO data transfers *to* the IDE interface */ -static void ata_output_data(ide_drive_t *drive, void *buffer, u32 wcount) +static void ata_output_data(ide_drive_t *drive, struct request *rq, + void *buffer, u32 wcount) { ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; @@ -248,14 +250,7 @@ static void atapi_input_bytes(ide_drive_t *drive, void *buffer, u32 bytecount) ide_hwif_t *hwif = HWIF(drive); ++bytecount; -#if defined(CONFIG_ATARI) || defined(CONFIG_Q40) - if (MACH_IS_ATARI || MACH_IS_Q40) { - /* Atari has a byte-swapped IDE interface */ - insw_swapw(hwif->io_ports.data_addr, buffer, bytecount / 2); - return; - } -#endif /* CONFIG_ATARI || CONFIG_Q40 */ - hwif->ata_input_data(drive, buffer, bytecount / 4); + hwif->ata_input_data(drive, NULL, buffer, bytecount / 4); if ((bytecount & 0x03) >= 2) hwif->INSW(hwif->io_ports.data_addr, (u8 *)buffer + (bytecount & ~0x03), 1); @@ -266,14 +261,7 @@ static void atapi_output_bytes(ide_drive_t *drive, void *buffer, u32 bytecount) ide_hwif_t *hwif = HWIF(drive); ++bytecount; -#if defined(CONFIG_ATARI) || defined(CONFIG_Q40) - if (MACH_IS_ATARI || MACH_IS_Q40) { - /* Atari has a 
byte-swapped IDE interface */ - outsw_swapw(hwif->io_ports.data_addr, buffer, bytecount / 2); - return; - } -#endif /* CONFIG_ATARI || CONFIG_Q40 */ - hwif->ata_output_data(drive, buffer, bytecount / 4); + hwif->ata_output_data(drive, NULL, buffer, bytecount / 4); if ((bytecount & 0x03) >= 2) hwif->OUTSW(hwif->io_ports.data_addr, (u8 *)buffer + (bytecount & ~0x03), 1); @@ -668,7 +656,7 @@ int ide_driveid_update(ide_drive_t *drive) local_irq_restore(flags); return 0; } - hwif->ata_input_data(drive, id, SECTOR_WORDS); + hwif->ata_input_data(drive, NULL, id, SECTOR_WORDS); (void)ide_read_status(drive); /* clear drive IRQ */ local_irq_enable(); local_irq_restore(flags); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 862f02603f9b..e06a64605215 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -124,7 +124,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) id = drive->id; /* read 512 bytes of id info */ - hwif->ata_input_data(drive, id, SECTOR_WORDS); + hwif->ata_input_data(drive, NULL, id, SECTOR_WORDS); drive->id_read = 1; local_irq_enable(); diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 9f9ad9fb6b89..7f6bfd314411 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -283,7 +283,8 @@ static u8 wait_drive_not_busy(ide_drive_t *drive) return stat; } -static void ide_pio_sector(ide_drive_t *drive, unsigned int write) +static void ide_pio_sector(ide_drive_t *drive, struct request *rq, + unsigned int write) { ide_hwif_t *hwif = drive->hwif; struct scatterlist *sg = hwif->sg_table; @@ -323,9 +324,9 @@ static void ide_pio_sector(ide_drive_t *drive, unsigned int write) /* do the actual data transfer */ if (write) - hwif->ata_output_data(drive, buf, SECTOR_WORDS); + hwif->ata_output_data(drive, rq, buf, SECTOR_WORDS); else - hwif->ata_input_data(drive, buf, SECTOR_WORDS); + hwif->ata_input_data(drive, rq, buf, SECTOR_WORDS); kunmap_atomic(buf, KM_BIO_SRC_IRQ); #ifdef 
CONFIG_HIGHMEM @@ -333,13 +334,14 @@ static void ide_pio_sector(ide_drive_t *drive, unsigned int write) #endif } -static void ide_pio_multi(ide_drive_t *drive, unsigned int write) +static void ide_pio_multi(ide_drive_t *drive, struct request *rq, + unsigned int write) { unsigned int nsect; nsect = min_t(unsigned int, drive->hwif->nleft, drive->mult_count); while (nsect--) - ide_pio_sector(drive, write); + ide_pio_sector(drive, rq, write); } static void ide_pio_datablock(ide_drive_t *drive, struct request *rq, @@ -362,10 +364,10 @@ static void ide_pio_datablock(ide_drive_t *drive, struct request *rq, switch (drive->hwif->data_phase) { case TASKFILE_MULTI_IN: case TASKFILE_MULTI_OUT: - ide_pio_multi(drive, write); + ide_pio_multi(drive, rq, write); break; default: - ide_pio_sector(drive, write); + ide_pio_sector(drive, rq, write); break; } diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c index 56cdaa0eeea5..32c044b17d45 100644 --- a/drivers/ide/legacy/falconide.c +++ b/drivers/ide/legacy/falconide.c @@ -44,6 +44,36 @@ int falconide_intr_lock; EXPORT_SYMBOL(falconide_intr_lock); +static void falconide_atapi_input_bytes(ide_drive_t *drive, void *buf, + unsigned int len) +{ + insw_swapw(drive->hwif->io_ports.data_addr, buf, (len + 1) / 2); +} + +static void falconide_atapi_output_bytes(ide_drive_t *drive, void *buf, + unsigned int len) +{ + outsw_swapw(drive->hwif->io_ports.data_addr, buf, (len + 1) / 2); +} + +static void falconide_ata_input_data(ide_drive_t *drive, struct request *rq, + void *buf, unsigned int wcount) +{ + if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS) + return insw(drive->hwif->io_ports.data_addr, buf, wcount * 2); + + falconide_atapi_input_bytes(drive, buf, wcount * 4); +} + +static void falconide_ata_output_data(ide_drive_t *drive, struct request *rq, + void *buf, unsigned int wcount) +{ + if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS) + return 
outsw(drive->hwif->io_ports.data_addr, buf, wcount * 2); + + falconide_atapi_output_bytes(drive, buf, wcount * 4); +} + static void __init falconide_setup_ports(hw_regs_t *hw) { int i; @@ -90,6 +120,12 @@ static int __init falconide_init(void) ide_init_port_data(hwif, index); ide_init_port_hw(hwif, &hw); + /* Atari has a byte-swapped IDE interface */ + hwif->atapi_input_bytes = falconide_atapi_input_bytes; + hwif->atapi_output_bytes = falconide_atapi_output_bytes; + hwif->ata_input_data = falconide_ata_input_data; + hwif->ata_output_data = falconide_ata_output_data; + ide_get_lock(NULL, NULL); ide_device_add(idx, NULL); ide_release_lock(); diff --git a/drivers/ide/legacy/q40ide.c b/drivers/ide/legacy/q40ide.c index f9210458aea0..deae3d2ca65e 100644 --- a/drivers/ide/legacy/q40ide.c +++ b/drivers/ide/legacy/q40ide.c @@ -72,7 +72,35 @@ static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base, hw->ack_intr = ack_intr; } +static void q40ide_atapi_input_bytes(ide_drive_t *drive, void *buf, + unsigned int len) +{ + insw_swapw(drive->hwif->io_ports.data_addr, buf, (len + 1) / 2); +} +static void q40ide_atapi_output_bytes(ide_drive_t *drive, void *buf, + unsigned int len) +{ + outsw_swapw(drive->hwif->io_ports.data_addr, buf, (len + 1) / 2); +} + +static void q40ide_ata_input_data(ide_drive_t *drive, struct request *rq, + void *buf, unsigned int wcount) +{ + if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS) + return insw(drive->hwif->io_ports.data_addr, buf, wcount * 2); + + q40ide_atapi_input_bytes(drive, buf, wcount * 4); +} + +static void q40ide_ata_output_data(ide_drive_t *drive, struct request *rq, + void *buf, unsigned int wcount) +{ + if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS) + return outsw(drive->hwif->io_ports.data_addr, buf, wcount * 2); + + q40ide_atapi_output_bytes(drive, buf, wcount * 4); +} /* * the static array is needed to have the name reported in /proc/ioports, @@ -123,6 +151,12 @@ static int __init 
q40ide_init(void) ide_init_port_data(hwif, hwif->index); ide_init_port_hw(hwif, &hw); + /* Q40 has a byte-swapped IDE interface */ + hwif->atapi_input_bytes = q40ide_atapi_input_bytes; + hwif->atapi_output_bytes = q40ide_atapi_output_bytes; + hwif->ata_input_data = q40ide_ata_input_data; + hwif->ata_output_data = q40ide_ata_output_data; + idx[i] = hwif->index; } } diff --git a/include/linux/ide.h b/include/linux/ide.h index 32fd77bb4436..0cbc46bf08a5 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -467,8 +467,8 @@ typedef struct hwif_s { const struct ide_port_ops *port_ops; const struct ide_dma_ops *dma_ops; - void (*ata_input_data)(ide_drive_t *, void *, u32); - void (*ata_output_data)(ide_drive_t *, void *, u32); + void (*ata_input_data)(ide_drive_t *, struct request *, void *, u32); + void (*ata_output_data)(ide_drive_t *, struct request *, void *, u32); void (*atapi_input_bytes)(ide_drive_t *, void *, u32); void (*atapi_output_bytes)(ide_drive_t *, void *, u32); -- cgit v1.2.3-71-gd317 From 9567b349f7e7dd7e2483db99ee8e4a6fe0caca38 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:36 +0200 Subject: ide: merge ->atapi_*put_bytes and ->ata_*put_data methods * Merge ->atapi_{in,out}put_bytes and ->ata_{in,out}put_data methods into new ->{in,out}put_data methods which take number of bytes to transfer as an argument and always do padding. While at it: * Use 'hwif' or 'drive->hwif' instead of 'HWIF(drive)'. There should be no functional changes caused by this patch (all users of ->ata_{in,out}put_data methods were using multiply-of-4 word counts). 
Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/cris/ide-cris.c | 52 ++++++++++---------------------- drivers/ide/ide-cd.c | 14 ++++----- drivers/ide/ide-floppy.c | 18 ++++++----- drivers/ide/ide-io.c | 2 +- drivers/ide/ide-iops.c | 68 ++++++++++++++++-------------------------- drivers/ide/ide-probe.c | 2 +- drivers/ide/ide-tape.c | 13 ++++---- drivers/ide/ide-taskfile.c | 4 +-- drivers/ide/legacy/falconide.c | 36 ++++++++-------------- drivers/ide/legacy/q40ide.c | 36 ++++++++-------------- drivers/scsi/ide-scsi.c | 27 ++++++++--------- include/linux/ide.h | 13 ++++---- 12 files changed, 113 insertions(+), 172 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c index aa263df76569..72ff63ed5f2b 100644 --- a/drivers/ide/cris/ide-cris.c +++ b/drivers/ide/cris/ide-cris.c @@ -673,12 +673,8 @@ cris_ide_inb(unsigned long reg) return (unsigned char)cris_ide_inw(reg); } -static void cris_ide_input_data(ide_drive_t *, struct request *, - void *, unsigned int); -static void cris_ide_output_data(ide_drive_t *, struct request *, - void *, unsigned int); -static void cris_atapi_input_bytes(ide_drive_t *drive, void *, unsigned int); -static void cris_atapi_output_bytes(ide_drive_t *drive, void *, unsigned int); +static void cris_input_data(ide_drive_t *, struct request *, void *, unsigned); +static void cris_output_data(ide_drive_t *, struct request *, void *, unsigned); static void cris_dma_host_set(ide_drive_t *drive, int on) { @@ -816,10 +812,9 @@ static int __init init_e100_ide(void) ide_init_port_data(hwif, hwif->index); ide_init_port_hw(hwif, &hw); - hwif->ata_input_data = &cris_ide_input_data; - hwif->ata_output_data = &cris_ide_output_data; - hwif->atapi_input_bytes = &cris_atapi_input_bytes; - hwif->atapi_output_bytes = &cris_atapi_output_bytes; + hwif->input_data = cris_input_data; + hwif->output_data = cris_output_data; + hwif->OUTB = &cris_ide_outb; hwif->OUTW = &cris_ide_outw; hwif->OUTBSYNC = 
&cris_ide_outbsync; @@ -849,17 +844,16 @@ static int __init init_e100_ide(void) static cris_dma_descr_type mydescr __attribute__ ((__aligned__(16))); /* - * The following routines are mainly used by the ATAPI drivers. + * This is used for most PIO data transfers *from* the IDE interface * * These routines will round up any request for an odd number of bytes, * so if an odd bytecount is specified, be sure that there's at least one * extra byte allocated for the buffer. */ -static void -cris_atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount) +static void cris_input_data(ide_drive_t *drive, struct request *rq, + void *buffer, unsigned int bytecount) { - D(printk("atapi_input_bytes, buffer 0x%x, count %d\n", - buffer, bytecount)); + D(printk("input_data, buffer 0x%x, count %d\n", buffer, bytecount)); if(bytecount & 1) { printk("warning, odd bytecount in cdrom_in_bytes = %d.\n", bytecount); @@ -877,11 +871,13 @@ cris_atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount LED_DISK_READ(0); } -static void -cris_atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount) +/* + * This is used for most PIO data transfers *to* the IDE interface + */ +static void cris_output_data(ide_drive_t *drive, struct request *rq, + void *buffer, unsigned int bytecount) { - D(printk("atapi_output_bytes, buffer 0x%x, count %d\n", - buffer, bytecount)); + D(printk("output_data, buffer 0x%x, count %d\n", buffer, bytecount)); if(bytecount & 1) { printk("odd bytecount %d in atapi_out_bytes!\n", bytecount); @@ -899,24 +895,6 @@ cris_atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecoun LED_DISK_WRITE(0); } -/* - * This is used for most PIO data transfers *from* the IDE interface - */ -static void cris_ide_input_data(ide_drive_t *drive, struct request *rq, - void *buffer, unsigned int wcount) -{ - cris_atapi_input_bytes(drive, buffer, wcount << 2); -} - -/* - * This is used for most PIO data transfers *to* the 
IDE interface - */ -static void cris_ide_output_data(ide_drive_t *drive, struct request *, - void *buffer, unsigned int wcount) -{ - cris_atapi_output_bytes(drive, buffer, wcount << 2); -} - /* we only have one DMA channel on the chip for ATA, so we can keep these statically */ static cris_dma_descr_type ata_descrs[MAX_DMA_DESCRS] __attribute__ ((__aligned__(16))); static unsigned int ata_tot_size; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index b34fd2bde96f..095e50a93869 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -613,7 +613,7 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive, cmd_len = ATAPI_MIN_CDB_BYTES; /* send the command to the device */ - HWIF(drive)->atapi_output_bytes(drive, rq->cmd, cmd_len); + hwif->output_data(drive, NULL, rq->cmd, cmd_len); /* start the DMA if need be */ if (info->dma) @@ -629,7 +629,7 @@ static void ide_cd_pad_transfer(ide_drive_t *drive, xfer_func_t *xf, int len) { while (len > 0) { int dum = 0; - xf(drive, &dum, sizeof(dum)); + xf(drive, NULL, &dum, sizeof(dum)); len -= sizeof(dum); } } @@ -639,7 +639,7 @@ static void ide_cd_drain_data(ide_drive_t *drive, int nsects) while (nsects > 0) { static char dum[SECTOR_SIZE]; - drive->hwif->atapi_input_bytes(drive, dum, sizeof(dum)); + drive->hwif->input_data(drive, NULL, dum, sizeof(dum)); nsects--; } } @@ -666,7 +666,7 @@ static int ide_cd_check_ireason(ide_drive_t *drive, struct request *rq, printk(KERN_ERR "%s: %s: wrong transfer direction!\n", drive->name, __func__); - xf = rw ? hwif->atapi_output_bytes : hwif->atapi_input_bytes; + xf = rw ? 
hwif->output_data : hwif->input_data; ide_cd_pad_transfer(drive, xf, len); } else if (rw == 0 && ireason == 1) { /* @@ -1019,10 +1019,10 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) if (ireason == 0) { write = 1; - xferfunc = HWIF(drive)->atapi_output_bytes; + xferfunc = hwif->output_data; } else { write = 0; - xferfunc = HWIF(drive)->atapi_input_bytes; + xferfunc = hwif->input_data; } /* transfer data */ @@ -1061,7 +1061,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) if (blen > thislen) blen = thislen; - xferfunc(drive, ptr, blen); + xferfunc(drive, NULL, ptr, blen); thislen -= blen; len -= blen; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 489079b8ed03..e2bcd3af45db 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -231,6 +231,7 @@ static int idefloppy_end_request(ide_drive_t *drive, int uptodate, int nsecs) static void ide_floppy_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned int bcount, int direction) { + ide_hwif_t *hwif = drive->hwif; struct request *rq = pc->rq; struct req_iterator iter; struct bio_vec *bvec; @@ -246,9 +247,9 @@ static void ide_floppy_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, data = bvec_kmap_irq(bvec, &flags); if (direction) - drive->hwif->atapi_output_bytes(drive, data, count); + hwif->output_data(drive, NULL, data, count); else - drive->hwif->atapi_input_bytes(drive, data, count); + hwif->input_data(drive, NULL, data, count); bvec_kunmap_irq(data, &flags); bcount -= count; @@ -503,12 +504,12 @@ static ide_startstop_t idefloppy_pc_intr(ide_drive_t *drive) } } if (pc->flags & PC_FLAG_WRITING) - xferfunc = hwif->atapi_output_bytes; + xferfunc = hwif->output_data; else - xferfunc = hwif->atapi_input_bytes; + xferfunc = hwif->input_data; if (pc->buf) - xferfunc(drive, pc->cur_pos, bcount); + xferfunc(drive, NULL, pc->cur_pos, bcount); else ide_floppy_io_buffers(drive, pc, bcount, !!(pc->flags & PC_FLAG_WRITING)); @@ -548,8 
+549,10 @@ static ide_startstop_t idefloppy_transfer_pc(ide_drive_t *drive) /* Set the interrupt routine */ ide_set_handler(drive, &idefloppy_pc_intr, IDEFLOPPY_WAIT_CMD, NULL); + /* Send the actual packet */ - HWIF(drive)->atapi_output_bytes(drive, floppy->pc->c, 12); + hwif->output_data(drive, NULL, floppy->pc->c, 12); + return ide_started; } @@ -569,7 +572,8 @@ static int idefloppy_transfer_pc2(ide_drive_t *drive) idefloppy_floppy_t *floppy = drive->driver_data; /* Send the actual packet */ - HWIF(drive)->atapi_output_bytes(drive, floppy->pc->c, 12); + drive->hwif->output_data(drive, NULL, floppy->pc->c, 12); + /* Timeout for the packet command */ return IDEFLOPPY_WAIT_CMD; } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 60078cf307fb..a17fc6430001 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -422,7 +422,7 @@ static void try_to_flush_leftover_data (ide_drive_t *drive) u32 wcount = (i > 16) ? 16 : i; i -= wcount; - drive->hwif->ata_input_data(drive, NULL, buffer, wcount); + drive->hwif->input_data(drive, NULL, buffer, wcount * 4); } } diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 7ec7fa2aef96..fbbbb30ae964 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -191,36 +191,47 @@ static void ata_vlb_sync(ide_drive_t *drive, unsigned long port) /* * This is used for most PIO data transfers *from* the IDE interface + * + * These routines will round up any request for an odd number of bytes, + * so if an odd len is specified, be sure that there's at least one + * extra byte allocated for the buffer. 
*/ static void ata_input_data(ide_drive_t *drive, struct request *rq, - void *buffer, u32 wcount) + void *buf, unsigned int len) { ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; + unsigned long data_addr = io_ports->data_addr; u8 io_32bit = drive->io_32bit; + len++; + if (io_32bit) { if (io_32bit & 2) { unsigned long flags; local_irq_save(flags); ata_vlb_sync(drive, io_ports->nsect_addr); - hwif->INSL(io_ports->data_addr, buffer, wcount); + hwif->INSL(data_addr, buf, len / 4); local_irq_restore(flags); } else - hwif->INSL(io_ports->data_addr, buffer, wcount); + hwif->INSL(data_addr, buf, len / 4); + + if ((len & 3) >= 2) + hwif->INSW(data_addr, (u8 *)buf + (len & ~3), 1); } else - hwif->INSW(io_ports->data_addr, buffer, wcount << 1); + hwif->INSW(data_addr, buf, len / 2); } /* * This is used for most PIO data transfers *to* the IDE interface */ static void ata_output_data(ide_drive_t *drive, struct request *rq, - void *buffer, u32 wcount) + void *buf, unsigned int len) { ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; + unsigned long data_addr = io_ports->data_addr; u8 io_32bit = drive->io_32bit; if (io_32bit) { @@ -229,50 +240,21 @@ static void ata_output_data(ide_drive_t *drive, struct request *rq, local_irq_save(flags); ata_vlb_sync(drive, io_ports->nsect_addr); - hwif->OUTSL(io_ports->data_addr, buffer, wcount); + hwif->OUTSL(data_addr, buf, len / 4); local_irq_restore(flags); } else - hwif->OUTSL(io_ports->data_addr, buffer, wcount); - } else - hwif->OUTSW(io_ports->data_addr, buffer, wcount << 1); -} - -/* - * The following routines are mainly used by the ATAPI drivers. - * - * These routines will round up any request for an odd number of bytes, - * so if an odd bytecount is specified, be sure that there's at least one - * extra byte allocated for the buffer. 
- */ - -static void atapi_input_bytes(ide_drive_t *drive, void *buffer, u32 bytecount) -{ - ide_hwif_t *hwif = HWIF(drive); + hwif->OUTSL(data_addr, buf, len / 4); - ++bytecount; - hwif->ata_input_data(drive, NULL, buffer, bytecount / 4); - if ((bytecount & 0x03) >= 2) - hwif->INSW(hwif->io_ports.data_addr, - (u8 *)buffer + (bytecount & ~0x03), 1); -} - -static void atapi_output_bytes(ide_drive_t *drive, void *buffer, u32 bytecount) -{ - ide_hwif_t *hwif = HWIF(drive); - - ++bytecount; - hwif->ata_output_data(drive, NULL, buffer, bytecount / 4); - if ((bytecount & 0x03) >= 2) - hwif->OUTSW(hwif->io_ports.data_addr, - (u8 *)buffer + (bytecount & ~0x03), 1); + if ((len & 3) >= 2) + hwif->OUTSW(data_addr, (u8 *)buf + (len & ~3), 1); + } else + hwif->OUTSW(data_addr, buf, len / 2); } void default_hwif_transport(ide_hwif_t *hwif) { - hwif->ata_input_data = ata_input_data; - hwif->ata_output_data = ata_output_data; - hwif->atapi_input_bytes = atapi_input_bytes; - hwif->atapi_output_bytes = atapi_output_bytes; + hwif->input_data = ata_input_data; + hwif->output_data = ata_output_data; } void ide_fix_driveid (struct hd_driveid *id) @@ -656,7 +638,7 @@ int ide_driveid_update(ide_drive_t *drive) local_irq_restore(flags); return 0; } - hwif->ata_input_data(drive, NULL, id, SECTOR_WORDS); + hwif->input_data(drive, NULL, id, SECTOR_SIZE); (void)ide_read_status(drive); /* clear drive IRQ */ local_irq_enable(); local_irq_restore(flags); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index e06a64605215..8f7d57660643 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -124,7 +124,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) id = drive->id; /* read 512 bytes of id info */ - hwif->ata_input_data(drive, NULL, id, SECTOR_WORDS); + hwif->input_data(drive, NULL, id, SECTOR_SIZE); drive->id_read = 1; local_irq_enable(); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 29870c415110..f4f31238bbef 100644 --- 
a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -401,7 +401,7 @@ static void idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, count = min( (unsigned int)(bh->b_size - atomic_read(&bh->b_count)), bcount); - HWIF(drive)->atapi_input_bytes(drive, bh->b_data + + drive->hwif->input_data(drive, NULL, bh->b_data + atomic_read(&bh->b_count), count); bcount -= count; atomic_add(count, &bh->b_count); @@ -427,7 +427,7 @@ static void idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, return; } count = min((unsigned int)pc->b_count, (unsigned int)bcount); - HWIF(drive)->atapi_output_bytes(drive, pc->b_data, count); + drive->hwif->output_data(drive, NULL, pc->b_data, count); bcount -= count; pc->b_data += count; pc->b_count -= count; @@ -880,16 +880,16 @@ static ide_startstop_t idetape_pc_intr(ide_drive_t *drive) "data than expected - allowing transfer\n"); } iobuf = &idetape_input_buffers; - xferfunc = hwif->atapi_input_bytes; + xferfunc = hwif->input_data; } else { iobuf = &idetape_output_buffers; - xferfunc = hwif->atapi_output_bytes; + xferfunc = hwif->output_data; } if (pc->bh) iobuf(drive, pc, bcount); else - xferfunc(drive, pc->cur_pos, bcount); + xferfunc(drive, NULL, pc->cur_pos, bcount); /* Update the current position */ pc->xferred += bcount; @@ -979,7 +979,8 @@ static ide_startstop_t idetape_transfer_pc(ide_drive_t *drive) hwif->dma_ops->dma_start(drive); #endif /* Send the actual packet */ - HWIF(drive)->atapi_output_bytes(drive, pc->c, 12); + hwif->output_data(drive, NULL, pc->c, 12); + return ide_started; } diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 7f6bfd314411..0321884f9d92 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -324,9 +324,9 @@ static void ide_pio_sector(ide_drive_t *drive, struct request *rq, /* do the actual data transfer */ if (write) - hwif->ata_output_data(drive, rq, buf, SECTOR_WORDS); + hwif->output_data(drive, rq, buf, SECTOR_SIZE); else - 
hwif->ata_input_data(drive, rq, buf, SECTOR_WORDS); + hwif->input_data(drive, rq, buf, SECTOR_SIZE); kunmap_atomic(buf, KM_BIO_SRC_IRQ); #ifdef CONFIG_HIGHMEM diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c index 32c044b17d45..83555ca513b5 100644 --- a/drivers/ide/legacy/falconide.c +++ b/drivers/ide/legacy/falconide.c @@ -44,34 +44,26 @@ int falconide_intr_lock; EXPORT_SYMBOL(falconide_intr_lock); -static void falconide_atapi_input_bytes(ide_drive_t *drive, void *buf, - unsigned int len) +static void falconide_input_data(ide_drive_t *drive, struct request *rq, + void *buf, unsigned int len) { - insw_swapw(drive->hwif->io_ports.data_addr, buf, (len + 1) / 2); -} - -static void falconide_atapi_output_bytes(ide_drive_t *drive, void *buf, - unsigned int len) -{ - outsw_swapw(drive->hwif->io_ports.data_addr, buf, (len + 1) / 2); -} + unsigned long data_addr = drive->hwif->io_ports.data_addr; -static void falconide_ata_input_data(ide_drive_t *drive, struct request *rq, - void *buf, unsigned int wcount) -{ if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS) - return insw(drive->hwif->io_ports.data_addr, buf, wcount * 2); + return insw(data_addr, buf, (len + 1) / 2); - falconide_atapi_input_bytes(drive, buf, wcount * 4); + insw_swapw(data_addr, buf, (len + 1) / 2); } -static void falconide_ata_output_data(ide_drive_t *drive, struct request *rq, - void *buf, unsigned int wcount) +static void falconide_output_data(ide_drive_t *drive, struct request *rq, + void *buf, unsigned int len) { + unsigned long data_addr = drive->hwif->io_ports.data_addr; + if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS) - return outsw(drive->hwif->io_ports.data_addr, buf, wcount * 2); + return outsw(data_addr, buf, (len + 1) / 2); - falconide_atapi_output_bytes(drive, buf, wcount * 4); + outsw_swapw(data_addr, buf, (len + 1) / 2); } static void __init falconide_setup_ports(hw_regs_t *hw) @@ -121,10 +113,8 @@ static int __init
falconide_init(void) ide_init_port_hw(hwif, &hw); /* Atari has a byte-swapped IDE interface */ - hwif->atapi_input_bytes = falconide_atapi_input_bytes; - hwif->atapi_output_bytes = falconide_atapi_output_bytes; - hwif->ata_input_data = falconide_ata_input_data; - hwif->ata_output_data = falconide_ata_output_data; + hwif->input_data = falconide_input_data; + hwif->output_data = falconide_output_data; ide_get_lock(NULL, NULL); ide_device_add(idx, NULL); diff --git a/drivers/ide/legacy/q40ide.c b/drivers/ide/legacy/q40ide.c index deae3d2ca65e..6f535d00e638 100644 --- a/drivers/ide/legacy/q40ide.c +++ b/drivers/ide/legacy/q40ide.c @@ -72,34 +72,26 @@ static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base, hw->ack_intr = ack_intr; } -static void q40ide_atapi_input_bytes(ide_drive_t *drive, void *buf, - unsigned int len) +static void q40ide_input_data(ide_drive_t *drive, struct request *rq, + void *buf, unsigned int len) { - insw_swapw(drive->hwif->io_ports.data_addr, buf, (len + 1) / 2); -} - -static void q40ide_atapi_output_bytes(ide_drive_t *drive, void *buf, - unsigned int len) -{ - outsw_swapw(drive->hwif->io_ports.data_addr, buf, (len + 1) / 2); -} + unsigned long data_addr = drive->hwif->io_ports.data_addr; -static void q40ide_ata_input_data(ide_drive_t *drive, struct request *rq, - void *buf, unsigned int wcount) -{ if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS) - return insw(drive->hwif->io_ports.data_addr, buf, wcount * 2); + return insw(data_addr, buf, (len + 1) / 2); - q40ide_atapi_input_bytes(drive, buf, wcount * 4); + insw_swapw(data_addr, buf, (len + 1) / 2); } -static void q40ide_ata_output_data(ide_drive_t *drive, struct request *rq, - void *buf, unsigned int wcount) +static void q40ide_output_data(ide_drive_t *drive, struct request *rq, + void *buf, unsigned int len) { + unsigned long data_addr = drive->hwif->io_ports.data_addr; + if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS) - return 
outsw(drive->hwif->io_ports.data_addr, buf, wcount * 2); + return outsw(data_addr, buf, (len + 1) / 2); - q40ide_atapi_output_bytes(drive, buf, wcount * 4); + outsw_swapw(data_addr, buf, (len + 1) / 2); } /* @@ -152,10 +144,8 @@ static int __init q40ide_init(void) ide_init_port_hw(hwif, &hw); /* Q40 has a byte-swapped IDE interface */ - hwif->atapi_input_bytes = q40ide_atapi_input_bytes; - hwif->atapi_output_bytes = q40ide_atapi_output_bytes; - hwif->ata_input_data = q40ide_ata_input_data; - hwif->ata_output_data = q40ide_ata_output_data; + hwif->input_data = q40ide_input_data; + hwif->output_data = q40ide_output_data; idx[i] = hwif->index; } diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 32553639aded..1168fb0a713c 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -134,6 +134,7 @@ static inline idescsi_scsi_t *drive_to_idescsi(ide_drive_t *ide_drive) static void idescsi_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned int bcount) { + ide_hwif_t *hwif = drive->hwif; int count; char *buf; @@ -145,14 +146,12 @@ static void idescsi_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, local_irq_save(flags); buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) + pc->sg->offset; - drive->hwif->atapi_input_bytes(drive, - buf + pc->b_count, count); + hwif->input_data(drive, NULL, buf + pc->b_count, count); kunmap_atomic(buf - pc->sg->offset, KM_IRQ0); local_irq_restore(flags); } else { buf = sg_virt(pc->sg); - drive->hwif->atapi_input_bytes(drive, - buf + pc->b_count, count); + hwif->input_data(drive, NULL, buf + pc->b_count, count); } bcount -= count; pc->b_count += count; if (pc->b_count == pc->sg->length) { @@ -172,6 +171,7 @@ static void idescsi_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, static void idescsi_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned int bcount) { + ide_hwif_t *hwif = drive->hwif; int count; char *buf; @@ -183,14 +183,12 @@ static void 
idescsi_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, local_irq_save(flags); buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) + pc->sg->offset; - drive->hwif->atapi_output_bytes(drive, - buf + pc->b_count, count); + hwif->output_data(drive, NULL, buf + pc->b_count, count); kunmap_atomic(buf - pc->sg->offset, KM_IRQ0); local_irq_restore(flags); } else { buf = sg_virt(pc->sg); - drive->hwif->atapi_output_bytes(drive, - buf + pc->b_count, count); + hwif->output_data(drive, NULL, buf + pc->b_count, count); } bcount -= count; pc->b_count += count; if (pc->b_count == pc->sg->length) { @@ -431,7 +429,8 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive) idescsi_input_buffers(drive, pc, temp); else - drive->hwif->atapi_input_bytes(drive, pc->cur_pos, temp); + hwif->input_data(drive, NULL, + pc->cur_pos, temp); printk(KERN_ERR "ide-scsi: transferred" " %d of %d bytes\n", temp, bcount); @@ -452,15 +451,13 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive) if (pc->sg) idescsi_input_buffers(drive, pc, bcount); else - hwif->atapi_input_bytes(drive, pc->cur_pos, - bcount); + hwif->input_data(drive, NULL, pc->cur_pos, bcount); } else { pc->flags |= PC_FLAG_WRITING; if (pc->sg) idescsi_output_buffers(drive, pc, bcount); else - hwif->atapi_output_bytes(drive, pc->cur_pos, - bcount); + hwif->output_data(drive, NULL, pc->cur_pos, bcount); } /* Update the current position */ pc->xferred += bcount; @@ -493,8 +490,10 @@ static ide_startstop_t idescsi_transfer_pc(ide_drive_t *drive) BUG_ON(HWGROUP(drive)->handler != NULL); /* Set the interrupt routine */ ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry); + /* Send the actual packet */ - drive->hwif->atapi_output_bytes(drive, scsi->pc->c, 12); + hwif->output_data(drive, NULL, scsi->pc->c, 12); + if (pc->flags & PC_FLAG_DMA_OK) { pc->flags |= PC_FLAG_DMA_IN_PROGRESS; hwif->dma_ops->dma_start(drive); diff --git a/include/linux/ide.h b/include/linux/ide.h index 
0cbc46bf08a5..b89b95dcb708 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -467,11 +467,8 @@ typedef struct hwif_s { const struct ide_port_ops *port_ops; const struct ide_dma_ops *dma_ops; - void (*ata_input_data)(ide_drive_t *, struct request *, void *, u32); - void (*ata_output_data)(ide_drive_t *, struct request *, void *, u32); - - void (*atapi_input_bytes)(ide_drive_t *, void *, u32); - void (*atapi_output_bytes)(ide_drive_t *, void *, u32); + void (*input_data)(ide_drive_t *, struct request *, void *, unsigned); + void (*output_data)(ide_drive_t *, struct request *, void *, unsigned); void (*ide_dma_clear_irq)(ide_drive_t *drive); @@ -547,7 +544,7 @@ typedef ide_startstop_t (ide_handler_t)(ide_drive_t *); typedef int (ide_expiry_t)(ide_drive_t *); /* used by ide-cd, ide-floppy, etc. */ -typedef void (xfer_func_t)(ide_drive_t *, void *, u32); +typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned); typedef struct hwgroup_s { /* irq handler, if active */ @@ -1369,7 +1366,7 @@ static inline void ide_atapi_discard_data(ide_drive_t *drive, unsigned bcount) { ide_hwif_t *hwif = drive->hwif; - /* FIXME: use ->atapi_input_bytes */ + /* FIXME: use ->input_data */ while (bcount--) (void)hwif->INB(hwif->io_ports.data_addr); } @@ -1378,7 +1375,7 @@ static inline void ide_atapi_write_zeros(ide_drive_t *drive, unsigned bcount) { ide_hwif_t *hwif = drive->hwif; - /* FIXME: use ->atapi_output_bytes */ + /* FIXME: use ->output_data */ while (bcount--) hwif->OUTB(0, hwif->io_ports.data_addr); } -- cgit v1.2.3-71-gd317 From c5dd43ec65c1e1e378df043d517d40ed70a32cbe Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:37 +0200 Subject: ide: add IDE_HFLAG_MMIO host flag (take 2) * Add IDE_HFLAG_MMIO host flag and set it for hosts which use default_hwif_mmiops(). v2: * Fix kernel panic in pmac host driver (',' should be '|'). 
Thanks to Kamalesh for reporting it + testing the fix and to Andrew for hinting me about the source of the issue. Cc: Kamalesh Babulal Cc: Andrew Morton Cc: Stephen Rothwell Cc: Andy Whitcroft Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/arm/icside.c | 2 +- drivers/ide/arm/palm_bk3710.c | 1 + drivers/ide/arm/rapide.c | 1 + drivers/ide/legacy/ide_platform.c | 4 +++- drivers/ide/mips/swarm.c | 1 + drivers/ide/pci/sgiioc4.c | 1 + drivers/ide/pci/siimage.c | 1 + drivers/ide/ppc/pmac.c | 1 + include/linux/ide.h | 2 ++ 9 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c index 65038ca35e10..061456914ca3 100644 --- a/drivers/ide/arm/icside.c +++ b/drivers/ide/arm/icside.c @@ -483,7 +483,7 @@ static const struct ide_port_info icside_v6_port_info __initdata = { .init_dma = icside_dma_off_init, .port_ops = &icside_v6_no_dma_port_ops, .dma_ops = &icside_v6_dma_ops, - .host_flags = IDE_HFLAG_SERIALIZE, + .host_flags = IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO, .mwdma_mask = ATA_MWDMA2, .swdma_mask = ATA_SWDMA2, }; diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c index aaf32541622d..96378ebfb31f 100644 --- a/drivers/ide/arm/palm_bk3710.c +++ b/drivers/ide/arm/palm_bk3710.c @@ -342,6 +342,7 @@ static const struct ide_port_ops palm_bk3710_ports_ops = { static const struct ide_port_info __devinitdata palm_bk3710_port_info = { .init_dma = palm_bk3710_init_dma, .port_ops = &palm_bk3710_ports_ops, + .host_flags = IDE_HFLAG_MMIO, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA4, /* (input clk 99MHz) */ .mwdma_mask = ATA_MWDMA2, diff --git a/drivers/ide/arm/rapide.c b/drivers/ide/arm/rapide.c index babc1a5e128d..1747b2358775 100644 --- a/drivers/ide/arm/rapide.c +++ b/drivers/ide/arm/rapide.c @@ -53,6 +53,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id) ide_init_port_hw(hwif, &hw); + hwif->host_flags = IDE_HFLAG_MMIO; default_hwif_mmiops(hwif); 
idx[0] = hwif->index; diff --git a/drivers/ide/legacy/ide_platform.c b/drivers/ide/legacy/ide_platform.c index 8279dc7ca4c0..d3bc3f24e05d 100644 --- a/drivers/ide/legacy/ide_platform.c +++ b/drivers/ide/legacy/ide_platform.c @@ -101,8 +101,10 @@ static int __devinit plat_ide_probe(struct platform_device *pdev) ide_init_port_hw(hwif, &hw); - if (mmio) + if (mmio) { + hwif->host_flags = IDE_HFLAG_MMIO; default_hwif_mmiops(hwif); + } idx[0] = hwif->index; diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c index 68947626e4aa..712d17bdd470 100644 --- a/drivers/ide/mips/swarm.c +++ b/drivers/ide/mips/swarm.c @@ -109,6 +109,7 @@ static int __devinit swarm_ide_probe(struct device *dev) base = ioremap(offset, size); /* Setup MMIO ops. */ + hwif->host_flags = IDE_HFLAG_MMIO; default_hwif_mmiops(hwif); hwif->chipset = ide_generic; diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index 63e28f4e6d3b..16a0bce17d69 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c @@ -573,6 +573,7 @@ static const struct ide_port_info sgiioc4_port_info __devinitdata = { .init_dma = ide_dma_sgiioc4, .port_ops = &sgiioc4_port_ops, .dma_ops = &sgiioc4_dma_ops, + .host_flags = IDE_HFLAG_MMIO, .mwdma_mask = ATA_MWDMA2_ONLY, }; diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c index c2040a017f47..076a476c3e3d 100644 --- a/drivers/ide/pci/siimage.c +++ b/drivers/ide/pci/siimage.c @@ -630,6 +630,7 @@ static void __devinit init_mmio_iops_siimage(ide_hwif_t *hwif) * Fill in the basic HWIF bits */ + hwif->host_flags |= IDE_HFLAG_MMIO; default_hwif_mmiops(hwif); hwif->hwif_data = addr; diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index 3cac6b2790dd..48aa019127bc 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -941,6 +941,7 @@ static const struct ide_port_info pmac_port_info = { .port_ops = &pmac_ide_port_ops, .host_flags = IDE_HFLAG_SET_PIO_MODE_KEEP_DMA | IDE_HFLAG_POST_SET_MODE | + IDE_HFLAG_MMIO | 
IDE_HFLAG_UNMASK_IRQS, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, diff --git a/include/linux/ide.h b/include/linux/ide.h index b89b95dcb708..8e79875f9872 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1069,6 +1069,8 @@ enum { IDE_HFLAG_NO_DMA = (1 << 14), /* check if host is PCI IDE device before allowing DMA */ IDE_HFLAG_NO_AUTODMA = (1 << 15), + /* host uses MMIO */ + IDE_HFLAG_MMIO = (1 << 16), /* host is CS5510/CS5520 */ IDE_HFLAG_CS5520 = IDE_HFLAG_VDMA, /* no LBA48 */ -- cgit v1.2.3-71-gd317 From 16bb69c14a42e64faef1ec5c724ffaca916347a1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:37 +0200 Subject: ide: remove ->INS{W,L} and ->OUTS{W,L} methods * Use ins{w,l}()/outs{w,l}() and __ide_mm_ins{w,l}()/__ide_mm_outs{w,l}() directly in ata_{in,out}put_data() (by using IDE_HFLAG_MMIO host flag to decide which I/O ops are required). * Remove no longer needed ->INS{W,L} and ->OUTS{W,L} methods (ide-h8300, au1xxx-ide and scc_pata implement their own ->{in,out}put_data methods). There should be no functional changes caused by this patch. 
Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/h8300/ide-h8300.c | 4 -- drivers/ide/ide-iops.c | 114 ++++++++++++++++++------------------------ drivers/ide/mips/au1xxx-ide.c | 3 -- drivers/ide/pci/scc_pata.c | 4 -- include/linux/ide.h | 4 -- 5 files changed, 48 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c index 90702f79d560..b5d6508d39bb 100644 --- a/drivers/ide/h8300/ide-h8300.c +++ b/drivers/ide/h8300/ide-h8300.c @@ -90,11 +90,7 @@ static inline void hwif_setup(ide_hwif_t *hwif) hwif->output_data = h8300_output_data; hwif->OUTW = mm_outw; - hwif->OUTSW = mm_outsw; hwif->INW = mm_inw; - hwif->INSW = mm_insw; - hwif->OUTSL = NULL; - hwif->INSL = NULL; } static int __init h8300_ide_init(void) diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index fbbbb30ae964..1789fbab2daf 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -42,16 +42,6 @@ static u16 ide_inw (unsigned long port) return (u16) inw(port); } -static void ide_insw (unsigned long port, void *addr, u32 count) -{ - insw(port, addr, count); -} - -static void ide_insl (unsigned long port, void *addr, u32 count) -{ - insl(port, addr, count); -} - static void ide_outb (u8 val, unsigned long port) { outb(val, port); @@ -67,27 +57,13 @@ static void ide_outw (u16 val, unsigned long port) outw(val, port); } -static void ide_outsw (unsigned long port, void *addr, u32 count) -{ - outsw(port, addr, count); -} - -static void ide_outsl (unsigned long port, void *addr, u32 count) -{ - outsl(port, addr, count); -} - void default_hwif_iops (ide_hwif_t *hwif) { hwif->OUTB = ide_outb; hwif->OUTBSYNC = ide_outbsync; hwif->OUTW = ide_outw; - hwif->OUTSW = ide_outsw; - hwif->OUTSL = ide_outsl; hwif->INB = ide_inb; hwif->INW = ide_inw; - hwif->INSW = ide_insw; - hwif->INSL = ide_insl; } /* @@ -104,16 +80,6 @@ static u16 ide_mm_inw (unsigned long port) return (u16) readw((void __iomem *) port); } -static 
void ide_mm_insw (unsigned long port, void *addr, u32 count) -{ - __ide_mm_insw((void __iomem *) port, addr, count); -} - -static void ide_mm_insl (unsigned long port, void *addr, u32 count) -{ - __ide_mm_insl((void __iomem *) port, addr, count); -} - static void ide_mm_outb (u8 value, unsigned long port) { writeb(value, (void __iomem *) port); @@ -129,16 +95,6 @@ static void ide_mm_outw (u16 value, unsigned long port) writew(value, (void __iomem *) port); } -static void ide_mm_outsw (unsigned long port, void *addr, u32 count) -{ - __ide_mm_outsw((void __iomem *) port, addr, count); -} - -static void ide_mm_outsl (unsigned long port, void *addr, u32 count) -{ - __ide_mm_outsl((void __iomem *) port, addr, count); -} - void default_hwif_mmiops (ide_hwif_t *hwif) { hwif->OUTB = ide_mm_outb; @@ -146,12 +102,8 @@ void default_hwif_mmiops (ide_hwif_t *hwif) this one is controller specific! */ hwif->OUTBSYNC = ide_mm_outbsync; hwif->OUTW = ide_mm_outw; - hwif->OUTSW = ide_mm_outsw; - hwif->OUTSL = ide_mm_outsl; hwif->INB = ide_mm_inb; hwif->INW = ide_mm_inw; - hwif->INSW = ide_mm_insw; - hwif->INSL = ide_mm_insl; } EXPORT_SYMBOL(default_hwif_mmiops); @@ -203,24 +155,39 @@ static void ata_input_data(ide_drive_t *drive, struct request *rq, struct ide_io_ports *io_ports = &hwif->io_ports; unsigned long data_addr = io_ports->data_addr; u8 io_32bit = drive->io_32bit; + u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 
1 : 0; len++; if (io_32bit) { - if (io_32bit & 2) { - unsigned long flags; + unsigned long uninitialized_var(flags); + if (io_32bit & 2) { local_irq_save(flags); ata_vlb_sync(drive, io_ports->nsect_addr); - hwif->INSL(data_addr, buf, len / 4); + } + + if (mmio) + __ide_mm_insl((void __iomem *)data_addr, buf, len / 4); + else + insl(data_addr, buf, len / 4); + + if (io_32bit & 2) local_irq_restore(flags); - } else - hwif->INSL(data_addr, buf, len / 4); - if ((len & 3) >= 2) - hwif->INSW(data_addr, (u8 *)buf + (len & ~3), 1); - } else - hwif->INSW(data_addr, buf, len / 2); + if ((len & 3) >= 2) { + if (mmio) + __ide_mm_insw((void __iomem *)data_addr, + (u8 *)buf + (len & ~3), 1); + else + insw(data_addr, (u8 *)buf + (len & ~3), 1); + } + } else { + if (mmio) + __ide_mm_insw((void __iomem *)data_addr, buf, len / 2); + else + insw(data_addr, buf, len / 2); + } } /* @@ -233,22 +200,37 @@ static void ata_output_data(ide_drive_t *drive, struct request *rq, struct ide_io_ports *io_ports = &hwif->io_ports; unsigned long data_addr = io_ports->data_addr; u8 io_32bit = drive->io_32bit; + u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 
1 : 0; if (io_32bit) { - if (io_32bit & 2) { - unsigned long flags; + unsigned long uninitialized_var(flags); + if (io_32bit & 2) { local_irq_save(flags); ata_vlb_sync(drive, io_ports->nsect_addr); - hwif->OUTSL(data_addr, buf, len / 4); + } + + if (mmio) + __ide_mm_outsl((void __iomem *)data_addr, buf, len / 4); + else + outsl(data_addr, buf, len / 4); + + if (io_32bit & 2) local_irq_restore(flags); - } else - hwif->OUTSL(data_addr, buf, len / 4); - if ((len & 3) >= 2) - hwif->OUTSW(data_addr, (u8 *)buf + (len & ~3), 1); - } else - hwif->OUTSW(data_addr, buf, len / 2); + if ((len & 3) >= 2) { + if (mmio) + __ide_mm_outsw((void __iomem *)data_addr, + (u8 *)buf + (len & ~3), 1); + else + outsw(data_addr, (u8 *)buf + (len & ~3), 1); + } + } else { + if (mmio) + __ide_mm_outsw((void __iomem *)data_addr, buf, len / 2); + else + outsw(data_addr, buf, len / 2); + } } void default_hwif_transport(ide_hwif_t *hwif) diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c index b28fa794b314..1a6c27b32498 100644 --- a/drivers/ide/mips/au1xxx-ide.c +++ b/drivers/ide/mips/au1xxx-ide.c @@ -609,9 +609,6 @@ static int au_ide_probe(struct device *dev) #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_PIO_DBDMA hwif->input_data = au1xxx_input_data; hwif->output_data = au1xxx_output_data; - - hwif->INSW = auide_insw; - hwif->OUTSW = auide_outsw; #endif hwif->select_data = 0; /* no chipset-specific code */ hwif->config_data = 0; /* no chipset-specific code */ diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c index a7a2c58482a2..d11df45a2ae8 100644 --- a/drivers/ide/pci/scc_pata.c +++ b/drivers/ide/pci/scc_pata.c @@ -669,13 +669,9 @@ static void __devinit init_mmio_iops_scc(ide_hwif_t *hwif) hwif->INB = scc_ide_inb; hwif->INW = scc_ide_inw; - hwif->INSW = scc_ide_insw; - hwif->INSL = scc_ide_insl; hwif->OUTB = scc_ide_outb; hwif->OUTBSYNC = scc_ide_outbsync; hwif->OUTW = scc_ide_outw; - hwif->OUTSW = scc_ide_outsw; - hwif->OUTSL = scc_ide_outsl; hwif->dma_base = 
dma_base; hwif->config_data = ports->ctl; diff --git a/include/linux/ide.h b/include/linux/ide.h index 8e79875f9872..5ab9773b75fb 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -475,13 +475,9 @@ typedef struct hwif_s { void (*OUTB)(u8 addr, unsigned long port); void (*OUTBSYNC)(ide_drive_t *drive, u8 addr, unsigned long port); void (*OUTW)(u16 addr, unsigned long port); - void (*OUTSW)(unsigned long port, void *addr, u32 count); - void (*OUTSL)(unsigned long port, void *addr, u32 count); u8 (*INB)(unsigned long port); u16 (*INW)(unsigned long port); - void (*INSW)(unsigned long port, void *addr, u32 count); - void (*INSL)(unsigned long port, void *addr, u32 count); /* dma physical region descriptor table (cpu view) */ unsigned int *dmatable_cpu; -- cgit v1.2.3-71-gd317 From 1fc142589e58b20a67582974b8848595a2c7432e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:39 +0200 Subject: ide: add ide_execute_pkt_cmd() helper Add ide_execute_pkt_cmd() helper for executing PACKET command, then convert ATAPI device drivers to use it. As a nice side-effect this fixes ide-{floppy,tape,scsi} w.r.t. ide_lock taking (ide-cd was OK). 
Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-cd.c | 9 +-------- drivers/ide/ide-floppy.c | 4 +--- drivers/ide/ide-iops.c | 12 +++++++++++- drivers/ide/ide-tape.c | 4 +--- drivers/scsi/ide-scsi.c | 4 +--- include/linux/ide.h | 2 ++ 6 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 095e50a93869..0881ddc5831e 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -555,14 +555,7 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive, ATAPI_WAIT_PC, cdrom_timer_expiry); return ide_started; } else { - unsigned long flags; - - /* packet command */ - spin_lock_irqsave(&ide_lock, flags); - hwif->OUTBSYNC(drive, WIN_PACKETCMD, - hwif->io_ports.command_addr); - ndelay(400); - spin_unlock_irqrestore(&ide_lock, flags); + ide_execute_pkt_cmd(drive); return (*handler) (drive); } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index da79248633a4..0039789b0eb9 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -696,9 +696,7 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive, return ide_started; } else { /* Issue the packet command */ - hwif->OUTBSYNC(drive, WIN_PACKETCMD, - hwif->io_ports.command_addr); - ndelay(400); + ide_execute_pkt_cmd(drive); return (*pkt_xfer_routine) (drive); } } diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 7f2b4edc6369..dfe47d5eb157 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -801,9 +801,19 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler, ndelay(400); spin_unlock_irqrestore(&ide_lock, flags); } - EXPORT_SYMBOL(ide_execute_command); +void ide_execute_pkt_cmd(ide_drive_t *drive) +{ + ide_hwif_t *hwif = drive->hwif; + unsigned long flags; + + spin_lock_irqsave(&ide_lock, flags); + hwif->OUTBSYNC(drive, WIN_PACKETCMD, hwif->io_ports.command_addr); + ndelay(400); + spin_unlock_irqrestore(&ide_lock, 
flags); +} +EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd); /* needed below */ static ide_startstop_t do_reset1 (ide_drive_t *, int); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index dc88431a2359..71d07d740add 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -1056,9 +1056,7 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive, IDETAPE_WAIT_CMD, NULL); return ide_started; } else { - hwif->OUTBSYNC(drive, WIN_PACKETCMD, - hwif->io_ports.command_addr); - ndelay(400); + ide_execute_pkt_cmd(drive); return idetape_transfer_pc(drive); } } diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 8b1c783e259b..7964cc146152 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -574,9 +574,7 @@ static ide_startstop_t idescsi_issue_pc(ide_drive_t *drive, return ide_started; } else { /* Issue the packet command */ - hwif->OUTBSYNC(drive, WIN_PACKETCMD, - hwif->io_ports.command_addr); - ndelay(400); + ide_execute_pkt_cmd(drive); return idescsi_transfer_pc(drive); } } diff --git a/include/linux/ide.h b/include/linux/ide.h index 5ab9773b75fb..3927996d2f82 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -822,6 +822,8 @@ extern void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigne void ide_execute_command(ide_drive_t *, u8, ide_handler_t *, unsigned int, ide_expiry_t *); +void ide_execute_pkt_cmd(ide_drive_t *); + ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); -- cgit v1.2.3-71-gd317 From 089c5c7e0089c3461545be936bcd236cbf16b79a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:39 +0200 Subject: ide: factor out debugging code from ide_tf_load() Factor out debugging code from ide_tf_load() to ide_tf_dump() helper and update ide_tf_load() users accordingly. 
Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-io.c | 1 + drivers/ide/ide-taskfile.c | 29 +++++++++++++++++------------ include/linux/ide.h | 2 ++ 3 files changed, 20 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index c32ca769b963..a6f2186773fa 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -1680,6 +1680,7 @@ void ide_pktcmd_tf_load(ide_drive_t *drive, u32 tf_flags, u16 bcount, u8 dma) task.tf.lbam = bcount & 0xff; task.tf.lbah = (bcount >> 8) & 0xff; + ide_tf_dump(drive->name, &task.tf); ide_tf_load(drive, &task); } diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 0321884f9d92..416ce54af7ad 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -33,26 +33,29 @@ #include #include -void ide_tf_load(ide_drive_t *drive, ide_task_t *task) +void ide_tf_dump(const char *s, struct ide_taskfile *tf) { - ide_hwif_t *hwif = drive->hwif; - struct ide_io_ports *io_ports = &hwif->io_ports; - struct ide_taskfile *tf = &task->tf; - u8 HIHI = (task->tf_flags & IDE_TFLAG_LBA48) ? 0xE0 : 0xEF; - - if (task->tf_flags & IDE_TFLAG_FLAGGED) - HIHI = 0xFF; - #ifdef DEBUG printk("%s: tf: feat 0x%02x nsect 0x%02x lbal 0x%02x " "lbam 0x%02x lbah 0x%02x dev 0x%02x cmd 0x%02x\n", - drive->name, tf->feature, tf->nsect, tf->lbal, + s, tf->feature, tf->nsect, tf->lbal, tf->lbam, tf->lbah, tf->device, tf->command); printk("%s: hob: nsect 0x%02x lbal 0x%02x " "lbam 0x%02x lbah 0x%02x\n", - drive->name, tf->hob_nsect, tf->hob_lbal, + s, tf->hob_nsect, tf->hob_lbal, tf->hob_lbam, tf->hob_lbah); #endif +} + +void ide_tf_load(ide_drive_t *drive, ide_task_t *task) +{ + ide_hwif_t *hwif = drive->hwif; + struct ide_io_ports *io_ports = &hwif->io_ports; + struct ide_taskfile *tf = &task->tf; + u8 HIHI = (task->tf_flags & IDE_TFLAG_LBA48) ? 
0xE0 : 0xEF; + + if (task->tf_flags & IDE_TFLAG_FLAGGED) + HIHI = 0xFF; ide_set_irq(drive, 1); @@ -149,8 +152,10 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) if (task->tf_flags & IDE_TFLAG_FLAGGED) task->tf_flags |= IDE_TFLAG_FLAGGED_SET_IN_FLAGS; - if ((task->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) + if ((task->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) { + ide_tf_dump(drive->name, tf); ide_tf_load(drive, task); + } switch (task->data_phase) { case TASKFILE_MULTI_OUT: diff --git a/include/linux/ide.h b/include/linux/ide.h index 3927996d2f82..d7d8bb69db4b 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -960,6 +960,8 @@ typedef struct ide_task_s { void *special; /* valid_t generally */ } ide_task_t; +void ide_tf_dump(const char *, struct ide_taskfile *); + void ide_tf_load(ide_drive_t *, ide_task_t *); void ide_tf_read(ide_drive_t *, ide_task_t *); -- cgit v1.2.3-71-gd317 From 94cd5b62ff9bb07ef065333eb97438f115a75890 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:40 +0200 Subject: ide: add ->tf_load and ->tf_read methods * Add ->tf_load and ->tf_read methods to ide_hwif_t and set the default methods in default_hwif_transport(). * Use ->tf_{load,read} instead of calling ide_tf_{load,read}() directly. * Make ide_tf_{load,read}() static. There should be no functional changes caused by this patch. 
Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-io.c | 4 ++-- drivers/ide/ide-iops.c | 7 +++++-- drivers/ide/ide-lib.c | 2 +- drivers/ide/ide-taskfile.c | 2 +- include/linux/ide.h | 8 +++++--- 5 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 5b675a001382..8d7c1a09e1e7 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -330,7 +330,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) tf->error = err; tf->status = stat; - ide_tf_read(drive, task); + drive->hwif->tf_read(drive, task); if (task->tf_flags & IDE_TFLAG_DYN) kfree(task); @@ -1638,7 +1638,7 @@ void ide_pktcmd_tf_load(ide_drive_t *drive, u32 tf_flags, u16 bcount, u8 dma) task.tf.lbah = (bcount >> 8) & 0xff; ide_tf_dump(drive->name, &task.tf); - ide_tf_load(drive, &task); + drive->hwif->tf_load(drive, &task); } EXPORT_SYMBOL_GPL(ide_pktcmd_tf_load); diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index ac9e063bcf93..65275454a209 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -127,7 +127,7 @@ void SELECT_MASK (ide_drive_t *drive, int mask) port_ops->maskproc(drive, mask); } -void ide_tf_load(ide_drive_t *drive, ide_task_t *task) +static void ide_tf_load(ide_drive_t *drive, ide_task_t *task) { ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; @@ -172,7 +172,7 @@ void ide_tf_load(ide_drive_t *drive, ide_task_t *task) io_ports->device_addr); } -void ide_tf_read(ide_drive_t *drive, ide_task_t *task) +static void ide_tf_read(ide_drive_t *drive, ide_task_t *task) { ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; @@ -323,6 +323,9 @@ static void ata_output_data(ide_drive_t *drive, struct request *rq, void default_hwif_transport(ide_hwif_t *hwif) { + hwif->tf_load = ide_tf_load; + hwif->tf_read = ide_tf_read; + hwif->input_data = ata_input_data; hwif->output_data = 
ata_output_data; } diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 6f04ea3e93a8..47af80df6872 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -487,7 +487,7 @@ static void ide_dump_sector(ide_drive_t *drive) else task.tf_flags = IDE_TFLAG_IN_LBA | IDE_TFLAG_IN_DEVICE; - ide_tf_read(drive, &task); + drive->hwif->tf_read(drive, &task); if (lba48 || (tf->device & ATA_LBA)) printk(", LBAsect=%llu", diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 9ec3ecd4a3a5..9a846a0cd5a4 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -109,7 +109,7 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) if ((task->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) { ide_tf_dump(drive->name, tf); - ide_tf_load(drive, task); + hwif->tf_load(drive, task); } switch (task->data_phase) { diff --git a/include/linux/ide.h b/include/linux/ide.h index d7d8bb69db4b..8e95579c3d34 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -427,6 +427,8 @@ struct ide_dma_ops { void (*dma_timeout)(struct ide_drive_s *); }; +struct ide_task_s; + typedef struct hwif_s { struct hwif_s *next; /* for linked-list in ide_hwgroup_t */ struct hwif_s *mate; /* other hwif from same PCI chip */ @@ -467,6 +469,9 @@ typedef struct hwif_s { const struct ide_port_ops *port_ops; const struct ide_dma_ops *dma_ops; + void (*tf_load)(ide_drive_t *, struct ide_task_s *); + void (*tf_read)(ide_drive_t *, struct ide_task_s *); + void (*input_data)(ide_drive_t *, struct request *, void *, unsigned); void (*output_data)(ide_drive_t *, struct request *, void *, unsigned); @@ -962,9 +967,6 @@ typedef struct ide_task_s { void ide_tf_dump(const char *, struct ide_taskfile *); -void ide_tf_load(ide_drive_t *, ide_task_t *); -void ide_tf_read(ide_drive_t *, ide_task_t *); - extern void SELECT_DRIVE(ide_drive_t *); extern void SELECT_MASK(ide_drive_t *, int); -- cgit v1.2.3-71-gd317 From 7c0daf2681f140dd9f39cd95966f471b5c904d8a 
Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:41 +0200 Subject: ide: remove ->INW and ->OUTW methods * Remove no longer used ->INW and ->OUTW methods. While at it: * scc_pata.c: scc_ide_{out,in}w() is called only in scc_tf_{load,read}() so inline it there. Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/cris/ide-cris.c | 2 -- drivers/ide/h8300/ide-h8300.c | 3 --- drivers/ide/ide-iops.c | 24 ------------------------ drivers/ide/pci/scc_pata.c | 19 +++---------------- include/linux/ide.h | 2 -- 5 files changed, 3 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c index d628141098c2..38b069c0057f 100644 --- a/drivers/ide/cris/ide-cris.c +++ b/drivers/ide/cris/ide-cris.c @@ -903,10 +903,8 @@ static int __init init_e100_ide(void) hwif->output_data = cris_output_data; hwif->OUTB = &cris_ide_outb; - hwif->OUTW = &cris_ide_outw; hwif->OUTBSYNC = &cris_ide_outbsync; hwif->INB = &cris_ide_inb; - hwif->INW = &cris_ide_inw; hwif->cbl = ATA_CBL_PATA40; idx[h] = hwif->index; diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c index 6bd143cf0891..ecf53bb0d2aa 100644 --- a/drivers/ide/h8300/ide-h8300.c +++ b/drivers/ide/h8300/ide-h8300.c @@ -176,9 +176,6 @@ static inline void hwif_setup(ide_hwif_t *hwif) hwif->input_data = h8300_input_data; hwif->output_data = h8300_output_data; - - hwif->OUTW = mm_outw; - hwif->INW = mm_inw; } static int __init h8300_ide_init(void) diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index e981e2943073..daa23b19440e 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -37,11 +37,6 @@ static u8 ide_inb (unsigned long port) return (u8) inb(port); } -static u16 ide_inw (unsigned long port) -{ - return (u16) inw(port); -} - static void ide_outb (u8 val, unsigned long port) { outb(val, port); @@ -52,18 +47,11 @@ static void ide_outbsync (ide_drive_t *drive, u8 addr, unsigned long 
port) outb(addr, port); } -static void ide_outw (u16 val, unsigned long port) -{ - outw(val, port); -} - void default_hwif_iops (ide_hwif_t *hwif) { hwif->OUTB = ide_outb; hwif->OUTBSYNC = ide_outbsync; - hwif->OUTW = ide_outw; hwif->INB = ide_inb; - hwif->INW = ide_inw; } /* @@ -75,11 +63,6 @@ static u8 ide_mm_inb (unsigned long port) return (u8) readb((void __iomem *) port); } -static u16 ide_mm_inw (unsigned long port) -{ - return (u16) readw((void __iomem *) port); -} - static void ide_mm_outb (u8 value, unsigned long port) { writeb(value, (void __iomem *) port); @@ -90,20 +73,13 @@ static void ide_mm_outbsync (ide_drive_t *drive, u8 value, unsigned long port) writeb(value, (void __iomem *) port); } -static void ide_mm_outw (u16 value, unsigned long port) -{ - writew(value, (void __iomem *) port); -} - void default_hwif_mmiops (ide_hwif_t *hwif) { hwif->OUTB = ide_mm_outb; /* Most systems will need to override OUTBSYNC, alas however this one is controller specific! */ hwif->OUTBSYNC = ide_mm_outbsync; - hwif->OUTW = ide_mm_outw; hwif->INB = ide_mm_inb; - hwif->INW = ide_mm_inw; } EXPORT_SYMBOL(default_hwif_mmiops); diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c index cb635a66030e..a17f73ec577a 100644 --- a/drivers/ide/pci/scc_pata.c +++ b/drivers/ide/pci/scc_pata.c @@ -126,12 +126,6 @@ static u8 scc_ide_inb(unsigned long port) return (u8)data; } -static u16 scc_ide_inw(unsigned long port) -{ - u32 data = in_be32((void*)port); - return (u16)data; -} - static void scc_ide_insw(unsigned long port, void *addr, u32 count) { u16 *ptr = (u16 *)addr; @@ -154,11 +148,6 @@ static void scc_ide_outb(u8 addr, unsigned long port) out_be32((void*)port, addr); } -static void scc_ide_outw(u16 addr, unsigned long port) -{ - out_be32((void*)port, addr); -} - static void scc_ide_outbsync(ide_drive_t * drive, u8 addr, unsigned long port) { @@ -630,8 +619,8 @@ static void scc_tf_load(ide_drive_t *drive, ide_task_t *task) ide_set_irq(drive, 1); if 
(task->tf_flags & IDE_TFLAG_OUT_DATA) - scc_ide_outw((tf->hob_data << 8) | tf->data, - io_ports->data_addr); + out_be32((void *)io_ports->data_addr, + (tf->hob_data << 8) | tf->data); if (task->tf_flags & IDE_TFLAG_OUT_HOB_FEATURE) scc_ide_outb(tf->hob_feature, io_ports->feature_addr); @@ -666,7 +655,7 @@ static void scc_tf_read(ide_drive_t *drive, ide_task_t *task) struct ide_taskfile *tf = &task->tf; if (task->tf_flags & IDE_TFLAG_IN_DATA) { - u16 data = scc_ide_inw(io_ports->data_addr); + u16 data = (u16)in_be32((void *)io_ports->data_addr); tf->data = data & 0xff; tf->hob_data = (data >> 8) & 0xff; @@ -755,10 +744,8 @@ static void __devinit init_mmio_iops_scc(ide_hwif_t *hwif) hwif->output_data = scc_output_data; hwif->INB = scc_ide_inb; - hwif->INW = scc_ide_inw; hwif->OUTB = scc_ide_outb; hwif->OUTBSYNC = scc_ide_outbsync; - hwif->OUTW = scc_ide_outw; hwif->dma_base = dma_base; hwif->config_data = ports->ctl; diff --git a/include/linux/ide.h b/include/linux/ide.h index 8e95579c3d34..2053d7d86b1d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -479,10 +479,8 @@ typedef struct hwif_s { void (*OUTB)(u8 addr, unsigned long port); void (*OUTBSYNC)(ide_drive_t *drive, u8 addr, unsigned long port); - void (*OUTW)(u16 addr, unsigned long port); u8 (*INB)(unsigned long port); - u16 (*INW)(unsigned long port); /* dma physical region descriptor table (cpu view) */ unsigned int *dmatable_cpu; -- cgit v1.2.3-71-gd317 From 9f87abe892f899f19df8d472f937ee955cd6264b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:41 +0200 Subject: ide: add ide_pad_transfer() helper * Add ide_pad_transfer() helper (which uses ->{in,out}put_data methods internally so the transfer is also padded to drive+host requirements) and use it instead of ide_atapi_{write_zeros,discard_data}(). * Remove no longer needed ide_atapi_{write_zeros,discard_data}(). 
Cc: Borislav Petkov Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-floppy.c | 7 ++----- drivers/ide/ide-io.c | 15 +++++++++++++++ drivers/ide/ide-tape.c | 4 ++-- drivers/scsi/ide-scsi.c | 6 +++--- include/linux/ide.h | 25 ++----------------------- 5 files changed, 24 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 0039789b0eb9..f05fbc2bd7a8 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -262,10 +262,7 @@ static void ide_floppy_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, if (bcount) { printk(KERN_ERR "%s: leftover data in %s, bcount == %d\n", drive->name, __func__, bcount); - if (direction) - ide_atapi_write_zeros(drive, bcount); - else - ide_atapi_discard_data(drive, bcount); + ide_pad_transfer(drive, direction, bcount); } } @@ -491,7 +488,7 @@ static ide_startstop_t idefloppy_pc_intr(ide_drive_t *drive) printk(KERN_ERR "ide-floppy: The floppy wants " "to send us more data than expected " "- discarding data\n"); - ide_atapi_discard_data(drive, bcount); + ide_pad_transfer(drive, 0, bcount); ide_set_handler(drive, &idefloppy_pc_intr, diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 8d7c1a09e1e7..788783da9025 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -1642,3 +1642,18 @@ void ide_pktcmd_tf_load(ide_drive_t *drive, u32 tf_flags, u16 bcount, u8 dma) } EXPORT_SYMBOL_GPL(ide_pktcmd_tf_load); + +void ide_pad_transfer(ide_drive_t *drive, int write, int len) +{ + ide_hwif_t *hwif = drive->hwif; + u8 buf[4] = { 0 }; + + while (len > 0) { + if (write) + hwif->output_data(drive, NULL, buf, min(4, len)); + else + hwif->input_data(drive, NULL, buf, min(4, len)); + len -= 4; + } +} +EXPORT_SYMBOL_GPL(ide_pad_transfer); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 71d07d740add..54a43b044608 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -395,7 
+395,7 @@ static void idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, if (bh == NULL) { printk(KERN_ERR "ide-tape: bh == NULL in " "idetape_input_buffers\n"); - ide_atapi_discard_data(drive, bcount); + ide_pad_transfer(drive, 0, bcount); return; } count = min( @@ -871,7 +871,7 @@ static ide_startstop_t idetape_pc_intr(ide_drive_t *drive) printk(KERN_ERR "ide-tape: The tape wants to " "send us more data than expected " "- discarding data\n"); - ide_atapi_discard_data(drive, bcount); + ide_pad_transfer(drive, 0, bcount); ide_set_handler(drive, &idetape_pc_intr, IDETAPE_WAIT_CMD, NULL); return ide_started; diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 7964cc146152..44d8d5163a1a 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -164,7 +164,7 @@ static void idescsi_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, if (bcount) { printk (KERN_ERR "ide-scsi: scatter gather table too small, discarding data\n"); - ide_atapi_discard_data(drive, bcount); + ide_pad_transfer(drive, 0, bcount); } } @@ -201,7 +201,7 @@ static void idescsi_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, if (bcount) { printk (KERN_ERR "ide-scsi: scatter gather table too small, padding with zeros\n"); - ide_atapi_write_zeros(drive, bcount); + ide_pad_transfer(drive, 1, bcount); } } @@ -438,7 +438,7 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive) } pc->xferred += temp; pc->cur_pos += temp; - ide_atapi_discard_data(drive, bcount - temp); + ide_pad_transfer(drive, 0, bcount - temp); ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry); return ide_started; } diff --git a/include/linux/ide.h b/include/linux/ide.h index 2053d7d86b1d..7a5da394b98f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -827,6 +827,8 @@ void ide_execute_command(ide_drive_t *, u8, ide_handler_t *, unsigned int, void ide_execute_pkt_cmd(ide_drive_t *); +void ide_pad_transfer(ide_drive_t *, int, int); + 
ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); @@ -1359,27 +1361,4 @@ static inline u8 ide_read_error(ide_drive_t *drive) return hwif->INB(hwif->io_ports.error_addr); } - -/* - * Too bad. The drive wants to send us data which we are not ready to accept. - * Just throw it away. - */ -static inline void ide_atapi_discard_data(ide_drive_t *drive, unsigned bcount) -{ - ide_hwif_t *hwif = drive->hwif; - - /* FIXME: use ->input_data */ - while (bcount--) - (void)hwif->INB(hwif->io_ports.data_addr); -} - -static inline void ide_atapi_write_zeros(ide_drive_t *drive, unsigned bcount) -{ - ide_hwif_t *hwif = drive->hwif; - - /* FIXME: use ->output_data */ - while (bcount--) - hwif->OUTB(0, hwif->io_ports.data_addr); -} - #endif /* _IDE_H */ -- cgit v1.2.3-71-gd317 From 41051a141dcc67f4c5011a2ab2b547e80b9ac509 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:42 +0200 Subject: ide: remove ->dma_vendor{1,3} fields from ide_hwif_t * Use 'hwif->dma_base + {1,3}' instead of hwif->dma_vendor{1,3} in pdc202xx_new host driver. * Remove no longer needed ->dma_vendor{1,3} fields from ide_hwif_t. 
Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-dma.c | 4 ---- drivers/ide/pci/pdc202xx_new.c | 8 ++++---- include/linux/ide.h | 2 -- 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index c352cf27b6e7..767820db7cae 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -858,12 +858,8 @@ void ide_setup_dma(ide_hwif_t *hwif, unsigned long base) if (!hwif->dma_command) hwif->dma_command = hwif->dma_base + 0; - if (!hwif->dma_vendor1) - hwif->dma_vendor1 = hwif->dma_base + 1; if (!hwif->dma_status) hwif->dma_status = hwif->dma_base + 2; - if (!hwif->dma_vendor3) - hwif->dma_vendor3 = hwif->dma_base + 3; if (!hwif->dma_prdtable) hwif->dma_prdtable = hwif->dma_base + 4; diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c index ec9bd7b352fc..070df8ab3b21 100644 --- a/drivers/ide/pci/pdc202xx_new.c +++ b/drivers/ide/pci/pdc202xx_new.c @@ -83,8 +83,8 @@ static u8 get_indexed_reg(ide_hwif_t *hwif, u8 index) { u8 value; - outb(index, hwif->dma_vendor1); - value = inb(hwif->dma_vendor3); + outb(index, hwif->dma_base + 1); + value = inb(hwif->dma_base + 3); DBG("index[%02X] value[%02X]\n", index, value); return value; @@ -97,8 +97,8 @@ static u8 get_indexed_reg(ide_hwif_t *hwif, u8 index) */ static void set_indexed_reg(ide_hwif_t *hwif, u8 index, u8 value) { - outb(index, hwif->dma_vendor1); - outb(value, hwif->dma_vendor3); + outb(index, hwif->dma_base + 1); + outb(value, hwif->dma_base + 3); DBG("index[%02X] value[%02X]\n", index, value); } diff --git a/include/linux/ide.h b/include/linux/ide.h index 7a5da394b98f..d1d0111e48aa 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -505,9 +505,7 @@ typedef struct hwif_s { unsigned long dma_base; /* base addr for dma ports */ unsigned long dma_command; /* dma command register */ - unsigned long dma_vendor1; /* dma vendor 1 register */ unsigned long 
dma_status; /* dma status register */ - unsigned long dma_vendor3; /* dma vendor 3 register */ unsigned long dma_prdtable; /* actual prd table address */ unsigned long config_data; /* for use by chipset-specific code */ -- cgit v1.2.3-71-gd317 From 55224bc86a39409d55e47fd45573642ac709bb8f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 28 Apr 2008 23:44:42 +0200 Subject: ide: remove ->dma_prdtable field from ide_hwif_t * Use 'hwif->dma_base + {4,8}' instead of hwif->dma_prdtable in {ide,scc}_dma_setup(). * Remove no longer needed ->dma_prdtable field from ide_hwif_t. While at it: * Use ATA_DMA_TABLE_OFS define. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-dma.c | 7 +++---- drivers/ide/pci/scc_pata.c | 3 +-- include/linux/ide.h | 1 - 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 767820db7cae..653b1ade13d3 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -464,9 +464,10 @@ int ide_dma_setup(ide_drive_t *drive) /* PRD table */ if (hwif->mmio) - writel(hwif->dmatable_dma, (void __iomem *)hwif->dma_prdtable); + writel(hwif->dmatable_dma, + (void __iomem *)(hwif->dma_base + ATA_DMA_TABLE_OFS)); else - outl(hwif->dmatable_dma, hwif->dma_prdtable); + outl(hwif->dmatable_dma, hwif->dma_base + ATA_DMA_TABLE_OFS); /* specify r/w */ hwif->OUTB(reading, hwif->dma_command); @@ -860,8 +861,6 @@ void ide_setup_dma(ide_hwif_t *hwif, unsigned long base) hwif->dma_command = hwif->dma_base + 0; if (!hwif->dma_status) hwif->dma_status = hwif->dma_base + 2; - if (!hwif->dma_prdtable) - hwif->dma_prdtable = hwif->dma_base + 4; hwif->dma_ops = &sff_dma_ops; } diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c index 144938188325..910fb00deb71 100644 --- a/drivers/ide/pci/scc_pata.c +++ b/drivers/ide/pci/scc_pata.c @@ -304,7 +304,7 @@ static int scc_dma_setup(ide_drive_t *drive) } /* PRD table */ - 
out_be32((void __iomem *)hwif->dma_prdtable, hwif->dmatable_dma); + out_be32((void __iomem *)(hwif->dma_base + 8), hwif->dmatable_dma); /* specify r/w */ out_be32((void __iomem *)hwif->dma_command, reading); @@ -838,7 +838,6 @@ static void __devinit init_hwif_scc(ide_hwif_t *hwif) hwif->dma_command = hwif->dma_base; hwif->dma_status = hwif->dma_base + 0x04; - hwif->dma_prdtable = hwif->dma_base + 0x08; /* PTERADD */ out_be32((void __iomem *)(hwif->dma_base + 0x018), hwif->dmatable_dma); diff --git a/include/linux/ide.h b/include/linux/ide.h index d1d0111e48aa..b0135b0c3a04 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -506,7 +506,6 @@ typedef struct hwif_s { unsigned long dma_base; /* base addr for dma ports */ unsigned long dma_command; /* dma command register */ unsigned long dma_status; /* dma status register */ - unsigned long dma_prdtable; /* actual prd table address */ unsigned long config_data; /* for use by chipset-specific code */ unsigned long select_data; /* for use by chipset-specific code */ -- cgit v1.2.3-71-gd317 From 98d5c21c812e4e3b795f5bd912f407ed7c5e4e38 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Fri, 18 Apr 2008 13:33:52 -0700 Subject: [POWERPC] Update lmb data structures for hotplug memory add/remove The powerpc kernel maintains information about logical memory blocks in the lmb.memory structure, which is initialized and updated at boot time, but not when memory is added or removed while the kernel is running. This adds a hotplug memory notifier which updates lmb.memory when memory is added or removed. This information is useful for eHEA driver to find out the memory layout and holes. NOTE: No special locking is needed for lmb_add() and lmb_remove(). Calls to these are serialized by caller. (pSeries_reconfig_chain). Signed-off-by: Badari Pulavarty Cc: Yasunori Goto Cc: Benjamin Herrenschmidt Cc: "David S. 
Miller" Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/hotplug-memory.c | 43 ++++++++++++++++ include/linux/lmb.h | 3 +- lib/lmb.c | 66 +++++++++++++++++++++---- 3 files changed, 102 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 2d3e9a4bd6ae..3c5727dd5aa5 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -57,6 +58,11 @@ static int pseries_remove_memory(struct device_node *np) if (ret) return ret; + /* + * Update memory regions for memory remove + */ + lmb_remove(start_pfn << PAGE_SHIFT, regs[3]); + /* * Remove htab bolted mappings for this section of memory */ @@ -65,6 +71,41 @@ static int pseries_remove_memory(struct device_node *np) return ret; } +static int pseries_add_memory(struct device_node *np) +{ + const char *type; + const unsigned int *my_index; + const unsigned int *regs; + u64 start_pfn; + int ret = -EINVAL; + + /* + * Check to see if we are actually adding memory + */ + type = of_get_property(np, "device_type", NULL); + if (type == NULL || strcmp(type, "memory") != 0) + return 0; + + /* + * Find the memory index and size of the added section + */ + my_index = of_get_property(np, "ibm,my-drc-index", NULL); + if (!my_index) + return ret; + + regs = of_get_property(np, "reg", NULL); + if (!regs) + return ret; + + start_pfn = section_nr_to_pfn(*my_index & 0xffff); + + /* + * Update memory region to represent the memory add + */ + lmb_add(start_pfn << PAGE_SHIFT, regs[3]); + return 0; +} + static int pseries_memory_notifier(struct notifier_block *nb, unsigned long action, void *node) { @@ -72,6 +113,8 @@ static int pseries_memory_notifier(struct notifier_block *nb, switch (action) { case PSERIES_RECONFIG_ADD: + if (pseries_add_memory(node)) + err = 
NOTIFY_BAD; break; case PSERIES_RECONFIG_REMOVE: if (pseries_remove_memory(node)) diff --git a/include/linux/lmb.h b/include/linux/lmb.h index 271153d27fba..55d4b261a9e8 100644 --- a/include/linux/lmb.h +++ b/include/linux/lmb.h @@ -40,7 +40,8 @@ extern struct lmb lmb; extern void __init lmb_init(void); extern void __init lmb_analyze(void); -extern long __init lmb_add(u64 base, u64 size); +extern long lmb_add(u64 base, u64 size); +extern long lmb_remove(u64 base, u64 size); extern long __init lmb_reserve(u64 base, u64 size); extern u64 __init lmb_alloc_nid(u64 size, u64 align, int nid, u64 (*nid_range)(u64, u64, int *)); diff --git a/lib/lmb.c b/lib/lmb.c index 207147ab25e4..5b2a739bc3d5 100644 --- a/lib/lmb.c +++ b/lib/lmb.c @@ -46,14 +46,13 @@ void lmb_dump_all(void) #endif /* DEBUG */ } -static unsigned long __init lmb_addrs_overlap(u64 base1, u64 size1, - u64 base2, u64 size2) +static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2, + u64 size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -static long __init lmb_addrs_adjacent(u64 base1, u64 size1, - u64 base2, u64 size2) +static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) { if (base2 == base1 + size1) return 1; @@ -63,7 +62,7 @@ static long __init lmb_addrs_adjacent(u64 base1, u64 size1, return 0; } -static long __init lmb_regions_adjacent(struct lmb_region *rgn, +static long lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, unsigned long r2) { u64 base1 = rgn->region[r1].base; @@ -74,7 +73,7 @@ static long __init lmb_regions_adjacent(struct lmb_region *rgn, return lmb_addrs_adjacent(base1, size1, base2, size2); } -static void __init lmb_remove_region(struct lmb_region *rgn, unsigned long r) +static void lmb_remove_region(struct lmb_region *rgn, unsigned long r) { unsigned long i; @@ -86,7 +85,7 @@ static void __init lmb_remove_region(struct lmb_region *rgn, unsigned long r) } /* Assumption: base addr of region 1 < base addr of 
region 2 */ -static void __init lmb_coalesce_regions(struct lmb_region *rgn, +static void lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) { rgn->region[r1].size += rgn->region[r2].size; @@ -118,7 +117,7 @@ void __init lmb_analyze(void) lmb.memory.size += lmb.memory.region[i].size; } -static long __init lmb_add_region(struct lmb_region *rgn, u64 base, u64 size) +static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size) { unsigned long coalesced = 0; long adjacent, i; @@ -182,7 +181,7 @@ static long __init lmb_add_region(struct lmb_region *rgn, u64 base, u64 size) return 0; } -long __init lmb_add(u64 base, u64 size) +long lmb_add(u64 base, u64 size) { struct lmb_region *_rgn = &lmb.memory; @@ -194,6 +193,55 @@ long __init lmb_add(u64 base, u64 size) } +long lmb_remove(u64 base, u64 size) +{ + struct lmb_region *rgn = &(lmb.memory); + u64 rgnbegin, rgnend; + u64 end = base + size; + int i; + + rgnbegin = rgnend = 0; /* supress gcc warnings */ + + /* Find the region where (base, size) belongs to */ + for (i=0; i < rgn->cnt; i++) { + rgnbegin = rgn->region[i].base; + rgnend = rgnbegin + rgn->region[i].size; + + if ((rgnbegin <= base) && (end <= rgnend)) + break; + } + + /* Didn't find the region */ + if (i == rgn->cnt) + return -1; + + /* Check to see if we are removing entire region */ + if ((rgnbegin == base) && (rgnend == end)) { + lmb_remove_region(rgn, i); + return 0; + } + + /* Check to see if region is matching at the front */ + if (rgnbegin == base) { + rgn->region[i].base = end; + rgn->region[i].size -= size; + return 0; + } + + /* Check to see if the region is matching at the end */ + if (rgnend == end) { + rgn->region[i].size -= size; + return 0; + } + + /* + * We need to split the entry - adjust the current one to the + * beginging of the hole and add the region after hole. 
+ */ + rgn->region[i].size = base - rgn->region[i].base; + return lmb_add_region(rgn, end, rgnend - end); +} + long __init lmb_reserve(u64 base, u64 size) { struct lmb_region *_rgn = &lmb.reserved; -- cgit v1.2.3-71-gd317 From 9d88a2eb6e05c07aa0d484b8fa1372722fa921d0 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Fri, 18 Apr 2008 13:33:53 -0700 Subject: [POWERPC] Provide walk_memory_resource() for powerpc Provide walk_memory_resource() for 64-bit powerpc. PowerPC maintains logical memory region mapping in the lmb.memory structure. Walk through these structures and do the callbacks for the contiguous chunks. Signed-off-by: Badari Pulavarty Cc: Yasunori Goto Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mem.c | 30 +++++++++++++++++++++++------- include/linux/lmb.h | 1 + lib/lmb.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d9e37f365b54..f67e118116fa 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -154,19 +154,35 @@ out: /* * walk_memory_resource() needs to make sure there is no holes in a given - * memory range. On PPC64, since this range comes from /sysfs, the range - * is guaranteed to be valid, non-overlapping and can not contain any - * holes. By the time we get here (memory add or remove), /proc/device-tree - * is updated and correct. Only reason we need to check against device-tree - * would be if we allow user-land to specify a memory range through a - * system call/ioctl etc. instead of doing offline/online through /sysfs. + * memory range. PPC64 does not maintain the memory layout in /proc/iomem. + * Instead it maintains it in lmb.memory structures. Walk through the + * memory regions, find holes and callback for contiguous regions. 
*/ int walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - return (*func)(start_pfn, nr_pages, arg); + struct lmb_property res; + unsigned long pfn, len; + u64 end; + int ret = -1; + + res.base = (u64) start_pfn << PAGE_SHIFT; + res.size = (u64) nr_pages << PAGE_SHIFT; + + end = res.base + res.size - 1; + while ((res.base < end) && (lmb_find(&res) >= 0)) { + pfn = (unsigned long)(res.base >> PAGE_SHIFT); + len = (unsigned long)(res.size >> PAGE_SHIFT); + ret = (*func)(pfn, len, arg); + if (ret) + break; + res.base += (res.size + 1); + res.size = (end - res.base + 1); + } + return ret; } +EXPORT_SYMBOL_GPL(walk_memory_resource); #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/include/linux/lmb.h b/include/linux/lmb.h index 55d4b261a9e8..c46c89505dac 100644 --- a/include/linux/lmb.h +++ b/include/linux/lmb.h @@ -54,6 +54,7 @@ extern u64 __init lmb_phys_mem_size(void); extern u64 __init lmb_end_of_DRAM(void); extern void __init lmb_enforce_memory_limit(u64 memory_limit); extern int __init lmb_is_reserved(u64 addr); +extern int lmb_find(struct lmb_property *res); extern void lmb_dump_all(void); diff --git a/lib/lmb.c b/lib/lmb.c index 5b2a739bc3d5..83287d3869a3 100644 --- a/lib/lmb.c +++ b/lib/lmb.c @@ -474,3 +474,36 @@ int __init lmb_is_reserved(u64 addr) } return 0; } + +/* + * Given a <base, len>, find which memory regions belong to this range. + * Adjust the request and return a contiguous chunk.
+ */ +int lmb_find(struct lmb_property *res) +{ + int i; + u64 rstart, rend; + + rstart = res->base; + rend = rstart + res->size - 1; + + for (i = 0; i < lmb.memory.cnt; i++) { + u64 start = lmb.memory.region[i].base; + u64 end = start + lmb.memory.region[i].size - 1; + + if (start > rend) + return -1; + + if ((end >= rstart) && (start < rend)) { + /* adjust the request */ + if (rstart < start) + rstart = start; + if (rend > end) + rend = end; + res->base = rstart; + res->size = rend - rstart + 1; + return 0; + } + } + return -1; +} -- cgit v1.2.3-71-gd317 From 63c4ec905d63834a97ec7dbbf0a2ec89ef5872be Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 21 Apr 2008 16:07:13 +0800 Subject: thermal: add the support for building the generic thermal as a module Build the generic thermal driver as module "thermal_sys". Make ACPI thermal, video, processor and fan SELECT the generic thermal driver, as these drivers rely on it to build the sysfs I/F. Signed-off-by: Zhang Rui Acked-by: Jean Delvare Signed-off-by: Len Brown --- drivers/acpi/Kconfig | 3 +++ drivers/misc/Kconfig | 1 + drivers/thermal/Kconfig | 4 ++-- drivers/thermal/Makefile | 3 ++- drivers/thermal/thermal.c | 2 +- include/linux/thermal.h | 14 -------------- 6 files changed, 9 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index b4f5e8542829..c52fca833268 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -140,6 +140,7 @@ config ACPI_VIDEO tristate "Video" depends on X86 && BACKLIGHT_CLASS_DEVICE && VIDEO_OUTPUT_CONTROL depends on INPUT + select THERMAL help This driver implement the ACPI Extensions For Display Adapters for integrated graphics devices on motherboard, as specified in @@ -151,6 +152,7 @@ config ACPI_VIDEO config ACPI_FAN tristate "Fan" + select THERMAL default y help This driver adds support for ACPI fan devices, allowing user-mode @@ -172,6 +174,7 @@ config ACPI_BAY config ACPI_PROCESSOR tristate "Processor" + 
select THERMAL default y help This driver installs ACPI as the idle handler for Linux, and uses diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 297a48f85446..08f35d76dcd9 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -344,6 +344,7 @@ config ATMEL_SSC config INTEL_MENLOW tristate "Thermal Management driver for Intel menlow platform" depends on ACPI_THERMAL + select THERMAL depends on X86 ---help--- ACPI thermal management enhancement driver on diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 17e71d56f31e..4b628526df09 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -3,7 +3,7 @@ # menuconfig THERMAL - bool "Generic Thermal sysfs driver" + tristate "Generic Thermal sysfs driver" help Generic Thermal Sysfs driver offers a generic mechanism for thermal management. Usually it's made up of one or more thermal @@ -11,4 +11,4 @@ menuconfig THERMAL Each thermal zone contains its own temperature, trip points, cooling devices. All platforms with ACPI thermal support can use this driver. - If you want this support, you should say Y here. + If you want this support, you should say Y or M here. diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 8ef1232de376..02b64517be8f 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -2,4 +2,5 @@ # Makefile for sensor chip drivers. 
# -obj-$(CONFIG_THERMAL) += thermal.o +thermal_sys-objs += thermal.o +obj-$(CONFIG_THERMAL) += thermal_sys.o diff --git a/drivers/thermal/thermal.c b/drivers/thermal/thermal.c index 7f79bbf652d7..cf56af4b7e02 100644 --- a/drivers/thermal/thermal.c +++ b/drivers/thermal/thermal.c @@ -31,7 +31,7 @@ #include #include -MODULE_AUTHOR("Zhang Rui") +MODULE_AUTHOR("Zhang Rui"); MODULE_DESCRIPTION("Generic thermal management sysfs support"); MODULE_LICENSE("GPL"); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 90c1c191ea69..3ff680b44e81 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -88,24 +88,10 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); - -#ifdef CONFIG_THERMAL struct thermal_cooling_device *thermal_cooling_device_register(char *, void *, struct thermal_cooling_device_ops *); void thermal_cooling_device_unregister(struct thermal_cooling_device *); -#else -static inline struct thermal_cooling_device -*thermal_cooling_device_register(char *c, void *v, - struct thermal_cooling_device_ops *t) -{ - return NULL; -} -static inline - void thermal_cooling_device_unregister(struct thermal_cooling_device *t) -{ -}; -#endif #endif /* __THERMAL_H__ */ -- cgit v1.2.3-71-gd317 From 9ec732ff80b7e8a9096666f78ae584d3b393bc84 Mon Sep 17 00:00:00 2001 From: "Zhang, Rui" Date: Thu, 10 Apr 2008 16:13:10 +0800 Subject: thermal: add new get_crit_temp callback Add a new callback so that the generic thermal can get the critical trip point info of a thermal zone, which is needed for building the tempX_crit hwmon sysfs attribute. 
Signed-off-by: Zhang Rui Acked-by: Jean Delvare Signed-off-by: Len Brown --- drivers/acpi/thermal.c | 13 +++++++++++++ include/linux/thermal.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 766bd25d3376..ec707ed1a70f 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -1012,6 +1012,18 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, return -EINVAL; } +static int thermal_get_crit_temp(struct thermal_zone_device *thermal, + unsigned long *temperature) { + struct acpi_thermal *tz = thermal->devdata; + + if (tz->trips.critical.flags.valid) { + *temperature = KELVIN_TO_MILLICELSIUS( + tz->trips.critical.temperature); + return 0; + } else + return -EINVAL; +} + typedef int (*cb)(struct thermal_zone_device *, int, struct thermal_cooling_device *); static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, @@ -1103,6 +1115,7 @@ static struct thermal_zone_device_ops acpi_thermal_zone_ops = { .set_mode = thermal_set_mode, .get_trip_type = thermal_get_trip_type, .get_trip_temp = thermal_get_trip_temp, + .get_crit_temp = thermal_get_crit_temp, }; static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 3ff680b44e81..16e6a8bdeb3c 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -41,6 +41,7 @@ struct thermal_zone_device_ops { int (*set_mode) (struct thermal_zone_device *, const char *); int (*get_trip_type) (struct thermal_zone_device *, int, char *); int (*get_trip_temp) (struct thermal_zone_device *, int, char *); + int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *); }; struct thermal_cooling_device_ops { -- cgit v1.2.3-71-gd317 From e68b16abd91dca91e35ea47537ef8a1b7ad72841 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 21 Apr 2008 16:07:52 +0800 Subject: thermal: add hwmon sysfs I/F Add hwmon sys I/F for 
generic thermal driver. Note: we have one hwmon class device for EACH TYPE of the thermal zone device. Signed-off-by: Zhang Rui Acked-by: Jean Delvare Signed-off-by: Len Brown --- drivers/thermal/thermal.c | 163 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/thermal.h | 24 +++++++ 2 files changed, 187 insertions(+) (limited to 'include/linux') diff --git a/drivers/thermal/thermal.c b/drivers/thermal/thermal.c index cf56af4b7e02..6098787341f3 100644 --- a/drivers/thermal/thermal.c +++ b/drivers/thermal/thermal.c @@ -295,6 +295,164 @@ thermal_cooling_device_trip_point_show(struct device *dev, /* Device management */ +#if defined(CONFIG_HWMON) || \ + (defined(CONFIG_HWMON_MODULE) && defined(CONFIG_THERMAL_MODULE)) +/* hwmon sys I/F */ +#include +static LIST_HEAD(thermal_hwmon_list); + +static ssize_t +name_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct thermal_hwmon_device *hwmon = dev->driver_data; + return sprintf(buf, "%s\n", hwmon->type); +} +static DEVICE_ATTR(name, 0444, name_show, NULL); + +static ssize_t +temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct thermal_hwmon_attr *hwmon_attr + = container_of(attr, struct thermal_hwmon_attr, attr); + struct thermal_zone_device *tz + = container_of(hwmon_attr, struct thermal_zone_device, + temp_input); + + return tz->ops->get_temp(tz, buf); +} + +static ssize_t +temp_crit_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct thermal_hwmon_attr *hwmon_attr + = container_of(attr, struct thermal_hwmon_attr, attr); + struct thermal_zone_device *tz + = container_of(hwmon_attr, struct thermal_zone_device, + temp_crit); + + return tz->ops->get_trip_temp(tz, 0, buf); +} + + +static int +thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + int new_hwmon_device = 1; + int result; + + mutex_lock(&thermal_list_lock); + list_for_each_entry(hwmon, &thermal_hwmon_list, node) + 
if (!strcmp(hwmon->type, tz->type)) { + new_hwmon_device = 0; + mutex_unlock(&thermal_list_lock); + goto register_sys_interface; + } + mutex_unlock(&thermal_list_lock); + + hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL); + if (!hwmon) + return -ENOMEM; + + INIT_LIST_HEAD(&hwmon->tz_list); + strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH); + hwmon->device = hwmon_device_register(NULL); + if (IS_ERR(hwmon->device)) { + result = PTR_ERR(hwmon->device); + goto free_mem; + } + hwmon->device->driver_data = hwmon; + result = device_create_file(hwmon->device, &dev_attr_name); + if (result) + goto unregister_hwmon_device; + + register_sys_interface: + tz->hwmon = hwmon; + hwmon->count++; + + snprintf(tz->temp_input.name, THERMAL_NAME_LENGTH, + "temp%d_input", hwmon->count); + tz->temp_input.attr.attr.name = tz->temp_input.name; + tz->temp_input.attr.attr.mode = 0444; + tz->temp_input.attr.show = temp_input_show; + result = device_create_file(hwmon->device, &tz->temp_input.attr); + if (result) + goto unregister_hwmon_device; + + if (tz->ops->get_crit_temp) { + unsigned long temperature; + if (!tz->ops->get_crit_temp(tz, &temperature)) { + snprintf(tz->temp_crit.name, THERMAL_NAME_LENGTH, + "temp%d_crit", hwmon->count); + tz->temp_crit.attr.attr.name = tz->temp_crit.name; + tz->temp_crit.attr.attr.mode = 0444; + tz->temp_crit.attr.show = temp_crit_show; + result = device_create_file(hwmon->device, + &tz->temp_crit.attr); + if (result) + goto unregister_hwmon_device; + } + } + + mutex_lock(&thermal_list_lock); + if (new_hwmon_device) + list_add_tail(&hwmon->node, &thermal_hwmon_list); + list_add_tail(&tz->hwmon_node, &hwmon->tz_list); + mutex_unlock(&thermal_list_lock); + + return 0; + + unregister_hwmon_device: + device_remove_file(hwmon->device, &tz->temp_crit.attr); + device_remove_file(hwmon->device, &tz->temp_input.attr); + if (new_hwmon_device) { + device_remove_file(hwmon->device, &dev_attr_name); + hwmon_device_unregister(hwmon->device); + } + 
free_mem: + if (new_hwmon_device) + kfree(hwmon); + + return result; +} + +static void +thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon = tz->hwmon; + + tz->hwmon = NULL; + device_remove_file(hwmon->device, &tz->temp_input.attr); + device_remove_file(hwmon->device, &tz->temp_crit.attr); + + mutex_lock(&thermal_list_lock); + list_del(&tz->hwmon_node); + if (!list_empty(&hwmon->tz_list)) { + mutex_unlock(&thermal_list_lock); + return; + } + list_del(&hwmon->node); + mutex_unlock(&thermal_list_lock); + + device_remove_file(hwmon->device, &dev_attr_name); + hwmon_device_unregister(hwmon->device); + kfree(hwmon); +} +#else +static int +thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +{ + return 0; +} + +static void +thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) +{ +} +#endif + + /** * thermal_zone_bind_cooling_device - bind a cooling device to a thermal zone * @tz: thermal zone device @@ -642,6 +800,10 @@ struct thermal_zone_device *thermal_zone_device_register(char *type, goto unregister; } + result = thermal_add_hwmon_sysfs(tz); + if (result) + goto unregister; + mutex_lock(&thermal_list_lock); list_add_tail(&tz->node, &thermal_tz_list); if (ops->bind) @@ -700,6 +862,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) for (count = 0; count < tz->trips; count++) TRIP_POINT_ATTR_REMOVE(&tz->device, count); + thermal_remove_hwmon_sysfs(tz); release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id); idr_destroy(&tz->idr); mutex_destroy(&tz->lock); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 16e6a8bdeb3c..06d3e6eb9ca8 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -66,6 +66,23 @@ struct thermal_cooling_device { ((long)t-2732+5)/10 : ((long)t-2732-5)/10) #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) +#if defined(CONFIG_HWMON) || \ + (defined(CONFIG_HWMON_MODULE) && defined(CONFIG_THERMAL_MODULE)) +/* thermal zone devices with the same type 
share one hwmon device */ +struct thermal_hwmon_device { + char type[THERMAL_NAME_LENGTH]; + struct device *device; + int count; + struct list_head tz_list; + struct list_head node; +}; + +struct thermal_hwmon_attr { + struct device_attribute attr; + char name[16]; +}; +#endif + struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; @@ -77,6 +94,13 @@ struct thermal_zone_device { struct idr idr; struct mutex lock; /* protect cooling devices list */ struct list_head node; +#if defined(CONFIG_HWMON) || \ + (defined(CONFIG_HWMON_MODULE) && defined(CONFIG_THERMAL_MODULE)) + struct list_head hwmon_node; + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ + struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ +#endif }; struct thermal_zone_device *thermal_zone_device_register(char *, int, void *, -- cgit v1.2.3-71-gd317 From ca0e8b6fd29819891c874b86ff286987c5bfdc21 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:33:49 -0600 Subject: ISAPNP: move config register addresses out of isapnp.h These are used only in drivers/pnp/isapnp/core.c, so no need to expose them to the world. 
Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/isapnp/core.c | 8 ++++++++ include/linux/isapnp.h | 10 ---------- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index 257f5d827d83..dd67752a5828 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -88,6 +88,14 @@ MODULE_LICENSE("GPL"); #define _LTAG_MEM32RANGE 0x85 #define _LTAG_FIXEDMEM32RANGE 0x86 +/* Logical device control and configuration registers */ + +#define ISAPNP_CFG_ACTIVATE 0x30 /* byte */ +#define ISAPNP_CFG_MEM 0x40 /* 4 * dword */ +#define ISAPNP_CFG_PORT 0x60 /* 8 * word */ +#define ISAPNP_CFG_IRQ 0x70 /* 2 * word */ +#define ISAPNP_CFG_DMA 0x74 /* 2 * byte */ + /* * Sizes of ISAPNP logical device configuration register sets. * See PNP-ISA-v1.0a.pdf, Appendix A. diff --git a/include/linux/isapnp.h b/include/linux/isapnp.h index 1e8728a9ee8a..cd5a269fdb5e 100644 --- a/include/linux/isapnp.h +++ b/include/linux/isapnp.h @@ -25,16 +25,6 @@ #include #include -/* - * Configuration registers (TODO: change by specification) - */ - -#define ISAPNP_CFG_ACTIVATE 0x30 /* byte */ -#define ISAPNP_CFG_MEM 0x40 /* 4 * dword */ -#define ISAPNP_CFG_PORT 0x60 /* 8 * word */ -#define ISAPNP_CFG_IRQ 0x70 /* 2 * word */ -#define ISAPNP_CFG_DMA 0x74 /* 2 * byte */ - /* * */ -- cgit v1.2.3-71-gd317 From 1692b27bf37826f85f9c12f8468848885643532a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:33:51 -0600 Subject: PNP: make pnp_add_id() internal to PNP core pnp_add_id() doesn't need to be exposed outside the PNP core, so move the declaration to an internal header file. 
Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/base.h | 1 + drivers/pnp/pnpacpi/core.c | 1 + drivers/pnp/pnpbios/core.c | 1 + include/linux/pnp.h | 2 -- 4 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index 31a633f65547..abefcc351521 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -1,5 +1,6 @@ extern spinlock_t pnp_lock; void *pnp_alloc(long size); +int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev); int pnp_interface_attach_device(struct pnp_dev *dev); void pnp_fixup_device(struct pnp_dev *dev); void pnp_free_option(struct pnp_option *option); diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 53f91068d0b0..4807d76f8a04 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -25,6 +25,7 @@ #include #include +#include "../base.h" #include "pnpacpi.h" static int num = 0; diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index a8a51500e1e9..2a5353bceb24 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -69,6 +69,7 @@ #include #include +#include "../base.h" #include "pnpbios.h" /* diff --git a/include/linux/pnp.h b/include/linux/pnp.h index b2f05c230f4b..9a05ab5515b6 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -403,7 +403,6 @@ void pnp_resource_change(struct resource *resource, resource_size_t start, /* protocol helpers */ int pnp_is_active(struct pnp_dev *dev); int compare_pnp_id(struct pnp_id *pos, const char *id); -int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev); int pnp_register_driver(struct pnp_driver *drv); void pnp_unregister_driver(struct pnp_driver *drv); @@ -450,7 +449,6 @@ static inline void pnp_resource_change(struct resource *resource, resource_size_ /* protocol helpers */ static inline int pnp_is_active(struct pnp_dev *dev) { return 0; } static inline int compare_pnp_id(struct pnp_id *pos, const char 
*id) { return -ENODEV; } -static inline int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_register_driver(struct pnp_driver *drv) { return -ENODEV; } static inline void pnp_unregister_driver(struct pnp_driver *drv) { } -- cgit v1.2.3-71-gd317 From 048825deea5f261335b5202cd1114c53a3a21ae7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:33:55 -0600 Subject: PNP: make pnp_add_card_id() internal to PNP core pnp_add_card_id() doesn't need to be exposed outside the PNP core, so move the declaration to an internal header file. Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/base.h | 1 + include/linux/pnp.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index ff435bd1ca18..b492569bcdf4 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -4,6 +4,7 @@ void *pnp_alloc(long size); void pnp_eisa_id_to_string(u32 id, char *str); struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *, int id, char *pnpid); struct pnp_id *pnp_add_id(struct pnp_dev *dev, char *id); +int pnp_add_card_id(struct pnp_id *id, struct pnp_card *card); int pnp_interface_attach_device(struct pnp_dev *dev); void pnp_fixup_device(struct pnp_dev *dev); void pnp_free_option(struct pnp_option *option); diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 9a05ab5515b6..7639db83ce3f 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -371,7 +371,6 @@ int pnp_add_card(struct pnp_card *card); void pnp_remove_card(struct pnp_card *card); int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev); void pnp_remove_card_device(struct pnp_dev *dev); -int pnp_add_card_id(struct pnp_id *id, struct pnp_card *card); struct pnp_dev *pnp_request_card_device(struct pnp_card_link *clink, const char *id, struct pnp_dev *from); void pnp_release_card_device(struct pnp_dev *dev); @@ -423,7 +422,6 @@ 
static inline int pnp_add_card(struct pnp_card *card) { return -ENODEV; } static inline void pnp_remove_card(struct pnp_card *card) { } static inline int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev) { return -ENODEV; } static inline void pnp_remove_card_device(struct pnp_dev *dev) { } -static inline int pnp_add_card_id(struct pnp_id *id, struct pnp_card *card) { return -ENODEV; } static inline struct pnp_dev *pnp_request_card_device(struct pnp_card_link *clink, const char *id, struct pnp_dev *from) { return NULL; } static inline void pnp_release_card_device(struct pnp_dev *dev) { } static inline int pnp_register_card_driver(struct pnp_card_driver *drv) { return -ENODEV; } -- cgit v1.2.3-71-gd317 From c1caf06ccfd3a4efd4b489f89bcdabd2362f31d0 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:34:04 -0600 Subject: PNP: add debug output to option registration Add debug output to resource option registration functions (enabled by CONFIG_PNP_DEBUG). This uses dev_printk, so I had to add pnp_dev arguments at the same time. Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/isapnp/core.c | 50 ++++++++++++++++++------------- drivers/pnp/pnpacpi/rsparser.c | 68 +++++++++++++++++++++++++----------------- drivers/pnp/pnpbios/rsparser.c | 54 +++++++++++++++++++-------------- drivers/pnp/resource.c | 34 ++++++++++++++++++--- include/linux/pnp.h | 19 +++++++----- 5 files changed, 141 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index 883577a93d6a..38ff64dce9c0 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -431,7 +431,8 @@ static struct pnp_dev *__init isapnp_parse_device(struct pnp_card *card, /* * Add IRQ resource to resources list. 
*/ -static void __init isapnp_parse_irq_resource(struct pnp_option *option, +static void __init isapnp_parse_irq_resource(struct pnp_dev *dev, + struct pnp_option *option, int size) { unsigned char tmp[3]; @@ -448,13 +449,14 @@ static void __init isapnp_parse_irq_resource(struct pnp_option *option, irq->flags = tmp[2]; else irq->flags = IORESOURCE_IRQ_HIGHEDGE; - pnp_register_irq_resource(option, irq); + pnp_register_irq_resource(dev, option, irq); } /* * Add DMA resource to resources list. */ -static void __init isapnp_parse_dma_resource(struct pnp_option *option, +static void __init isapnp_parse_dma_resource(struct pnp_dev *dev, + struct pnp_option *option, int size) { unsigned char tmp[2]; @@ -466,13 +468,14 @@ static void __init isapnp_parse_dma_resource(struct pnp_option *option, return; dma->map = tmp[0]; dma->flags = tmp[1]; - pnp_register_dma_resource(option, dma); + pnp_register_dma_resource(dev, option, dma); } /* * Add port resource to resources list. */ -static void __init isapnp_parse_port_resource(struct pnp_option *option, +static void __init isapnp_parse_port_resource(struct pnp_dev *dev, + struct pnp_option *option, int size) { unsigned char tmp[7]; @@ -487,13 +490,14 @@ static void __init isapnp_parse_port_resource(struct pnp_option *option, port->align = tmp[5]; port->size = tmp[6]; port->flags = tmp[0] ? PNP_PORT_FLAG_16BITADDR : 0; - pnp_register_port_resource(option, port); + pnp_register_port_resource(dev, option, port); } /* * Add fixed port resource to resources list. 
*/ -static void __init isapnp_parse_fixed_port_resource(struct pnp_option *option, +static void __init isapnp_parse_fixed_port_resource(struct pnp_dev *dev, + struct pnp_option *option, int size) { unsigned char tmp[3]; @@ -507,13 +511,14 @@ static void __init isapnp_parse_fixed_port_resource(struct pnp_option *option, port->size = tmp[2]; port->align = 0; port->flags = PNP_PORT_FLAG_FIXED; - pnp_register_port_resource(option, port); + pnp_register_port_resource(dev, option, port); } /* * Add memory resource to resources list. */ -static void __init isapnp_parse_mem_resource(struct pnp_option *option, +static void __init isapnp_parse_mem_resource(struct pnp_dev *dev, + struct pnp_option *option, int size) { unsigned char tmp[9]; @@ -528,13 +533,14 @@ static void __init isapnp_parse_mem_resource(struct pnp_option *option, mem->align = (tmp[6] << 8) | tmp[5]; mem->size = ((tmp[8] << 8) | tmp[7]) << 8; mem->flags = tmp[0]; - pnp_register_mem_resource(option, mem); + pnp_register_mem_resource(dev, option, mem); } /* * Add 32-bit memory resource to resources list. */ -static void __init isapnp_parse_mem32_resource(struct pnp_option *option, +static void __init isapnp_parse_mem32_resource(struct pnp_dev *dev, + struct pnp_option *option, int size) { unsigned char tmp[17]; @@ -551,13 +557,14 @@ static void __init isapnp_parse_mem32_resource(struct pnp_option *option, mem->size = (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13]; mem->flags = tmp[0]; - pnp_register_mem_resource(option, mem); + pnp_register_mem_resource(dev, option, mem); } /* * Add 32-bit fixed memory resource to resources list. 
*/ -static void __init isapnp_parse_fixed_mem32_resource(struct pnp_option *option, +static void __init isapnp_parse_fixed_mem32_resource(struct pnp_dev *dev, + struct pnp_option *option, int size) { unsigned char tmp[9]; @@ -572,7 +579,7 @@ static void __init isapnp_parse_fixed_mem32_resource(struct pnp_option *option, mem->size = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; mem->align = 0; mem->flags = tmp[0]; - pnp_register_mem_resource(option, mem); + pnp_register_mem_resource(dev, option, mem); } /* @@ -656,13 +663,13 @@ static int __init isapnp_create_device(struct pnp_card *card, case _STAG_IRQ: if (size < 2 || size > 3) goto __skip; - isapnp_parse_irq_resource(option, size); + isapnp_parse_irq_resource(dev, option, size); size = 0; break; case _STAG_DMA: if (size != 2) goto __skip; - isapnp_parse_dma_resource(option, size); + isapnp_parse_dma_resource(dev, option, size); size = 0; break; case _STAG_STARTDEP: @@ -682,17 +689,18 @@ static int __init isapnp_create_device(struct pnp_card *card, if (size != 0) goto __skip; priority = 0; + dev_dbg(&dev->dev, "end dependent options\n"); break; case _STAG_IOPORT: if (size != 7) goto __skip; - isapnp_parse_port_resource(option, size); + isapnp_parse_port_resource(dev, option, size); size = 0; break; case _STAG_FIXEDIO: if (size != 3) goto __skip; - isapnp_parse_fixed_port_resource(option, size); + isapnp_parse_fixed_port_resource(dev, option, size); size = 0; break; case _STAG_VENDOR: @@ -700,7 +708,7 @@ static int __init isapnp_create_device(struct pnp_card *card, case _LTAG_MEMRANGE: if (size != 9) goto __skip; - isapnp_parse_mem_resource(option, size); + isapnp_parse_mem_resource(dev, option, size); size = 0; break; case _LTAG_ANSISTR: @@ -715,13 +723,13 @@ static int __init isapnp_create_device(struct pnp_card *card, case _LTAG_MEM32RANGE: if (size != 17) goto __skip; - isapnp_parse_mem32_resource(option, size); + isapnp_parse_mem32_resource(dev, option, size); size = 0; break; case 
_LTAG_FIXEDMEM32RANGE: if (size != 9) goto __skip; - isapnp_parse_fixed_mem32_resource(option, size); + isapnp_parse_fixed_mem32_resource(dev, option, size); size = 0; break; case _STAG_END: diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index baaf60212779..32454aa07ebc 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -410,7 +410,8 @@ acpi_status pnpacpi_parse_allocated_resource(acpi_handle handle, pnpacpi_allocated_resource, res); } -static __init void pnpacpi_parse_dma_option(struct pnp_option *option, +static __init void pnpacpi_parse_dma_option(struct pnp_dev *dev, + struct pnp_option *option, struct acpi_resource_dma *p) { int i; @@ -427,10 +428,11 @@ static __init void pnpacpi_parse_dma_option(struct pnp_option *option, dma->flags = dma_flags(p->type, p->bus_master, p->transfer); - pnp_register_dma_resource(option, dma); + pnp_register_dma_resource(dev, option, dma); } -static __init void pnpacpi_parse_irq_option(struct pnp_option *option, +static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev, + struct pnp_option *option, struct acpi_resource_irq *p) { int i; @@ -447,10 +449,11 @@ static __init void pnpacpi_parse_irq_option(struct pnp_option *option, __set_bit(p->interrupts[i], irq->map); irq->flags = irq_flags(p->triggering, p->polarity, p->sharable); - pnp_register_irq_resource(option, irq); + pnp_register_irq_resource(dev, option, irq); } -static __init void pnpacpi_parse_ext_irq_option(struct pnp_option *option, +static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, + struct pnp_option *option, struct acpi_resource_extended_irq *p) { int i; @@ -467,10 +470,11 @@ static __init void pnpacpi_parse_ext_irq_option(struct pnp_option *option, __set_bit(p->interrupts[i], irq->map); irq->flags = irq_flags(p->triggering, p->polarity, p->sharable); - pnp_register_irq_resource(option, irq); + pnp_register_irq_resource(dev, option, irq); } -static __init void 
pnpacpi_parse_port_option(struct pnp_option *option, +static __init void pnpacpi_parse_port_option(struct pnp_dev *dev, + struct pnp_option *option, struct acpi_resource_io *io) { struct pnp_port *port; @@ -486,10 +490,11 @@ static __init void pnpacpi_parse_port_option(struct pnp_option *option, port->size = io->address_length; port->flags = ACPI_DECODE_16 == io->io_decode ? PNP_PORT_FLAG_16BITADDR : 0; - pnp_register_port_resource(option, port); + pnp_register_port_resource(dev, option, port); } -static __init void pnpacpi_parse_fixed_port_option(struct pnp_option *option, +static __init void pnpacpi_parse_fixed_port_option(struct pnp_dev *dev, + struct pnp_option *option, struct acpi_resource_fixed_io *io) { struct pnp_port *port; @@ -503,10 +508,11 @@ static __init void pnpacpi_parse_fixed_port_option(struct pnp_option *option, port->size = io->address_length; port->align = 0; port->flags = PNP_PORT_FLAG_FIXED; - pnp_register_port_resource(option, port); + pnp_register_port_resource(dev, option, port); } -static __init void pnpacpi_parse_mem24_option(struct pnp_option *option, +static __init void pnpacpi_parse_mem24_option(struct pnp_dev *dev, + struct pnp_option *option, struct acpi_resource_memory24 *p) { struct pnp_mem *mem; @@ -524,10 +530,11 @@ static __init void pnpacpi_parse_mem24_option(struct pnp_option *option, mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? IORESOURCE_MEM_WRITEABLE : 0; - pnp_register_mem_resource(option, mem); + pnp_register_mem_resource(dev, option, mem); } -static __init void pnpacpi_parse_mem32_option(struct pnp_option *option, +static __init void pnpacpi_parse_mem32_option(struct pnp_dev *dev, + struct pnp_option *option, struct acpi_resource_memory32 *p) { struct pnp_mem *mem; @@ -545,10 +552,11 @@ static __init void pnpacpi_parse_mem32_option(struct pnp_option *option, mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? 
IORESOURCE_MEM_WRITEABLE : 0; - pnp_register_mem_resource(option, mem); + pnp_register_mem_resource(dev, option, mem); } -static __init void pnpacpi_parse_fixed_mem32_option(struct pnp_option *option, +static __init void pnpacpi_parse_fixed_mem32_option(struct pnp_dev *dev, + struct pnp_option *option, struct acpi_resource_fixed_memory32 *p) { struct pnp_mem *mem; @@ -565,10 +573,11 @@ static __init void pnpacpi_parse_fixed_mem32_option(struct pnp_option *option, mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? IORESOURCE_MEM_WRITEABLE : 0; - pnp_register_mem_resource(option, mem); + pnp_register_mem_resource(dev, option, mem); } -static __init void pnpacpi_parse_address_option(struct pnp_option *option, +static __init void pnpacpi_parse_address_option(struct pnp_dev *dev, + struct pnp_option *option, struct acpi_resource *r) { struct acpi_resource_address64 addr, *p = &addr; @@ -596,7 +605,7 @@ static __init void pnpacpi_parse_address_option(struct pnp_option *option, mem->flags = (p->info.mem.write_protect == ACPI_READ_WRITE_MEMORY) ? 
IORESOURCE_MEM_WRITEABLE : 0; - pnp_register_mem_resource(option, mem); + pnp_register_mem_resource(dev, option, mem); } else if (p->resource_type == ACPI_IO_RANGE) { port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); if (!port) @@ -605,7 +614,7 @@ static __init void pnpacpi_parse_address_option(struct pnp_option *option, port->size = p->address_length; port->align = 0; port->flags = PNP_PORT_FLAG_FIXED; - pnp_register_port_resource(option, port); + pnp_register_port_resource(dev, option, port); } } @@ -625,11 +634,11 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, switch (res->type) { case ACPI_RESOURCE_TYPE_IRQ: - pnpacpi_parse_irq_option(option, &res->data.irq); + pnpacpi_parse_irq_option(dev, option, &res->data.irq); break; case ACPI_RESOURCE_TYPE_DMA: - pnpacpi_parse_dma_option(option, &res->data.dma); + pnpacpi_parse_dma_option(dev, option, &res->data.dma); break; case ACPI_RESOURCE_TYPE_START_DEPENDENT: @@ -664,14 +673,16 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, } parse_data->option = parse_data->option_independent; parse_data->option_independent = NULL; + dev_dbg(&dev->dev, "end dependent options\n"); break; case ACPI_RESOURCE_TYPE_IO: - pnpacpi_parse_port_option(option, &res->data.io); + pnpacpi_parse_port_option(dev, option, &res->data.io); break; case ACPI_RESOURCE_TYPE_FIXED_IO: - pnpacpi_parse_fixed_port_option(option, &res->data.fixed_io); + pnpacpi_parse_fixed_port_option(dev, option, + &res->data.fixed_io); break; case ACPI_RESOURCE_TYPE_VENDOR: @@ -679,29 +690,30 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res, break; case ACPI_RESOURCE_TYPE_MEMORY24: - pnpacpi_parse_mem24_option(option, &res->data.memory24); + pnpacpi_parse_mem24_option(dev, option, &res->data.memory24); break; case ACPI_RESOURCE_TYPE_MEMORY32: - pnpacpi_parse_mem32_option(option, &res->data.memory32); + pnpacpi_parse_mem32_option(dev, option, &res->data.memory32); break; case 
ACPI_RESOURCE_TYPE_FIXED_MEMORY32: - pnpacpi_parse_fixed_mem32_option(option, + pnpacpi_parse_fixed_mem32_option(dev, option, &res->data.fixed_memory32); break; case ACPI_RESOURCE_TYPE_ADDRESS16: case ACPI_RESOURCE_TYPE_ADDRESS32: case ACPI_RESOURCE_TYPE_ADDRESS64: - pnpacpi_parse_address_option(option, res); + pnpacpi_parse_address_option(dev, option, res); break; case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64: break; case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: - pnpacpi_parse_ext_irq_option(option, &res->data.extended_irq); + pnpacpi_parse_ext_irq_option(dev, option, + &res->data.extended_irq); break; case ACPI_RESOURCE_TYPE_GENERIC_REGISTER: diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index 948a661280d7..70aa559b3f8c 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -263,7 +263,8 @@ len_err: * Resource Configuration Options */ -static __init void pnpbios_parse_mem_option(unsigned char *p, int size, +static __init void pnpbios_parse_mem_option(struct pnp_dev *dev, + unsigned char *p, int size, struct pnp_option *option) { struct pnp_mem *mem; @@ -276,10 +277,11 @@ static __init void pnpbios_parse_mem_option(unsigned char *p, int size, mem->align = (p[9] << 8) | p[8]; mem->size = ((p[11] << 8) | p[10]) << 8; mem->flags = p[3]; - pnp_register_mem_resource(option, mem); + pnp_register_mem_resource(dev, option, mem); } -static __init void pnpbios_parse_mem32_option(unsigned char *p, int size, +static __init void pnpbios_parse_mem32_option(struct pnp_dev *dev, + unsigned char *p, int size, struct pnp_option *option) { struct pnp_mem *mem; @@ -292,10 +294,11 @@ static __init void pnpbios_parse_mem32_option(unsigned char *p, int size, mem->align = (p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12]; mem->size = (p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16]; mem->flags = p[3]; - pnp_register_mem_resource(option, mem); + pnp_register_mem_resource(dev, option, mem); } -static __init void 
pnpbios_parse_fixed_mem32_option(unsigned char *p, int size, +static __init void pnpbios_parse_fixed_mem32_option(struct pnp_dev *dev, + unsigned char *p, int size, struct pnp_option *option) { struct pnp_mem *mem; @@ -307,11 +310,12 @@ static __init void pnpbios_parse_fixed_mem32_option(unsigned char *p, int size, mem->size = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; mem->align = 0; mem->flags = p[3]; - pnp_register_mem_resource(option, mem); + pnp_register_mem_resource(dev, option, mem); } -static __init void pnpbios_parse_irq_option(unsigned char *p, int size, - struct pnp_option *option) +static __init void pnpbios_parse_irq_option(struct pnp_dev *dev, + unsigned char *p, int size, + struct pnp_option *option) { struct pnp_irq *irq; unsigned long bits; @@ -325,11 +329,12 @@ static __init void pnpbios_parse_irq_option(unsigned char *p, int size, irq->flags = p[3]; else irq->flags = IORESOURCE_IRQ_HIGHEDGE; - pnp_register_irq_resource(option, irq); + pnp_register_irq_resource(dev, option, irq); } -static __init void pnpbios_parse_dma_option(unsigned char *p, int size, - struct pnp_option *option) +static __init void pnpbios_parse_dma_option(struct pnp_dev *dev, + unsigned char *p, int size, + struct pnp_option *option) { struct pnp_dma *dma; @@ -338,10 +343,11 @@ static __init void pnpbios_parse_dma_option(unsigned char *p, int size, return; dma->map = p[1]; dma->flags = p[2]; - pnp_register_dma_resource(option, dma); + pnp_register_dma_resource(dev, option, dma); } -static __init void pnpbios_parse_port_option(unsigned char *p, int size, +static __init void pnpbios_parse_port_option(struct pnp_dev *dev, + unsigned char *p, int size, struct pnp_option *option) { struct pnp_port *port; @@ -354,10 +360,11 @@ static __init void pnpbios_parse_port_option(unsigned char *p, int size, port->align = p[6]; port->size = p[7]; port->flags = p[1] ? 
PNP_PORT_FLAG_16BITADDR : 0; - pnp_register_port_resource(option, port); + pnp_register_port_resource(dev, option, port); } -static __init void pnpbios_parse_fixed_port_option(unsigned char *p, int size, +static __init void pnpbios_parse_fixed_port_option(struct pnp_dev *dev, + unsigned char *p, int size, struct pnp_option *option) { struct pnp_port *port; @@ -369,7 +376,7 @@ static __init void pnpbios_parse_fixed_port_option(unsigned char *p, int size, port->size = p[3]; port->align = 0; port->flags = PNP_PORT_FLAG_FIXED; - pnp_register_port_resource(option, port); + pnp_register_port_resource(dev, option, port); } static __init unsigned char * @@ -403,37 +410,37 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, case LARGE_TAG_MEM: if (len != 9) goto len_err; - pnpbios_parse_mem_option(p, len, option); + pnpbios_parse_mem_option(dev, p, len, option); break; case LARGE_TAG_MEM32: if (len != 17) goto len_err; - pnpbios_parse_mem32_option(p, len, option); + pnpbios_parse_mem32_option(dev, p, len, option); break; case LARGE_TAG_FIXEDMEM32: if (len != 9) goto len_err; - pnpbios_parse_fixed_mem32_option(p, len, option); + pnpbios_parse_fixed_mem32_option(dev, p, len, option); break; case SMALL_TAG_IRQ: if (len < 2 || len > 3) goto len_err; - pnpbios_parse_irq_option(p, len, option); + pnpbios_parse_irq_option(dev, p, len, option); break; case SMALL_TAG_DMA: if (len != 2) goto len_err; - pnpbios_parse_dma_option(p, len, option); + pnpbios_parse_dma_option(dev, p, len, option); break; case SMALL_TAG_PORT: if (len != 7) goto len_err; - pnpbios_parse_port_option(p, len, option); + pnpbios_parse_port_option(dev, p, len, option); break; case SMALL_TAG_VENDOR: @@ -443,7 +450,7 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, case SMALL_TAG_FIXEDPORT: if (len != 3) goto len_err; - pnpbios_parse_fixed_port_option(p, len, option); + pnpbios_parse_fixed_port_option(dev, p, len, option); break; case SMALL_TAG_STARTDEP: @@ -464,6 
+471,7 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, printk(KERN_WARNING "PnPBIOS: Missing SMALL_TAG_STARTDEP tag\n"); option = option_independent; + dev_dbg(&dev->dev, "end dependent options\n"); break; case SMALL_TAG_END: diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index e50ebcffb962..eee6d8eddcb4 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -53,6 +53,8 @@ struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev) if (dev->independent) dev_err(&dev->dev, "independent resource already registered\n"); dev->independent = option; + + dev_dbg(&dev->dev, "new independent option\n"); return option; } @@ -70,12 +72,18 @@ struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, parent->next = option; } else dev->dependent = option; + + dev_dbg(&dev->dev, "new dependent option (priority %#x)\n", priority); return option; } -int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data) +int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_irq *data) { struct pnp_irq *ptr; +#ifdef DEBUG + char buf[PNP_IRQ_NR]; /* hex-encoded, so this is overkill but safe */ +#endif ptr = option->irq; while (ptr && ptr->next) @@ -94,10 +102,17 @@ int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data) pcibios_penalize_isa_irq(i, 0); } #endif + +#ifdef DEBUG + bitmap_scnprintf(buf, sizeof(buf), data->map, PNP_IRQ_NR); + dev_dbg(&dev->dev, " irq bitmask %s flags %#x\n", buf, + data->flags); +#endif return 0; } -int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data) +int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_dma *data) { struct pnp_dma *ptr; @@ -109,10 +124,13 @@ int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data) else option->dma = data; + dev_dbg(&dev->dev, " dma bitmask %#x flags %#x\n", data->map, + data->flags); return 0; 
} -int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data) +int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_port *data) { struct pnp_port *ptr; @@ -124,10 +142,14 @@ int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data) else option->port = data; + dev_dbg(&dev->dev, " io " + "min %#x max %#x align %d size %d flags %#x\n", + data->min, data->max, data->align, data->size, data->flags); return 0; } -int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data) +int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_mem *data) { struct pnp_mem *ptr; @@ -138,6 +160,10 @@ int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data) ptr->next = data; else option->mem = data; + + dev_dbg(&dev->dev, " mem " + "min %#x max %#x align %d size %d flags %#x\n", + data->min, data->max, data->align, data->size, data->flags); return 0; } diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 7639db83ce3f..a4c2bf361596 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -382,11 +382,14 @@ extern struct list_head pnp_cards; struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev); struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, int priority); -int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data); -int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data); -int pnp_register_port_resource(struct pnp_option *option, +int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_irq *data); +int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_dma *data); +int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_port *data); -int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data); +int 
pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_mem *data); void pnp_init_resource_table(struct pnp_resource_table *table); int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, int mode); @@ -430,10 +433,10 @@ static inline void pnp_unregister_card_driver(struct pnp_card_driver *drv) { } /* resource management */ static inline struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev) { return NULL; } static inline struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, int priority) { return NULL; } -static inline int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data) { return -ENODEV; } -static inline int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data) { return -ENODEV; } -static inline int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data) { return -ENODEV; } -static inline int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data) { return -ENODEV; } +static inline int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_irq *data) { return -ENODEV; } +static inline int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_dma *data) { return -ENODEV; } +static inline int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_port *data) { return -ENODEV; } +static inline int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_mem *data) { return -ENODEV; } static inline void pnp_init_resource_table(struct pnp_resource_table *table) { } static inline int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, int mode) { return -ENODEV; } static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; } -- cgit v1.2.3-71-gd317 From 59284cb4099411bc6f4915a5a4cb76414440c447 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas 
Date: Mon, 28 Apr 2008 16:34:05 -0600 Subject: PNP: remove pnp_resource_table from internal get/set interfaces When we call protocol->get() and protocol->set() methods, we currently supply pointers to both the pnp_dev and the pnp_resource_table even though the pnp_resource_table should always be the one associated with the pnp_dev. This removes the pnp_resource_table arguments to make it clear that these methods only operate on the specified pnp_dev. Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/interface.c | 2 +- drivers/pnp/isapnp/core.c | 11 +++++------ drivers/pnp/manager.c | 2 +- drivers/pnp/pnpacpi/core.c | 8 +++----- drivers/pnp/pnpbios/core.c | 10 ++++------ include/linux/pnp.h | 4 ++-- 6 files changed, 16 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 982658477a58..e882896bdbd7 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -364,7 +364,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, if (!strnicmp(buf, "get", 3)) { mutex_lock(&pnp_res_mutex); if (pnp_can_read(dev)) - dev->protocol->get(dev, &dev->res); + dev->protocol->get(dev); mutex_unlock(&pnp_res_mutex); goto done; } diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index 38ff64dce9c0..1ae3d8996156 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -976,21 +976,20 @@ static int isapnp_read_resources(struct pnp_dev *dev, return 0; } -static int isapnp_get_resources(struct pnp_dev *dev, - struct pnp_resource_table *res) +static int isapnp_get_resources(struct pnp_dev *dev) { int ret; - pnp_init_resource_table(res); + pnp_init_resource_table(&dev->res); isapnp_cfg_begin(dev->card->number, dev->number); - ret = isapnp_read_resources(dev, res); + ret = isapnp_read_resources(dev, &dev->res); isapnp_cfg_end(); return ret; } -static int isapnp_set_resources(struct pnp_dev *dev, - struct 
pnp_resource_table *res) +static int isapnp_set_resources(struct pnp_dev *dev) { + struct pnp_resource_table *res = &dev->res; int tmp; isapnp_cfg_begin(dev->card->number, dev->number); diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index c28caf272c11..6a1f0b0b24b3 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -473,7 +473,7 @@ int pnp_start_dev(struct pnp_dev *dev) return -EINVAL; } - if (dev->protocol->set(dev, &dev->res) < 0) { + if (dev->protocol->set(dev) < 0) { dev_err(&dev->dev, "activation failed\n"); return -EIO; } diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 27546873880c..590fbcb0ee89 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -73,8 +73,7 @@ static int __init ispnpidacpi(char *id) return 1; } -static int pnpacpi_get_resources(struct pnp_dev *dev, - struct pnp_resource_table *res) +static int pnpacpi_get_resources(struct pnp_dev *dev) { acpi_status status; @@ -83,8 +82,7 @@ static int pnpacpi_get_resources(struct pnp_dev *dev, return ACPI_FAILURE(status) ? 
-ENODEV : 0; } -static int pnpacpi_set_resources(struct pnp_dev *dev, - struct pnp_resource_table *res) +static int pnpacpi_set_resources(struct pnp_dev *dev) { acpi_handle handle = dev->data; struct acpi_buffer buffer; @@ -94,7 +92,7 @@ static int pnpacpi_set_resources(struct pnp_dev *dev, ret = pnpacpi_build_resource_template(dev, &buffer); if (ret) return ret; - ret = pnpacpi_encode_resources(res, &buffer); + ret = pnpacpi_encode_resources(&dev->res, &buffer); if (ret) { kfree(buffer.pointer); return ret; diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 6af2be2c1d67..9852755b5590 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -204,8 +204,7 @@ static int pnp_dock_thread(void *unused) #endif /* CONFIG_HOTPLUG */ -static int pnpbios_get_resources(struct pnp_dev *dev, - struct pnp_resource_table *res) +static int pnpbios_get_resources(struct pnp_dev *dev) { u8 nodenum = dev->number; struct pnp_bios_node *node; @@ -220,14 +219,13 @@ static int pnpbios_get_resources(struct pnp_dev *dev, kfree(node); return -ENODEV; } - pnpbios_read_resources_from_node(res, node); + pnpbios_read_resources_from_node(&dev->res, node); dev->active = pnp_is_active(dev); kfree(node); return 0; } -static int pnpbios_set_resources(struct pnp_dev *dev, - struct pnp_resource_table *res) +static int pnpbios_set_resources(struct pnp_dev *dev) { u8 nodenum = dev->number; struct pnp_bios_node *node; @@ -243,7 +241,7 @@ static int pnpbios_set_resources(struct pnp_dev *dev, kfree(node); return -ENODEV; } - if (pnpbios_write_resources_to_node(res, node) < 0) { + if (pnpbios_write_resources_to_node(&dev->res, node) < 0) { kfree(node); return -1; } diff --git a/include/linux/pnp.h b/include/linux/pnp.h index a4c2bf361596..8d7c9bc2fdbb 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -328,8 +328,8 @@ struct pnp_protocol { char *name; /* resource control functions */ - int (*get) (struct pnp_dev *dev, struct pnp_resource_table *res); - 
int (*set) (struct pnp_dev *dev, struct pnp_resource_table *res); + int (*get) (struct pnp_dev *dev); + int (*set) (struct pnp_dev *dev); int (*disable) (struct pnp_dev *dev); /* protocol specific suspend/resume */ -- cgit v1.2.3-71-gd317 From f44900020926b2cb06b87f0f52643d6285514fc3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:34:09 -0600 Subject: PNP: add pnp_init_resources(struct pnp_dev *) interface Add pnp_init_resources(struct pnp_dev *) to replace pnp_init_resource_table(), which takes a pointer to the pnp_resource_table itself. Passing only the pnp_dev * reduces the possibility for error in the caller and removes the pnp_resource_table implementation detail from the interface. Even though pnp_init_resource_table() is exported, I did not export pnp_init_resources() because it is used only by the PNP core. Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/interface.c | 6 +++--- drivers/pnp/isapnp/core.c | 4 ++-- drivers/pnp/manager.c | 5 +++++ drivers/pnp/pnpacpi/core.c | 2 +- drivers/pnp/pnpacpi/rsparser.c | 3 +-- drivers/pnp/pnpbios/core.c | 2 +- drivers/pnp/pnpbios/rsparser.c | 3 +-- include/linux/pnp.h | 2 ++ 8 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index e882896bdbd7..cdc3ecfde6ef 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -351,14 +351,14 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, if (!strnicmp(buf, "auto", 4)) { if (dev->active) goto done; - pnp_init_resource_table(&dev->res); + pnp_init_resources(dev); retval = pnp_auto_config_dev(dev); goto done; } if (!strnicmp(buf, "clear", 5)) { if (dev->active) goto done; - pnp_init_resource_table(&dev->res); + pnp_init_resources(dev); goto done; } if (!strnicmp(buf, "get", 3)) { @@ -373,7 +373,7 @@ pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr, if 
(dev->active) goto done; buf += 3; - pnp_init_resource_table(&dev->res); + pnp_init_resources(dev); mutex_lock(&pnp_res_mutex); while (1) { while (isspace(*buf)) diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index 6740016437d9..6f1007548c93 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -424,7 +424,7 @@ static struct pnp_dev *__init isapnp_parse_device(struct pnp_card *card, dev->capabilities |= PNP_READ; dev->capabilities |= PNP_WRITE; dev->capabilities |= PNP_DISABLE; - pnp_init_resource_table(&dev->res); + pnp_init_resources(dev); return dev; } @@ -981,7 +981,7 @@ static int isapnp_get_resources(struct pnp_dev *dev) int ret; dev_dbg(&dev->dev, "get resources\n"); - pnp_init_resource_table(&dev->res); + pnp_init_resources(dev); isapnp_cfg_begin(dev->card->number, dev->number); ret = isapnp_read_resources(dev); isapnp_cfg_end(); diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index 945c6201719d..c9af87a8fb16 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -272,6 +272,11 @@ void pnp_init_resource_table(struct pnp_resource_table *table) } } +void pnp_init_resources(struct pnp_dev *dev) +{ + pnp_init_resource_table(&dev->res); +} + /** * pnp_clean_resources - clears resources that were not manually set * @res: the resources to clean diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 1ac894d2df5a..7e4512a60f58 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -212,7 +212,7 @@ static int __init pnpacpi_add_device(struct acpi_device *device) /* clear out the damaged flags */ if (!dev->active) - pnp_init_resource_table(&dev->res); + pnp_init_resources(dev); pnp_add_device(dev); num++; diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index c5adf7631ac2..33dbf3644f2b 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -410,8 +410,7 @@ acpi_status pnpacpi_parse_allocated_resource(struct 
pnp_dev *dev) dev_dbg(&dev->dev, "parse allocated resources\n"); - /* Blank the resource table values */ - pnp_init_resource_table(&dev->res); + pnp_init_resources(dev); return acpi_walk_resources(handle, METHOD_NAME__CRS, pnpacpi_allocated_resource, dev); diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 76d398531da6..f5477ca85956 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -347,7 +347,7 @@ static int __init insert_device(struct pnp_bios_node *node) /* clear out the damaged flags */ if (!dev->active) - pnp_init_resource_table(&dev->res); + pnp_init_resources(dev); pnp_add_device(dev); pnpbios_interface_attach_device(node); diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index 7428f62db4d2..e90a3d4360b2 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -145,8 +145,7 @@ static unsigned char *pnpbios_parse_allocated_resource_data(struct pnp_dev *dev, dev_dbg(&dev->dev, "parse allocated resources\n"); - /* Blank the resource table values */ - pnp_init_resource_table(&dev->res); + pnp_init_resources(dev); while ((char *)p < (char *)end) { diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 8d7c9bc2fdbb..1737f071787a 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -391,6 +391,7 @@ int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_mem *data); void pnp_init_resource_table(struct pnp_resource_table *table); +void pnp_init_resources(struct pnp_dev *dev); int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, int mode); int pnp_auto_config_dev(struct pnp_dev *dev); @@ -438,6 +439,7 @@ static inline int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_opti static inline int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_port *data) { return -ENODEV; 
} static inline int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_mem *data) { return -ENODEV; } static inline void pnp_init_resource_table(struct pnp_resource_table *table) { } +static inline void pnp_init_resources(struct pnp_dev *dev) { } static inline int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, int mode) { return -ENODEV; } static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_validate_config(struct pnp_dev *dev) { return -ENODEV; } -- cgit v1.2.3-71-gd317 From 2cd1393098073426256cb4543c897f8c340d0b93 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:34:11 -0600 Subject: PNP: remove unused interfaces using pnp_resource_table Rene Herman recently removed the only in-tree driver uses of: pnp_init_resource_table() pnp_manual_config_dev() pnp_resource_change() in this change: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=109c53f840e551d6e99ecfd8b0131a968332c89f These are no longer used in the PNP core either, so we can just remove them completely. It's possible that there are out-of-tree drivers that use these interfaces. They should be changed to either (1) use PNP quirks to work around broken hardware or firmware, or (2) use the sysfs interfaces to control resource usage from userspace. Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/manager.c | 78 ++------------------------------------------------- include/linux/pnp.h | 8 ------ 2 files changed, 2 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index 2251dd7da062..d407c07b5b9d 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -238,8 +238,9 @@ static void pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx) * pnp_init_resources - Resets a resource table to default values. 
* @table: pointer to the desired resource table */ -void pnp_init_resource_table(struct pnp_resource_table *table) +void pnp_init_resources(struct pnp_dev *dev) { + struct pnp_resource_table *table = &dev->res; int idx; for (idx = 0; idx < PNP_MAX_IRQ; idx++) { @@ -272,11 +273,6 @@ void pnp_init_resource_table(struct pnp_resource_table *table) } } -void pnp_init_resources(struct pnp_dev *dev) -{ - pnp_init_resource_table(&dev->res); -} - /** * pnp_clean_resources - clears resources that were not manually set * @res: the resources to clean @@ -422,59 +418,6 @@ fail: return 0; } -/** - * pnp_manual_config_dev - Disables Auto Config and Manually sets the resource table - * @dev: pointer to the desired device - * @res: pointer to the new resource config - * @mode: 0 or PNP_CONFIG_FORCE - * - * This function can be used by drivers that want to manually set thier resources. - */ -int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, - int mode) -{ - int i; - struct pnp_resource_table *bak; - - if (!pnp_can_configure(dev)) - return -ENODEV; - bak = pnp_alloc(sizeof(struct pnp_resource_table)); - if (!bak) - return -ENOMEM; - *bak = dev->res; - - mutex_lock(&pnp_res_mutex); - dev->res = *res; - if (!(mode & PNP_CONFIG_FORCE)) { - for (i = 0; i < PNP_MAX_PORT; i++) { - if (!pnp_check_port(dev, i)) - goto fail; - } - for (i = 0; i < PNP_MAX_MEM; i++) { - if (!pnp_check_mem(dev, i)) - goto fail; - } - for (i = 0; i < PNP_MAX_IRQ; i++) { - if (!pnp_check_irq(dev, i)) - goto fail; - } - for (i = 0; i < PNP_MAX_DMA; i++) { - if (!pnp_check_dma(dev, i)) - goto fail; - } - } - mutex_unlock(&pnp_res_mutex); - - kfree(bak); - return 0; - -fail: - dev->res = *bak; - mutex_unlock(&pnp_res_mutex); - kfree(bak); - return -EINVAL; -} - /** * pnp_auto_config_dev - automatically assigns resources to a device * @dev: pointer to the desired device @@ -602,24 +545,7 @@ int pnp_disable_dev(struct pnp_dev *dev) return 0; } -/** - * pnp_resource_change - change one 
resource - * @resource: pointer to resource to be changed - * @start: start of region - * @size: size of region - */ -void pnp_resource_change(struct resource *resource, resource_size_t start, - resource_size_t size) -{ - resource->flags &= ~(IORESOURCE_AUTO | IORESOURCE_UNSET); - resource->start = start; - resource->end = start + size - 1; -} - -EXPORT_SYMBOL(pnp_manual_config_dev); EXPORT_SYMBOL(pnp_start_dev); EXPORT_SYMBOL(pnp_stop_dev); EXPORT_SYMBOL(pnp_activate_dev); EXPORT_SYMBOL(pnp_disable_dev); -EXPORT_SYMBOL(pnp_resource_change); -EXPORT_SYMBOL(pnp_init_resource_table); diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 1737f071787a..e8187d9faf01 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -390,18 +390,13 @@ int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_port *data); int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_mem *data); -void pnp_init_resource_table(struct pnp_resource_table *table); void pnp_init_resources(struct pnp_dev *dev); -int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, - int mode); int pnp_auto_config_dev(struct pnp_dev *dev); int pnp_validate_config(struct pnp_dev *dev); int pnp_start_dev(struct pnp_dev *dev); int pnp_stop_dev(struct pnp_dev *dev); int pnp_activate_dev(struct pnp_dev *dev); int pnp_disable_dev(struct pnp_dev *dev); -void pnp_resource_change(struct resource *resource, resource_size_t start, - resource_size_t size); /* protocol helpers */ int pnp_is_active(struct pnp_dev *dev); @@ -438,16 +433,13 @@ static inline int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_opti static inline int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_dma *data) { return -ENODEV; } static inline int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_port *data) { return -ENODEV; } static inline int 
pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_mem *data) { return -ENODEV; } -static inline void pnp_init_resource_table(struct pnp_resource_table *table) { } static inline void pnp_init_resources(struct pnp_dev *dev) { } -static inline int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, int mode) { return -ENODEV; } static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_validate_config(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; } -static inline void pnp_resource_change(struct resource *resource, resource_size_t start, resource_size_t size) { } /* protocol helpers */ static inline int pnp_is_active(struct pnp_dev *dev) { return 0; } -- cgit v1.2.3-71-gd317 From b90eca0a61ebd010036242e29610bc6a909e3f19 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:34:14 -0600 Subject: PNP: add pnp_get_resource() interface This adds a pnp_get_resource() that works the same way as platform_get_resource(). This will enable us to consolidate many pnp_resource_table references in one place, which will make it easier to make the table dynamic. 
Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/resource.c | 27 +++++++++++++++++++++++++++ include/linux/pnp.h | 1 + 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index eee6d8eddcb4..ef8835ec5778 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -487,6 +487,33 @@ int pnp_check_dma(struct pnp_dev *dev, int idx) #endif } +struct resource *pnp_get_resource(struct pnp_dev *dev, + unsigned int type, unsigned int num) +{ + struct pnp_resource_table *res = &dev->res; + + switch (type) { + case IORESOURCE_IO: + if (num >= PNP_MAX_PORT) + return NULL; + return &res->port_resource[num]; + case IORESOURCE_MEM: + if (num >= PNP_MAX_MEM) + return NULL; + return &res->mem_resource[num]; + case IORESOURCE_IRQ: + if (num >= PNP_MAX_IRQ) + return NULL; + return &res->irq_resource[num]; + case IORESOURCE_DMA: + if (num >= PNP_MAX_DMA) + return NULL; + return &res->dma_resource[num]; + } + return NULL; +} +EXPORT_SYMBOL(pnp_get_resource); + /* format is: pnp_reserve_irq=irq1[,irq2] .... */ static int __init pnp_setup_reserve_irq(char *str) { diff --git a/include/linux/pnp.h b/include/linux/pnp.h index e8187d9faf01..b5fd03854fa4 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -25,6 +25,7 @@ struct pnp_dev; /* * Resource Management */ +struct resource *pnp_get_resource(struct pnp_dev *, unsigned int, unsigned int); /* Use these instead of directly reading pnp_dev to get resource information */ #define pnp_port_start(dev,bar) ((dev)->res.port_resource[(bar)].start) -- cgit v1.2.3-71-gd317 From 13575e81bb38fc797a5513ad1bd8e6fda17439b8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:34:16 -0600 Subject: PNP: convert resource accessors to use pnp_get_resource(), not pnp_resource_table This removes more direct references to pnp_resource_table. 
Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- include/linux/pnp.h | 145 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 105 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index b5fd03854fa4..1640562f3ebc 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -27,46 +27,111 @@ struct pnp_dev; */ struct resource *pnp_get_resource(struct pnp_dev *, unsigned int, unsigned int); -/* Use these instead of directly reading pnp_dev to get resource information */ -#define pnp_port_start(dev,bar) ((dev)->res.port_resource[(bar)].start) -#define pnp_port_end(dev,bar) ((dev)->res.port_resource[(bar)].end) -#define pnp_port_flags(dev,bar) ((dev)->res.port_resource[(bar)].flags) -#define pnp_port_valid(dev,bar) \ - ((pnp_port_flags((dev),(bar)) & (IORESOURCE_IO | IORESOURCE_UNSET)) \ - == IORESOURCE_IO) -#define pnp_port_len(dev,bar) \ - ((pnp_port_start((dev),(bar)) == 0 && \ - pnp_port_end((dev),(bar)) == \ - pnp_port_start((dev),(bar))) ? 0 : \ - \ - (pnp_port_end((dev),(bar)) - \ - pnp_port_start((dev),(bar)) + 1)) - -#define pnp_mem_start(dev,bar) ((dev)->res.mem_resource[(bar)].start) -#define pnp_mem_end(dev,bar) ((dev)->res.mem_resource[(bar)].end) -#define pnp_mem_flags(dev,bar) ((dev)->res.mem_resource[(bar)].flags) -#define pnp_mem_valid(dev,bar) \ - ((pnp_mem_flags((dev),(bar)) & (IORESOURCE_MEM | IORESOURCE_UNSET)) \ - == IORESOURCE_MEM) -#define pnp_mem_len(dev,bar) \ - ((pnp_mem_start((dev),(bar)) == 0 && \ - pnp_mem_end((dev),(bar)) == \ - pnp_mem_start((dev),(bar))) ? 
0 : \ - \ - (pnp_mem_end((dev),(bar)) - \ - pnp_mem_start((dev),(bar)) + 1)) - -#define pnp_irq(dev,bar) ((dev)->res.irq_resource[(bar)].start) -#define pnp_irq_flags(dev,bar) ((dev)->res.irq_resource[(bar)].flags) -#define pnp_irq_valid(dev,bar) \ - ((pnp_irq_flags((dev),(bar)) & (IORESOURCE_IRQ | IORESOURCE_UNSET)) \ - == IORESOURCE_IRQ) - -#define pnp_dma(dev,bar) ((dev)->res.dma_resource[(bar)].start) -#define pnp_dma_flags(dev,bar) ((dev)->res.dma_resource[(bar)].flags) -#define pnp_dma_valid(dev,bar) \ - ((pnp_dma_flags((dev),(bar)) & (IORESOURCE_DMA | IORESOURCE_UNSET)) \ - == IORESOURCE_DMA) +static inline int pnp_resource_valid(struct resource *res) +{ + if (res && !(res->flags & IORESOURCE_UNSET)) + return 1; + return 0; +} + +static inline resource_size_t pnp_resource_len(struct resource *res) +{ + if (res->start == 0 && res->end == 0) + return 0; + return res->end - res->start + 1; +} + + +static inline resource_size_t pnp_port_start(struct pnp_dev *dev, + unsigned int bar) +{ + return pnp_get_resource(dev, IORESOURCE_IO, bar)->start; +} + +static inline resource_size_t pnp_port_end(struct pnp_dev *dev, + unsigned int bar) +{ + return pnp_get_resource(dev, IORESOURCE_IO, bar)->end; +} + +static inline unsigned long pnp_port_flags(struct pnp_dev *dev, + unsigned int bar) +{ + return pnp_get_resource(dev, IORESOURCE_IO, bar)->flags; +} + +static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar) +{ + return pnp_resource_valid(pnp_get_resource(dev, IORESOURCE_IO, bar)); +} + +static inline resource_size_t pnp_port_len(struct pnp_dev *dev, + unsigned int bar) +{ + return pnp_resource_len(pnp_get_resource(dev, IORESOURCE_IO, bar)); +} + + +static inline resource_size_t pnp_mem_start(struct pnp_dev *dev, + unsigned int bar) +{ + return pnp_get_resource(dev, IORESOURCE_MEM, bar)->start; +} + +static inline resource_size_t pnp_mem_end(struct pnp_dev *dev, + unsigned int bar) +{ + return pnp_get_resource(dev, IORESOURCE_MEM, bar)->end; +} + +static 
inline unsigned long pnp_mem_flags(struct pnp_dev *dev, unsigned int bar) +{ + return pnp_get_resource(dev, IORESOURCE_MEM, bar)->flags; +} + +static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar) +{ + return pnp_resource_valid(pnp_get_resource(dev, IORESOURCE_MEM, bar)); +} + +static inline resource_size_t pnp_mem_len(struct pnp_dev *dev, + unsigned int bar) +{ + return pnp_resource_len(pnp_get_resource(dev, IORESOURCE_MEM, bar)); +} + + +static inline resource_size_t pnp_irq(struct pnp_dev *dev, unsigned int bar) +{ + return pnp_get_resource(dev, IORESOURCE_IRQ, bar)->start; +} + +static inline unsigned long pnp_irq_flags(struct pnp_dev *dev, unsigned int bar) +{ + return pnp_get_resource(dev, IORESOURCE_IRQ, bar)->flags; +} + +static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar) +{ + return pnp_resource_valid(pnp_get_resource(dev, IORESOURCE_IRQ, bar)); +} + + +static inline resource_size_t pnp_dma(struct pnp_dev *dev, unsigned int bar) +{ + return pnp_get_resource(dev, IORESOURCE_DMA, bar)->start; +} + +static inline unsigned long pnp_dma_flags(struct pnp_dev *dev, unsigned int bar) +{ + return pnp_get_resource(dev, IORESOURCE_DMA, bar)->flags; +} + +static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) +{ + return pnp_resource_valid(pnp_get_resource(dev, IORESOURCE_DMA, bar)); +} + #define PNP_PORT_FLAG_16BITADDR (1<<0) #define PNP_PORT_FLAG_FIXED (1<<1) -- cgit v1.2.3-71-gd317 From 02d83b5da3efa3c278ce87db2637f3dd6837166d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:34:28 -0600 Subject: PNP: make pnp_resource_table private to PNP core There are no remaining references to the PNP_MAX_* constants or the pnp_resource_table structure outside of the PNP core. Make them private to the PNP core. 
Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- drivers/pnp/base.h | 12 ++++++++++++ drivers/pnp/core.c | 8 ++++++++ drivers/pnp/isapnp/core.c | 4 ++-- drivers/pnp/manager.c | 16 ++++++++-------- drivers/pnp/pnpacpi/rsparser.c | 10 ++++++---- drivers/pnp/pnpbios/rsparser.c | 8 ++++---- drivers/pnp/resource.c | 2 +- include/linux/pnp.h | 14 ++------------ 8 files changed, 43 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index e739d4bba423..b888a5fb6b7f 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -20,3 +20,15 @@ int pnp_check_dma(struct pnp_dev *dev, struct resource *res); void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc); void pnp_init_resource(struct resource *res); + +#define PNP_MAX_PORT 40 +#define PNP_MAX_MEM 24 +#define PNP_MAX_IRQ 2 +#define PNP_MAX_DMA 2 + +struct pnp_resource_table { + struct resource port_resource[PNP_MAX_PORT]; + struct resource mem_resource[PNP_MAX_MEM]; + struct resource dma_resource[PNP_MAX_DMA]; + struct resource irq_resource[PNP_MAX_IRQ]; +}; diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c index cf37701a4f9e..20771b7d4482 100644 --- a/drivers/pnp/core.c +++ b/drivers/pnp/core.c @@ -106,6 +106,7 @@ static void pnp_release_device(struct device *dmdev) pnp_free_option(dev->independent); pnp_free_option(dev->dependent); pnp_free_ids(dev); + kfree(dev->res); kfree(dev); } @@ -118,6 +119,12 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id, char *pnpid if (!dev) return NULL; + dev->res = kzalloc(sizeof(struct pnp_resource_table), GFP_KERNEL); + if (!dev->res) { + kfree(dev); + return NULL; + } + dev->protocol = protocol; dev->number = id; dev->dma_mask = DMA_24BIT_MASK; @@ -133,6 +140,7 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id, char *pnpid dev_id = pnp_add_id(dev, pnpid); if (!dev_id) { + kfree(dev->res); kfree(dev); return NULL; } diff --git a/drivers/pnp/isapnp/core.c 
b/drivers/pnp/isapnp/core.c index 990d8cd6295c..4407e844b5ea 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -931,7 +931,7 @@ EXPORT_SYMBOL(isapnp_write_byte); static int isapnp_read_resources(struct pnp_dev *dev) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; int tmp, ret; dev->active = isapnp_read_byte(ISAPNP_CFG_ACTIVATE); @@ -987,7 +987,7 @@ static int isapnp_get_resources(struct pnp_dev *dev) static int isapnp_set_resources(struct pnp_dev *dev) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; int tmp; dev_dbg(&dev->dev, "set resources\n"); diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index 7c5ebddfc6af..46a5e0e90d9a 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -247,22 +247,22 @@ void pnp_init_resources(struct pnp_dev *dev) int idx; for (idx = 0; idx < PNP_MAX_IRQ; idx++) { - res = &dev->res.irq_resource[idx]; + res = &dev->res->irq_resource[idx]; res->flags = IORESOURCE_IRQ; pnp_init_resource(res); } for (idx = 0; idx < PNP_MAX_DMA; idx++) { - res = &dev->res.dma_resource[idx]; + res = &dev->res->dma_resource[idx]; res->flags = IORESOURCE_DMA; pnp_init_resource(res); } for (idx = 0; idx < PNP_MAX_PORT; idx++) { - res = &dev->res.port_resource[idx]; + res = &dev->res->port_resource[idx]; res->flags = IORESOURCE_IO; pnp_init_resource(res); } for (idx = 0; idx < PNP_MAX_MEM; idx++) { - res = &dev->res.mem_resource[idx]; + res = &dev->res->mem_resource[idx]; res->flags = IORESOURCE_MEM; pnp_init_resource(res); } @@ -278,28 +278,28 @@ static void pnp_clean_resource_table(struct pnp_dev *dev) int idx; for (idx = 0; idx < PNP_MAX_IRQ; idx++) { - res = &dev->res.irq_resource[idx]; + res = &dev->res->irq_resource[idx]; if (res->flags & IORESOURCE_AUTO) { res->flags = IORESOURCE_IRQ; pnp_init_resource(res); } } for (idx = 0; idx < PNP_MAX_DMA; idx++) { - res = &dev->res.dma_resource[idx]; + res = 
&dev->res->dma_resource[idx]; if (res->flags & IORESOURCE_AUTO) { res->flags = IORESOURCE_DMA; pnp_init_resource(res); } } for (idx = 0; idx < PNP_MAX_PORT; idx++) { - res = &dev->res.port_resource[idx]; + res = &dev->res->port_resource[idx]; if (res->flags & IORESOURCE_AUTO) { res->flags = IORESOURCE_IO; pnp_init_resource(res); } } for (idx = 0; idx < PNP_MAX_MEM; idx++) { - res = &dev->res.mem_resource[idx]; + res = &dev->res->mem_resource[idx]; if (res->flags & IORESOURCE_AUTO) { res->flags = IORESOURCE_MEM; pnp_init_resource(res); diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 000a1b39f0b6..2669518b4795 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include "../base.h" #include "pnpacpi.h" #ifdef CONFIG_IA64 @@ -80,7 +82,7 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev, u32 gsi, int triggering, int polarity, int shareable) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; int i = 0; int irq; int p, t; @@ -176,7 +178,7 @@ static int dma_flags(int type, int bus_master, int transfer) static void pnpacpi_parse_allocated_dmaresource(struct pnp_dev *dev, u32 dma, int flags) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; int i = 0; static unsigned char warned; @@ -202,7 +204,7 @@ static void pnpacpi_parse_allocated_dmaresource(struct pnp_dev *dev, static void pnpacpi_parse_allocated_ioresource(struct pnp_dev *dev, u64 io, u64 len, int io_decode) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; int i = 0; static unsigned char warned; @@ -230,7 +232,7 @@ static void pnpacpi_parse_allocated_memresource(struct pnp_dev *dev, u64 mem, u64 len, int write_protect) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; int i = 0; static unsigned char warned; 
diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index c1f9e162d2c5..9f0538af0321 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -56,7 +56,7 @@ inline void pcibios_penalize_isa_irq(int irq, int active) static void pnpbios_parse_allocated_irqresource(struct pnp_dev *dev, int irq) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; int i = 0; while (!(res->irq_resource[i].flags & IORESOURCE_UNSET) @@ -76,7 +76,7 @@ static void pnpbios_parse_allocated_irqresource(struct pnp_dev *dev, int irq) static void pnpbios_parse_allocated_dmaresource(struct pnp_dev *dev, int dma) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; int i = 0; while (i < PNP_MAX_DMA && @@ -96,7 +96,7 @@ static void pnpbios_parse_allocated_dmaresource(struct pnp_dev *dev, int dma) static void pnpbios_parse_allocated_ioresource(struct pnp_dev *dev, int io, int len) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; int i = 0; while (!(res->port_resource[i].flags & IORESOURCE_UNSET) @@ -116,7 +116,7 @@ static void pnpbios_parse_allocated_ioresource(struct pnp_dev *dev, static void pnpbios_parse_allocated_memresource(struct pnp_dev *dev, int mem, int len) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; int i = 0; while (!(res->mem_resource[i].flags & IORESOURCE_UNSET) diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 84362818fa8b..f7adc7eefbf8 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -502,7 +502,7 @@ int pnp_check_dma(struct pnp_dev *dev, struct resource *res) struct resource *pnp_get_resource(struct pnp_dev *dev, unsigned int type, unsigned int num) { - struct pnp_resource_table *res = &dev->res; + struct pnp_resource_table *res = dev->res; switch (type) { case IORESOURCE_IO: diff --git a/include/linux/pnp.h 
b/include/linux/pnp.h index 1640562f3ebc..a5487b6a4e57 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -13,14 +13,11 @@ #include #include -#define PNP_MAX_PORT 40 -#define PNP_MAX_MEM 24 -#define PNP_MAX_IRQ 2 -#define PNP_MAX_DMA 2 #define PNP_NAME_LEN 50 struct pnp_protocol; struct pnp_dev; +struct pnp_resource_table; /* * Resource Management @@ -184,13 +181,6 @@ struct pnp_option { struct pnp_option *next; /* used to chain dependent resources */ }; -struct pnp_resource_table { - struct resource port_resource[PNP_MAX_PORT]; - struct resource mem_resource[PNP_MAX_MEM]; - struct resource dma_resource[PNP_MAX_DMA]; - struct resource irq_resource[PNP_MAX_IRQ]; -}; - /* * Device Management */ @@ -260,7 +250,7 @@ struct pnp_dev { int capabilities; struct pnp_option *independent; struct pnp_option *dependent; - struct pnp_resource_table res; + struct pnp_resource_table *res; char name[PNP_NAME_LEN]; /* contains a human-readable name */ unsigned short regs; /* ISAPnP: supported registers */ -- cgit v1.2.3-71-gd317 From 62cfb298b95d713825deb8faf2044c45a1e17a0a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:34:40 -0600 Subject: PNP: make interfaces private to the PNP core The interfaces for registering protocols, devices, cards, and resource options should only be used inside the PNP core. 
Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/base.h | 27 ++++++++++++++++++++++++++- include/linux/pnp.h | 36 ------------------------------------ 2 files changed, 26 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index 9b7bb62c98b1..4fe7c58f57e9 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -1,12 +1,37 @@ extern spinlock_t pnp_lock; void *pnp_alloc(long size); + +int pnp_register_protocol(struct pnp_protocol *protocol); +void pnp_unregister_protocol(struct pnp_protocol *protocol); + #define PNP_EISA_ID_MASK 0x7fffffff void pnp_eisa_id_to_string(u32 id, char *str); struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *, int id, char *pnpid); struct pnp_card *pnp_alloc_card(struct pnp_protocol *, int id, char *pnpid); + +int pnp_add_device(struct pnp_dev *dev); struct pnp_id *pnp_add_id(struct pnp_dev *dev, char *id); -struct pnp_id *pnp_add_card_id(struct pnp_card *card, char *id); int pnp_interface_attach_device(struct pnp_dev *dev); + +int pnp_add_card(struct pnp_card *card); +struct pnp_id *pnp_add_card_id(struct pnp_card *card, char *id); +void pnp_remove_card(struct pnp_card *card); +int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev); +void pnp_remove_card_device(struct pnp_dev *dev); + +struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev); +struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, + int priority); +int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_irq *data); +int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_dma *data); +int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_port *data); +int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, + struct pnp_mem *data); +void pnp_init_resources(struct pnp_dev *dev); + void 
pnp_fixup_device(struct pnp_dev *dev); void pnp_free_option(struct pnp_option *option); int __pnp_add_device(struct pnp_dev *dev); diff --git a/include/linux/pnp.h b/include/linux/pnp.h index a5487b6a4e57..f5b985e912ae 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -414,19 +414,12 @@ extern struct bus_type pnp_bus_type; #if defined(CONFIG_PNP) /* device management */ -int pnp_register_protocol(struct pnp_protocol *protocol); -void pnp_unregister_protocol(struct pnp_protocol *protocol); -int pnp_add_device(struct pnp_dev *dev); int pnp_device_attach(struct pnp_dev *pnp_dev); void pnp_device_detach(struct pnp_dev *pnp_dev); extern struct list_head pnp_global; extern int pnp_platform_devices; /* multidevice card support */ -int pnp_add_card(struct pnp_card *card); -void pnp_remove_card(struct pnp_card *card); -int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev); -void pnp_remove_card_device(struct pnp_dev *dev); struct pnp_dev *pnp_request_card_device(struct pnp_card_link *clink, const char *id, struct pnp_dev *from); void pnp_release_card_device(struct pnp_dev *dev); @@ -435,20 +428,7 @@ void pnp_unregister_card_driver(struct pnp_card_driver *drv); extern struct list_head pnp_cards; /* resource management */ -struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev); -struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, - int priority); -int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_irq *data); -int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_dma *data); -int pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_port *data); -int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, - struct pnp_mem *data); -void pnp_init_resources(struct pnp_dev *dev); int pnp_auto_config_dev(struct pnp_dev *dev); -int pnp_validate_config(struct pnp_dev *dev); int 
pnp_start_dev(struct pnp_dev *dev); int pnp_stop_dev(struct pnp_dev *dev); int pnp_activate_dev(struct pnp_dev *dev); @@ -463,35 +443,19 @@ void pnp_unregister_driver(struct pnp_driver *drv); #else /* device management */ -static inline int pnp_register_protocol(struct pnp_protocol *protocol) { return -ENODEV; } -static inline void pnp_unregister_protocol(struct pnp_protocol *protocol) { } -static inline int pnp_init_device(struct pnp_dev *dev) { return -ENODEV; } -static inline int pnp_add_device(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_device_attach(struct pnp_dev *pnp_dev) { return -ENODEV; } static inline void pnp_device_detach(struct pnp_dev *pnp_dev) { } #define pnp_platform_devices 0 /* multidevice card support */ -static inline int pnp_add_card(struct pnp_card *card) { return -ENODEV; } -static inline void pnp_remove_card(struct pnp_card *card) { } -static inline int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev) { return -ENODEV; } -static inline void pnp_remove_card_device(struct pnp_dev *dev) { } static inline struct pnp_dev *pnp_request_card_device(struct pnp_card_link *clink, const char *id, struct pnp_dev *from) { return NULL; } static inline void pnp_release_card_device(struct pnp_dev *dev) { } static inline int pnp_register_card_driver(struct pnp_card_driver *drv) { return -ENODEV; } static inline void pnp_unregister_card_driver(struct pnp_card_driver *drv) { } /* resource management */ -static inline struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev) { return NULL; } -static inline struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, int priority) { return NULL; } -static inline int pnp_register_irq_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_irq *data) { return -ENODEV; } -static inline int pnp_register_dma_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_dma *data) { return -ENODEV; } -static inline int 
pnp_register_port_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_port *data) { return -ENODEV; } -static inline int pnp_register_mem_resource(struct pnp_dev *dev, struct pnp_option *option, struct pnp_mem *data) { return -ENODEV; } -static inline void pnp_init_resources(struct pnp_dev *dev) { } static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; } -static inline int pnp_validate_config(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } -- cgit v1.2.3-71-gd317 From 261b20da4bd349f1b26e206f440809f1351be34b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:34:41 -0600 Subject: ISAPNP: remove unused pnp_dev->regs field The "regs" field in struct pnp_dev is set but never read, so remove it. Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/isapnp/core.c | 3 --- include/linux/pnp.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c index a3f1566ccea5..f1bccdbdeb08 100644 --- a/drivers/pnp/isapnp/core.c +++ b/drivers/pnp/isapnp/core.c @@ -416,10 +416,7 @@ static struct pnp_dev *__init isapnp_parse_device(struct pnp_card *card, if (!dev) return NULL; - dev->regs = tmp[4]; dev->card = card; - if (size > 5) - dev->regs |= tmp[5] << 8; dev->capabilities |= PNP_CONFIGURABLE; dev->capabilities |= PNP_READ; dev->capabilities |= PNP_WRITE; diff --git a/include/linux/pnp.h b/include/linux/pnp.h index f5b985e912ae..e3b2c0068de7 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -253,7 +253,6 @@ struct pnp_dev { struct pnp_resource_table *res; char name[PNP_NAME_LEN]; /* contains a human-readable name */ - unsigned short regs; /* ISAPnP: supported registers */ int flags; /* 
used by protocols */ struct proc_dir_entry *procent; /* device entry in /proc/bus/isapnp */ void *data; -- cgit v1.2.3-71-gd317 From dfd2e1b4e6eb46ff59c7e1c1111c967b8b5981c1 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 28 Apr 2008 16:34:42 -0600 Subject: PNPBIOS: remove include/linux/pnpbios.h The contents of include/linux/pnpbios.h are used only inside the PNPBIOS backend, so this file doesn't need to be visible outside PNP. This patch moves the contents into an existing PNPBIOS-specific file, drivers/pnp/pnpbios/pnpbios.h. Signed-off-by: Bjorn Helgaas Acked-By: Rene Herman Signed-off-by: Len Brown --- drivers/pnp/pnpbios/bioscalls.c | 1 - drivers/pnp/pnpbios/core.c | 1 - drivers/pnp/pnpbios/pnpbios.h | 136 ++++++++++++++++++++++++++++++++++++ drivers/pnp/pnpbios/proc.c | 2 +- drivers/pnp/pnpbios/rsparser.c | 1 - include/linux/pnpbios.h | 151 ---------------------------------------- 6 files changed, 137 insertions(+), 155 deletions(-) delete mode 100644 include/linux/pnpbios.h (limited to 'include/linux') diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c index a8364d815222..7ff824496b39 100644 --- a/drivers/pnp/pnpbios/bioscalls.c +++ b/drivers/pnp/pnpbios/bioscalls.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index f5477ca85956..19a4be1a9a31 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pnp/pnpbios/pnpbios.h b/drivers/pnp/pnpbios/pnpbios.h index 42343fc753ba..b09cf6dc2075 100644 --- a/drivers/pnp/pnpbios/pnpbios.h +++ b/drivers/pnp/pnpbios/pnpbios.h @@ -2,6 +2,142 @@ * pnpbios.h - contains local definitions */ +/* + * Include file for the interface to a PnP BIOS + * + * Original BIOS code (C) 1998 Christian Schmidt (chr.schmidt@tu-bs.de) + * PnP handler parts (c) 1998 Tom Lees + * Minor 
reorganizations by David Hinds + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Return codes + */ +#define PNP_SUCCESS 0x00 +#define PNP_NOT_SET_STATICALLY 0x7f +#define PNP_UNKNOWN_FUNCTION 0x81 +#define PNP_FUNCTION_NOT_SUPPORTED 0x82 +#define PNP_INVALID_HANDLE 0x83 +#define PNP_BAD_PARAMETER 0x84 +#define PNP_SET_FAILED 0x85 +#define PNP_EVENTS_NOT_PENDING 0x86 +#define PNP_SYSTEM_NOT_DOCKED 0x87 +#define PNP_NO_ISA_PNP_CARDS 0x88 +#define PNP_UNABLE_TO_DETERMINE_DOCK_CAPABILITIES 0x89 +#define PNP_CONFIG_CHANGE_FAILED_NO_BATTERY 0x8a +#define PNP_CONFIG_CHANGE_FAILED_RESOURCE_CONFLICT 0x8b +#define PNP_BUFFER_TOO_SMALL 0x8c +#define PNP_USE_ESCD_SUPPORT 0x8d +#define PNP_MESSAGE_NOT_SUPPORTED 0x8e +#define PNP_HARDWARE_ERROR 0x8f + +#define ESCD_SUCCESS 0x00 +#define ESCD_IO_ERROR_READING 0x55 +#define ESCD_INVALID 0x56 +#define ESCD_BUFFER_TOO_SMALL 0x59 +#define ESCD_NVRAM_TOO_SMALL 0x5a +#define ESCD_FUNCTION_NOT_SUPPORTED 0x81 + +/* + * Events that can be received by "get event" + */ +#define PNPEV_ABOUT_TO_CHANGE_CONFIG 0x0001 +#define PNPEV_DOCK_CHANGED 0x0002 +#define PNPEV_SYSTEM_DEVICE_CHANGED 0x0003 +#define PNPEV_CONFIG_CHANGED_FAILED 0x0004 +#define PNPEV_UNKNOWN_SYSTEM_EVENT 0xffff +/* 0x8000 through 0xfffe are OEM defined */ + +/* + * Messages that should be sent through "send 
message" + */ +#define PNPMSG_OK 0x00 +#define PNPMSG_ABORT 0x01 +#define PNPMSG_UNDOCK_DEFAULT_ACTION 0x40 +#define PNPMSG_POWER_OFF 0x41 +#define PNPMSG_PNP_OS_ACTIVE 0x42 +#define PNPMSG_PNP_OS_INACTIVE 0x43 + +/* + * Plug and Play BIOS flags + */ +#define PNPBIOS_NO_DISABLE 0x0001 +#define PNPBIOS_NO_CONFIG 0x0002 +#define PNPBIOS_OUTPUT 0x0004 +#define PNPBIOS_INPUT 0x0008 +#define PNPBIOS_BOOTABLE 0x0010 +#define PNPBIOS_DOCK 0x0020 +#define PNPBIOS_REMOVABLE 0x0040 +#define pnpbios_is_static(x) (((x)->flags & 0x0100) == 0x0000) +#define pnpbios_is_dynamic(x) ((x)->flags & 0x0080) + +/* + * Function Parameters + */ +#define PNPMODE_STATIC 1 +#define PNPMODE_DYNAMIC 0 + +/* 0x8000 through 0xffff are OEM defined */ + +#pragma pack(1) +struct pnp_dev_node_info { + __u16 no_nodes; + __u16 max_node_size; +}; +struct pnp_docking_station_info { + __u32 location_id; + __u32 serial; + __u16 capabilities; +}; +struct pnp_isa_config_struc { + __u8 revision; + __u8 no_csns; + __u16 isa_rd_data_port; + __u16 reserved; +}; +struct escd_info_struc { + __u16 min_escd_write_size; + __u16 escd_size; + __u32 nv_storage_base; +}; +struct pnp_bios_node { + __u16 size; + __u8 handle; + __u32 eisa_id; + __u8 type_code[3]; + __u16 flags; + __u8 data[0]; +}; +#pragma pack() + +/* non-exported */ +extern struct pnp_dev_node_info node_info; + +extern int pnp_bios_dev_node_info(struct pnp_dev_node_info *data); +extern int pnp_bios_get_dev_node(u8 *nodenum, char config, + struct pnp_bios_node *data); +extern int pnp_bios_set_dev_node(u8 nodenum, char config, + struct pnp_bios_node *data); +extern int pnp_bios_get_stat_res(char *info); +extern int pnp_bios_isapnp_config(struct pnp_isa_config_struc *data); +extern int pnp_bios_escd_info(struct escd_info_struc *data); +extern int pnp_bios_read_escd(char *data, u32 nvram_base); +extern int pnp_bios_dock_station_info(struct pnp_docking_station_info *data); + #pragma pack(1) union pnp_bios_install_struct { struct { diff --git 
a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c index bb19bc957bad..4f89f1677e69 100644 --- a/drivers/pnp/pnpbios/proc.c +++ b/drivers/pnp/pnpbios/proc.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index ed63ecd9bf40..2e2c457a0fea 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -4,7 +4,6 @@ #include #include -#include #include #include diff --git a/include/linux/pnpbios.h b/include/linux/pnpbios.h deleted file mode 100644 index 329192adc9dd..000000000000 --- a/include/linux/pnpbios.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Include file for the interface to a PnP BIOS - * - * Original BIOS code (C) 1998 Christian Schmidt (chr.schmidt@tu-bs.de) - * PnP handler parts (c) 1998 Tom Lees - * Minor reorganizations by David Hinds - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _LINUX_PNPBIOS_H -#define _LINUX_PNPBIOS_H - -#ifdef __KERNEL__ - -#include -#include - -/* - * Return codes - */ -#define PNP_SUCCESS 0x00 -#define PNP_NOT_SET_STATICALLY 0x7f -#define PNP_UNKNOWN_FUNCTION 0x81 -#define PNP_FUNCTION_NOT_SUPPORTED 0x82 -#define PNP_INVALID_HANDLE 0x83 -#define PNP_BAD_PARAMETER 0x84 -#define PNP_SET_FAILED 0x85 -#define PNP_EVENTS_NOT_PENDING 0x86 -#define PNP_SYSTEM_NOT_DOCKED 0x87 -#define PNP_NO_ISA_PNP_CARDS 0x88 -#define PNP_UNABLE_TO_DETERMINE_DOCK_CAPABILITIES 0x89 -#define PNP_CONFIG_CHANGE_FAILED_NO_BATTERY 0x8a -#define PNP_CONFIG_CHANGE_FAILED_RESOURCE_CONFLICT 0x8b -#define PNP_BUFFER_TOO_SMALL 0x8c -#define PNP_USE_ESCD_SUPPORT 0x8d -#define PNP_MESSAGE_NOT_SUPPORTED 0x8e -#define PNP_HARDWARE_ERROR 0x8f - -#define ESCD_SUCCESS 0x00 -#define ESCD_IO_ERROR_READING 0x55 -#define ESCD_INVALID 0x56 -#define ESCD_BUFFER_TOO_SMALL 0x59 -#define ESCD_NVRAM_TOO_SMALL 0x5a -#define ESCD_FUNCTION_NOT_SUPPORTED 0x81 - -/* - * Events that can be received by "get event" - */ -#define PNPEV_ABOUT_TO_CHANGE_CONFIG 0x0001 -#define PNPEV_DOCK_CHANGED 0x0002 -#define PNPEV_SYSTEM_DEVICE_CHANGED 0x0003 -#define PNPEV_CONFIG_CHANGED_FAILED 0x0004 -#define PNPEV_UNKNOWN_SYSTEM_EVENT 0xffff -/* 0x8000 through 0xfffe are OEM defined */ - -/* - * Messages that should be sent through "send message" - */ -#define PNPMSG_OK 0x00 -#define PNPMSG_ABORT 0x01 -#define PNPMSG_UNDOCK_DEFAULT_ACTION 0x40 -#define PNPMSG_POWER_OFF 0x41 -#define PNPMSG_PNP_OS_ACTIVE 0x42 -#define PNPMSG_PNP_OS_INACTIVE 0x43 - -/* - * Plug and Play BIOS flags - */ -#define PNPBIOS_NO_DISABLE 0x0001 -#define PNPBIOS_NO_CONFIG 0x0002 -#define PNPBIOS_OUTPUT 0x0004 -#define PNPBIOS_INPUT 0x0008 -#define PNPBIOS_BOOTABLE 0x0010 -#define PNPBIOS_DOCK 
0x0020 -#define PNPBIOS_REMOVABLE 0x0040 -#define pnpbios_is_static(x) (((x)->flags & 0x0100) == 0x0000) -#define pnpbios_is_dynamic(x) ((x)->flags & 0x0080) - -/* - * Function Parameters - */ -#define PNPMODE_STATIC 1 -#define PNPMODE_DYNAMIC 0 - -/* 0x8000 through 0xffff are OEM defined */ - -#pragma pack(1) -struct pnp_dev_node_info { - __u16 no_nodes; - __u16 max_node_size; -}; -struct pnp_docking_station_info { - __u32 location_id; - __u32 serial; - __u16 capabilities; -}; -struct pnp_isa_config_struc { - __u8 revision; - __u8 no_csns; - __u16 isa_rd_data_port; - __u16 reserved; -}; -struct escd_info_struc { - __u16 min_escd_write_size; - __u16 escd_size; - __u32 nv_storage_base; -}; -struct pnp_bios_node { - __u16 size; - __u8 handle; - __u32 eisa_id; - __u8 type_code[3]; - __u16 flags; - __u8 data[0]; -}; -#pragma pack() - -#ifdef CONFIG_PNPBIOS - -/* non-exported */ -extern struct pnp_dev_node_info node_info; - -extern int pnp_bios_dev_node_info(struct pnp_dev_node_info *data); -extern int pnp_bios_get_dev_node(u8 *nodenum, char config, - struct pnp_bios_node *data); -extern int pnp_bios_set_dev_node(u8 nodenum, char config, - struct pnp_bios_node *data); -extern int pnp_bios_get_stat_res(char *info); -extern int pnp_bios_isapnp_config(struct pnp_isa_config_struc *data); -extern int pnp_bios_escd_info(struct escd_info_struc *data); -extern int pnp_bios_read_escd(char *data, u32 nvram_base); -extern int pnp_bios_dock_station_info(struct pnp_docking_station_info *data); - -#endif /* CONFIG_PNPBIOS */ - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_PNPBIOS_H */ -- cgit v1.2.3-71-gd317 From 68154e90c9d1492d570671ae181d9a8f8530da55 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 25 Apr 2008 12:47:50 +0200 Subject: block: add dma alignment and padding support to blk_rq_map_kern This patch adds bio_copy_kern similar to bio_copy_user. blk_rq_map_kern uses bio_copy_kern instead of bio_map_kern if necessary. 
bio_copy_kern uses temporary pages and the bi_end_io callback frees these pages. bio_copy_kern saves the original kernel buffer at bio->bi_private it doesn't use something like struct bio_map_data to store the information about the caller. Signed-off-by: FUJITA Tomonori Cc: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-map.c | 21 ++++++++++++- fs/bio.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 2 ++ 3 files changed, 112 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/block/blk-map.c b/block/blk-map.c index 3c942bd6422a..0b1af5a3537c 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -255,10 +255,18 @@ EXPORT_SYMBOL(blk_rq_unmap_user); * @kbuf: the kernel buffer * @len: length of user data * @gfp_mask: memory allocation flags + * + * Description: + * Data will be mapped directly if possible. Otherwise a bounce + * buffer is used. */ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, unsigned int len, gfp_t gfp_mask) { + unsigned long kaddr; + unsigned int alignment; + int reading = rq_data_dir(rq) == READ; + int do_copy = 0; struct bio *bio; if (len > (q->max_hw_sectors << 9)) @@ -266,13 +274,24 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (!len || !kbuf) return -EINVAL; - bio = bio_map_kern(q, kbuf, len, gfp_mask); + kaddr = (unsigned long)kbuf; + alignment = queue_dma_alignment(q) | q->dma_pad_mask; + do_copy = ((kaddr & alignment) || (len & alignment)); + + if (do_copy) + bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); + else + bio = bio_map_kern(q, kbuf, len, gfp_mask); + if (IS_ERR(bio)) return PTR_ERR(bio); if (rq_data_dir(rq) == WRITE) bio->bi_rw |= (1 << BIO_RW); + if (do_copy) + rq->cmd_flags |= REQ_COPY_USER; + blk_rq_bio_prep(q, rq, bio); blk_queue_bounce(q, &rq->bio); rq->buffer = rq->data = NULL; diff --git a/fs/bio.c b/fs/bio.c index 6e0b6f66df03..799f86deff24 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -937,6 +937,95 @@ 
struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, return ERR_PTR(-EINVAL); } +static void bio_copy_kern_endio(struct bio *bio, int err) +{ + struct bio_vec *bvec; + const int read = bio_data_dir(bio) == READ; + char *p = bio->bi_private; + int i; + + __bio_for_each_segment(bvec, bio, i, 0) { + char *addr = page_address(bvec->bv_page); + + if (read && !err) + memcpy(p, addr, bvec->bv_len); + + __free_page(bvec->bv_page); + p += bvec->bv_len; + } + + bio_put(bio); +} + +/** + * bio_copy_kern - copy kernel address into bio + * @q: the struct request_queue for the bio + * @data: pointer to buffer to copy + * @len: length in bytes + * @gfp_mask: allocation flags for bio and page allocation + * + * copy the kernel address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, + gfp_t gfp_mask, int reading) +{ + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; + const int nr_pages = end - start; + struct bio *bio; + struct bio_vec *bvec; + int i, ret; + + bio = bio_alloc(gfp_mask, nr_pages); + if (!bio) + return ERR_PTR(-ENOMEM); + + while (len) { + struct page *page; + unsigned int bytes = PAGE_SIZE; + + if (bytes > len) + bytes = len; + + page = alloc_page(q->bounce_gfp | gfp_mask); + if (!page) { + ret = -ENOMEM; + goto cleanup; + } + + if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { + ret = -EINVAL; + goto cleanup; + } + + len -= bytes; + } + + if (!reading) { + void *p = data; + + bio_for_each_segment(bvec, bio, i) { + char *addr = page_address(bvec->bv_page); + + memcpy(addr, p, bvec->bv_len); + p += bvec->bv_len; + } + } + + bio->bi_private = data; + bio->bi_end_io = bio_copy_kern_endio; + return bio; +cleanup: + bio_for_each_segment(bvec, bio, i) + __free_page(bvec->bv_page); + + bio_put(bio); + + 
return ERR_PTR(ret); +} + /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions * for performing direct-IO in BIOs. @@ -1273,6 +1362,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); EXPORT_SYMBOL(bio_map_user); EXPORT_SYMBOL(bio_unmap_user); EXPORT_SYMBOL(bio_map_kern); +EXPORT_SYMBOL(bio_copy_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); EXPORT_SYMBOL(bio_split_pool); diff --git a/include/linux/bio.h b/include/linux/bio.h index d259690863fb..61c15eaf3fb3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -324,6 +324,8 @@ extern struct bio *bio_map_user_iov(struct request_queue *, extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); +extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, + gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); -- cgit v1.2.3-71-gd317 From ad1e9380b17addf112f89ce5a57d4d0bee129b7a Mon Sep 17 00:00:00 2001 From: Zhang Wei Date: Fri, 18 Apr 2008 13:33:41 -0700 Subject: [RAPIDIO] Add RapidIO multi mport support The original RapidIO driver suppose there is only one mpc85xx RIO controller in system. So, some data structures are defined as mpc85xx_rio global, such as 'regs_win', 'dbell_ring', 'msg_tx_ring'. Now, I changed them to mport's private members. And you can define multi RIO OF-nodes in dts file for multi RapidIO controller in one processor, such as PCI/PCI-Ex host controllers in Freescale's silicon. And the mport operation function declaration should be changed to know which RapidIO controller is target. 
Signed-off-by: Zhang Wei Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/fsl_rio.c | 395 ++++++++++++++++++++++++------------------ drivers/rapidio/rio-access.c | 10 +- include/linux/rio.h | 18 +- 3 files changed, 238 insertions(+), 185 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 659a5609d2db..80acc7940194 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -1,6 +1,9 @@ /* * Freescale MPC85xx/MPC86xx RapidIO support * + * Copyright (C) 2007, 2008 Freescale Semiconductor, Inc. + * Zhang Wei + * * Copyright 2005 MontaVista Software, Inc. * Matt Porter * @@ -20,6 +23,11 @@ #include +/* RapidIO definition irq, which read from OF-tree */ +#define IRQ_RIO_BELL(m) (((struct rio_priv *)(m->priv))->bellirq) +#define IRQ_RIO_TX(m) (((struct rio_priv *)(m->priv))->txirq) +#define IRQ_RIO_RX(m) (((struct rio_priv *)(m->priv))->rxirq) + #define RIO_REGS_BASE (CCSRBAR + 0xc0000) #define RIO_ATMU_REGS_OFFSET 0x10c00 #define RIO_MSG_REGS_OFFSET 0x11000 @@ -112,20 +120,12 @@ struct rio_tx_desc { u32 res4; }; -static u32 regs_win; -static struct rio_atmu_regs *atmu_regs; -static struct rio_atmu_regs *maint_atmu_regs; -static struct rio_atmu_regs *dbell_atmu_regs; -static u32 dbell_win; -static u32 maint_win; -static struct rio_msg_regs *msg_regs; - -static struct rio_dbell_ring { +struct rio_dbell_ring { void *virt; dma_addr_t phys; -} dbell_ring; +}; -static struct rio_msg_tx_ring { +struct rio_msg_tx_ring { void *virt; dma_addr_t phys; void *virt_buffer[RIO_MAX_TX_RING_SIZE]; @@ -133,16 +133,32 @@ static struct rio_msg_tx_ring { int tx_slot; int size; void *dev_id; -} msg_tx_ring; +}; -static struct rio_msg_rx_ring { +struct rio_msg_rx_ring { void *virt; dma_addr_t phys; void *virt_buffer[RIO_MAX_RX_RING_SIZE]; int rx_slot; int size; void *dev_id; -} msg_rx_ring; +}; + +struct rio_priv { + void __iomem *regs_win; + struct rio_atmu_regs 
__iomem *atmu_regs; + struct rio_atmu_regs __iomem *maint_atmu_regs; + struct rio_atmu_regs __iomem *dbell_atmu_regs; + void __iomem *dbell_win; + void __iomem *maint_win; + struct rio_msg_regs __iomem *msg_regs; + struct rio_dbell_ring dbell_ring; + struct rio_msg_tx_ring msg_tx_ring; + struct rio_msg_rx_ring msg_rx_ring; + int bellirq; + int txirq; + int rxirq; +}; /** * fsl_rio_doorbell_send - Send a MPC85xx doorbell message @@ -153,12 +169,14 @@ static struct rio_msg_rx_ring { * Sends a MPC85xx doorbell message. Returns %0 on success or * %-EINVAL on failure. */ -static int fsl_rio_doorbell_send(int index, u16 destid, u16 data) +static int fsl_rio_doorbell_send(struct rio_mport *mport, + int index, u16 destid, u16 data) { + struct rio_priv *priv = mport->priv; pr_debug("fsl_doorbell_send: index %d destid %4.4x data %4.4x\n", index, destid, data); - out_be32((void *)&dbell_atmu_regs->rowtar, destid << 22); - out_be16((void *)(dbell_win), data); + out_be32(&priv->dbell_atmu_regs->rowtar, destid << 22); + out_be16(priv->dbell_win, data); return 0; } @@ -173,11 +191,13 @@ static int fsl_rio_doorbell_send(int index, u16 destid, u16 data) * Generates a MPC85xx local configuration space read. Returns %0 on * success or %-EINVAL on failure. */ -static int fsl_local_config_read(int index, u32 offset, int len, u32 *data) +static int fsl_local_config_read(struct rio_mport *mport, + int index, u32 offset, int len, u32 *data) { + struct rio_priv *priv = mport->priv; pr_debug("fsl_local_config_read: index %d offset %8.8x\n", index, offset); - *data = in_be32((void *)(regs_win + offset)); + *data = in_be32(priv->regs_win + offset); return 0; } @@ -192,12 +212,14 @@ static int fsl_local_config_read(int index, u32 offset, int len, u32 *data) * Generates a MPC85xx local configuration space write. Returns %0 on * success or %-EINVAL on failure. 
*/ -static int fsl_local_config_write(int index, u32 offset, int len, u32 data) +static int fsl_local_config_write(struct rio_mport *mport, + int index, u32 offset, int len, u32 data) { + struct rio_priv *priv = mport->priv; pr_debug ("fsl_local_config_write: index %d offset %8.8x data %8.8x\n", index, offset, data); - out_be32((void *)(regs_win + offset), data); + out_be32(priv->regs_win + offset, data); return 0; } @@ -215,18 +237,19 @@ static int fsl_local_config_write(int index, u32 offset, int len, u32 data) * success or %-EINVAL on failure. */ static int -fsl_rio_config_read(int index, u16 destid, u8 hopcount, u32 offset, int len, - u32 * val) +fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid, + u8 hopcount, u32 offset, int len, u32 *val) { + struct rio_priv *priv = mport->priv; u8 *data; pr_debug ("fsl_rio_config_read: index %d destid %d hopcount %d offset %8.8x len %d\n", index, destid, hopcount, offset, len); - out_be32((void *)&maint_atmu_regs->rowtar, + out_be32(&priv->maint_atmu_regs->rowtar, (destid << 22) | (hopcount << 12) | ((offset & ~0x3) >> 9)); - data = (u8 *) maint_win + offset; + data = (u8 *) priv->maint_win + offset; switch (len) { case 1: *val = in_8((u8 *) data); @@ -255,17 +278,18 @@ fsl_rio_config_read(int index, u16 destid, u8 hopcount, u32 offset, int len, * success or %-EINVAL on failure. 
*/ static int -fsl_rio_config_write(int index, u16 destid, u8 hopcount, u32 offset, - int len, u32 val) +fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, + u8 hopcount, u32 offset, int len, u32 val) { + struct rio_priv *priv = mport->priv; u8 *data; pr_debug ("fsl_rio_config_write: index %d destid %d hopcount %d offset %8.8x len %d val %8.8x\n", index, destid, hopcount, offset, len, val); - out_be32((void *)&maint_atmu_regs->rowtar, + out_be32(&priv->maint_atmu_regs->rowtar, (destid << 22) | (hopcount << 12) | ((offset & ~0x3) >> 9)); - data = (u8 *) maint_win + offset; + data = (u8 *) priv->maint_win + offset; switch (len) { case 1: out_8((u8 *) data, val); @@ -296,9 +320,10 @@ int rio_hw_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox, void *buffer, size_t len) { + struct rio_priv *priv = mport->priv; u32 omr; - struct rio_tx_desc *desc = - (struct rio_tx_desc *)msg_tx_ring.virt + msg_tx_ring.tx_slot; + struct rio_tx_desc *desc = (struct rio_tx_desc *)priv->msg_tx_ring.virt + + priv->msg_tx_ring.tx_slot; int ret = 0; pr_debug @@ -311,11 +336,11 @@ rio_hw_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox, } /* Copy and clear rest of buffer */ - memcpy(msg_tx_ring.virt_buffer[msg_tx_ring.tx_slot], buffer, len); + memcpy(priv->msg_tx_ring.virt_buffer[priv->msg_tx_ring.tx_slot], buffer, + len); if (len < (RIO_MAX_MSG_SIZE - 4)) - memset((void *)((u32) msg_tx_ring. - virt_buffer[msg_tx_ring.tx_slot] + len), 0, - RIO_MAX_MSG_SIZE - len); + memset(priv->msg_tx_ring.virt_buffer[priv->msg_tx_ring.tx_slot] + + len, 0, RIO_MAX_MSG_SIZE - len); /* Set mbox field for message */ desc->dport = mbox & 0x3; @@ -327,15 +352,16 @@ rio_hw_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox, desc->dwcnt = is_power_of_2(len) ? 
len : 1 << get_bitmask_order(len); /* Set snooping and source buffer address */ - desc->saddr = 0x00000004 | msg_tx_ring.phys_buffer[msg_tx_ring.tx_slot]; + desc->saddr = 0x00000004 + | priv->msg_tx_ring.phys_buffer[priv->msg_tx_ring.tx_slot]; /* Increment enqueue pointer */ - omr = in_be32((void *)&msg_regs->omr); - out_be32((void *)&msg_regs->omr, omr | RIO_MSG_OMR_MUI); + omr = in_be32(&priv->msg_regs->omr); + out_be32(&priv->msg_regs->omr, omr | RIO_MSG_OMR_MUI); /* Go to next descriptor */ - if (++msg_tx_ring.tx_slot == msg_tx_ring.size) - msg_tx_ring.tx_slot = 0; + if (++priv->msg_tx_ring.tx_slot == priv->msg_tx_ring.size) + priv->msg_tx_ring.tx_slot = 0; out: return ret; @@ -356,28 +382,30 @@ fsl_rio_tx_handler(int irq, void *dev_instance) { int osr; struct rio_mport *port = (struct rio_mport *)dev_instance; + struct rio_priv *priv = port->priv; - osr = in_be32((void *)&msg_regs->osr); + osr = in_be32(&priv->msg_regs->osr); if (osr & RIO_MSG_OSR_TE) { pr_info("RIO: outbound message transmission error\n"); - out_be32((void *)&msg_regs->osr, RIO_MSG_OSR_TE); + out_be32(&priv->msg_regs->osr, RIO_MSG_OSR_TE); goto out; } if (osr & RIO_MSG_OSR_QOI) { pr_info("RIO: outbound message queue overflow\n"); - out_be32((void *)&msg_regs->osr, RIO_MSG_OSR_QOI); + out_be32(&priv->msg_regs->osr, RIO_MSG_OSR_QOI); goto out; } if (osr & RIO_MSG_OSR_EOMI) { - u32 dqp = in_be32((void *)&msg_regs->odqdpar); - int slot = (dqp - msg_tx_ring.phys) >> 5; - port->outb_msg[0].mcback(port, msg_tx_ring.dev_id, -1, slot); + u32 dqp = in_be32(&priv->msg_regs->odqdpar); + int slot = (dqp - priv->msg_tx_ring.phys) >> 5; + port->outb_msg[0].mcback(port, priv->msg_tx_ring.dev_id, -1, + slot); /* Ack the end-of-message interrupt */ - out_be32((void *)&msg_regs->osr, RIO_MSG_OSR_EOMI); + out_be32(&priv->msg_regs->osr, RIO_MSG_OSR_EOMI); } out: @@ -398,6 +426,7 @@ fsl_rio_tx_handler(int irq, void *dev_instance) int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries) 
{ int i, j, rc = 0; + struct rio_priv *priv = mport->priv; if ((entries < RIO_MIN_TX_RING_SIZE) || (entries > RIO_MAX_TX_RING_SIZE) || (!is_power_of_2(entries))) { @@ -406,54 +435,53 @@ int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entr } /* Initialize shadow copy ring */ - msg_tx_ring.dev_id = dev_id; - msg_tx_ring.size = entries; - - for (i = 0; i < msg_tx_ring.size; i++) { - if (! - (msg_tx_ring.virt_buffer[i] = - dma_alloc_coherent(NULL, RIO_MSG_BUFFER_SIZE, - &msg_tx_ring.phys_buffer[i], - GFP_KERNEL))) { + priv->msg_tx_ring.dev_id = dev_id; + priv->msg_tx_ring.size = entries; + + for (i = 0; i < priv->msg_tx_ring.size; i++) { + priv->msg_tx_ring.virt_buffer[i] = + dma_alloc_coherent(NULL, RIO_MSG_BUFFER_SIZE, + &priv->msg_tx_ring.phys_buffer[i], GFP_KERNEL); + if (!priv->msg_tx_ring.virt_buffer[i]) { rc = -ENOMEM; - for (j = 0; j < msg_tx_ring.size; j++) - if (msg_tx_ring.virt_buffer[j]) + for (j = 0; j < priv->msg_tx_ring.size; j++) + if (priv->msg_tx_ring.virt_buffer[j]) dma_free_coherent(NULL, - RIO_MSG_BUFFER_SIZE, - msg_tx_ring. - virt_buffer[j], - msg_tx_ring. - phys_buffer[j]); + RIO_MSG_BUFFER_SIZE, + priv->msg_tx_ring. + virt_buffer[j], + priv->msg_tx_ring. 
+ phys_buffer[j]); goto out; } } /* Initialize outbound message descriptor ring */ - if (!(msg_tx_ring.virt = dma_alloc_coherent(NULL, - msg_tx_ring.size * - RIO_MSG_DESC_SIZE, - &msg_tx_ring.phys, - GFP_KERNEL))) { + priv->msg_tx_ring.virt = dma_alloc_coherent(NULL, + priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE, + &priv->msg_tx_ring.phys, GFP_KERNEL); + if (!priv->msg_tx_ring.virt) { rc = -ENOMEM; goto out_dma; } - memset(msg_tx_ring.virt, 0, msg_tx_ring.size * RIO_MSG_DESC_SIZE); - msg_tx_ring.tx_slot = 0; + memset(priv->msg_tx_ring.virt, 0, + priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE); + priv->msg_tx_ring.tx_slot = 0; /* Point dequeue/enqueue pointers at first entry in ring */ - out_be32((void *)&msg_regs->odqdpar, msg_tx_ring.phys); - out_be32((void *)&msg_regs->odqepar, msg_tx_ring.phys); + out_be32(&priv->msg_regs->odqdpar, priv->msg_tx_ring.phys); + out_be32(&priv->msg_regs->odqepar, priv->msg_tx_ring.phys); /* Configure for snooping */ - out_be32((void *)&msg_regs->osar, 0x00000004); + out_be32(&priv->msg_regs->osar, 0x00000004); /* Clear interrupt status */ - out_be32((void *)&msg_regs->osr, 0x000000b3); + out_be32(&priv->msg_regs->osr, 0x000000b3); /* Hook up outbound message handler */ - if ((rc = - request_irq(MPC85xx_IRQ_RIO_TX, fsl_rio_tx_handler, 0, - "msg_tx", (void *)mport)) < 0) + rc = request_irq(IRQ_RIO_TX(mport), fsl_rio_tx_handler, 0, + "msg_tx", (void *)mport); + if (rc < 0) goto out_irq; /* @@ -463,28 +491,28 @@ int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entr * Chaining mode * Disable */ - out_be32((void *)&msg_regs->omr, 0x00100220); + out_be32(&priv->msg_regs->omr, 0x00100220); /* Set number of entries */ - out_be32((void *)&msg_regs->omr, - in_be32((void *)&msg_regs->omr) | + out_be32(&priv->msg_regs->omr, + in_be32(&priv->msg_regs->omr) | ((get_bitmask_order(entries) - 2) << 12)); /* Now enable the unit */ - out_be32((void *)&msg_regs->omr, in_be32((void *)&msg_regs->omr) | 0x1); + 
out_be32(&priv->msg_regs->omr, in_be32(&priv->msg_regs->omr) | 0x1); out: return rc; out_irq: - dma_free_coherent(NULL, msg_tx_ring.size * RIO_MSG_DESC_SIZE, - msg_tx_ring.virt, msg_tx_ring.phys); + dma_free_coherent(NULL, priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE, + priv->msg_tx_ring.virt, priv->msg_tx_ring.phys); out_dma: - for (i = 0; i < msg_tx_ring.size; i++) + for (i = 0; i < priv->msg_tx_ring.size; i++) dma_free_coherent(NULL, RIO_MSG_BUFFER_SIZE, - msg_tx_ring.virt_buffer[i], - msg_tx_ring.phys_buffer[i]); + priv->msg_tx_ring.virt_buffer[i], + priv->msg_tx_ring.phys_buffer[i]); return rc; } @@ -499,15 +527,16 @@ int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entr */ void rio_close_outb_mbox(struct rio_mport *mport, int mbox) { + struct rio_priv *priv = mport->priv; /* Disable inbound message unit */ - out_be32((void *)&msg_regs->omr, 0); + out_be32(&priv->msg_regs->omr, 0); /* Free ring */ - dma_free_coherent(NULL, msg_tx_ring.size * RIO_MSG_DESC_SIZE, - msg_tx_ring.virt, msg_tx_ring.phys); + dma_free_coherent(NULL, priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE, + priv->msg_tx_ring.virt, priv->msg_tx_ring.phys); /* Free interrupt */ - free_irq(MPC85xx_IRQ_RIO_TX, (void *)mport); + free_irq(IRQ_RIO_TX(mport), (void *)mport); } /** @@ -523,12 +552,13 @@ fsl_rio_rx_handler(int irq, void *dev_instance) { int isr; struct rio_mport *port = (struct rio_mport *)dev_instance; + struct rio_priv *priv = port->priv; - isr = in_be32((void *)&msg_regs->isr); + isr = in_be32(&priv->msg_regs->isr); if (isr & RIO_MSG_ISR_TE) { pr_info("RIO: inbound message reception error\n"); - out_be32((void *)&msg_regs->isr, RIO_MSG_ISR_TE); + out_be32((void *)&priv->msg_regs->isr, RIO_MSG_ISR_TE); goto out; } @@ -540,10 +570,10 @@ fsl_rio_rx_handler(int irq, void *dev_instance) * make the callback with an unknown/invalid mailbox number * argument. 
*/ - port->inb_msg[0].mcback(port, msg_rx_ring.dev_id, -1, -1); + port->inb_msg[0].mcback(port, priv->msg_rx_ring.dev_id, -1, -1); /* Ack the queueing interrupt */ - out_be32((void *)&msg_regs->isr, RIO_MSG_ISR_DIQI); + out_be32(&priv->msg_regs->isr, RIO_MSG_ISR_DIQI); } out: @@ -564,6 +594,7 @@ fsl_rio_rx_handler(int irq, void *dev_instance) int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries) { int i, rc = 0; + struct rio_priv *priv = mport->priv; if ((entries < RIO_MIN_RX_RING_SIZE) || (entries > RIO_MAX_RX_RING_SIZE) || (!is_power_of_2(entries))) { @@ -572,36 +603,35 @@ int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entri } /* Initialize client buffer ring */ - msg_rx_ring.dev_id = dev_id; - msg_rx_ring.size = entries; - msg_rx_ring.rx_slot = 0; - for (i = 0; i < msg_rx_ring.size; i++) - msg_rx_ring.virt_buffer[i] = NULL; + priv->msg_rx_ring.dev_id = dev_id; + priv->msg_rx_ring.size = entries; + priv->msg_rx_ring.rx_slot = 0; + for (i = 0; i < priv->msg_rx_ring.size; i++) + priv->msg_rx_ring.virt_buffer[i] = NULL; /* Initialize inbound message ring */ - if (!(msg_rx_ring.virt = dma_alloc_coherent(NULL, - msg_rx_ring.size * - RIO_MAX_MSG_SIZE, - &msg_rx_ring.phys, - GFP_KERNEL))) { + priv->msg_rx_ring.virt = dma_alloc_coherent(NULL, + priv->msg_rx_ring.size * RIO_MAX_MSG_SIZE, + &priv->msg_rx_ring.phys, GFP_KERNEL); + if (!priv->msg_rx_ring.virt) { rc = -ENOMEM; goto out; } /* Point dequeue/enqueue pointers at first entry in ring */ - out_be32((void *)&msg_regs->ifqdpar, (u32) msg_rx_ring.phys); - out_be32((void *)&msg_regs->ifqepar, (u32) msg_rx_ring.phys); + out_be32(&priv->msg_regs->ifqdpar, (u32) priv->msg_rx_ring.phys); + out_be32(&priv->msg_regs->ifqepar, (u32) priv->msg_rx_ring.phys); /* Clear interrupt status */ - out_be32((void *)&msg_regs->isr, 0x00000091); + out_be32(&priv->msg_regs->isr, 0x00000091); /* Hook up inbound message handler */ - if ((rc = - request_irq(MPC85xx_IRQ_RIO_RX, 
fsl_rio_rx_handler, 0, - "msg_rx", (void *)mport)) < 0) { + rc = request_irq(IRQ_RIO_RX(mport), fsl_rio_rx_handler, 0, + "msg_rx", (void *)mport); + if (rc < 0) { dma_free_coherent(NULL, RIO_MSG_BUFFER_SIZE, - msg_tx_ring.virt_buffer[i], - msg_tx_ring.phys_buffer[i]); + priv->msg_tx_ring.virt_buffer[i], + priv->msg_tx_ring.phys_buffer[i]); goto out; } @@ -612,15 +642,13 @@ int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entri * Unmask all interrupt sources * Disable */ - out_be32((void *)&msg_regs->imr, 0x001b0060); + out_be32(&priv->msg_regs->imr, 0x001b0060); /* Set number of queue entries */ - out_be32((void *)&msg_regs->imr, - in_be32((void *)&msg_regs->imr) | - ((get_bitmask_order(entries) - 2) << 12)); + setbits32(&priv->msg_regs->imr, (get_bitmask_order(entries) - 2) << 12); /* Now enable the unit */ - out_be32((void *)&msg_regs->imr, in_be32((void *)&msg_regs->imr) | 0x1); + setbits32(&priv->msg_regs->imr, 0x1); out: return rc; @@ -636,15 +664,16 @@ int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entri */ void rio_close_inb_mbox(struct rio_mport *mport, int mbox) { + struct rio_priv *priv = mport->priv; /* Disable inbound message unit */ - out_be32((void *)&msg_regs->imr, 0); + out_be32(&priv->msg_regs->imr, 0); /* Free ring */ - dma_free_coherent(NULL, msg_rx_ring.size * RIO_MAX_MSG_SIZE, - msg_rx_ring.virt, msg_rx_ring.phys); + dma_free_coherent(NULL, priv->msg_rx_ring.size * RIO_MAX_MSG_SIZE, + priv->msg_rx_ring.virt, priv->msg_rx_ring.phys); /* Free interrupt */ - free_irq(MPC85xx_IRQ_RIO_RX, (void *)mport); + free_irq(IRQ_RIO_RX(mport), (void *)mport); } /** @@ -659,21 +688,22 @@ void rio_close_inb_mbox(struct rio_mport *mport, int mbox) int rio_hw_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf) { int rc = 0; + struct rio_priv *priv = mport->priv; pr_debug("RIO: rio_hw_add_inb_buffer(), msg_rx_ring.rx_slot %d\n", - msg_rx_ring.rx_slot); + priv->msg_rx_ring.rx_slot); - if 
(msg_rx_ring.virt_buffer[msg_rx_ring.rx_slot]) { + if (priv->msg_rx_ring.virt_buffer[priv->msg_rx_ring.rx_slot]) { printk(KERN_ERR "RIO: error adding inbound buffer %d, buffer exists\n", - msg_rx_ring.rx_slot); + priv->msg_rx_ring.rx_slot); rc = -EINVAL; goto out; } - msg_rx_ring.virt_buffer[msg_rx_ring.rx_slot] = buf; - if (++msg_rx_ring.rx_slot == msg_rx_ring.size) - msg_rx_ring.rx_slot = 0; + priv->msg_rx_ring.virt_buffer[priv->msg_rx_ring.rx_slot] = buf; + if (++priv->msg_rx_ring.rx_slot == priv->msg_rx_ring.size) + priv->msg_rx_ring.rx_slot = 0; out: return rc; @@ -691,20 +721,21 @@ EXPORT_SYMBOL_GPL(rio_hw_add_inb_buffer); */ void *rio_hw_get_inb_message(struct rio_mport *mport, int mbox) { - u32 imr; + struct rio_priv *priv = mport->priv; u32 phys_buf, virt_buf; void *buf = NULL; int buf_idx; - phys_buf = in_be32((void *)&msg_regs->ifqdpar); + phys_buf = in_be32(&priv->msg_regs->ifqdpar); /* If no more messages, then bail out */ - if (phys_buf == in_be32((void *)&msg_regs->ifqepar)) + if (phys_buf == in_be32(&priv->msg_regs->ifqepar)) goto out2; - virt_buf = (u32) msg_rx_ring.virt + (phys_buf - msg_rx_ring.phys); - buf_idx = (phys_buf - msg_rx_ring.phys) / RIO_MAX_MSG_SIZE; - buf = msg_rx_ring.virt_buffer[buf_idx]; + virt_buf = (u32) priv->msg_rx_ring.virt + (phys_buf + - priv->msg_rx_ring.phys); + buf_idx = (phys_buf - priv->msg_rx_ring.phys) / RIO_MAX_MSG_SIZE; + buf = priv->msg_rx_ring.virt_buffer[buf_idx]; if (!buf) { printk(KERN_ERR @@ -716,11 +747,10 @@ void *rio_hw_get_inb_message(struct rio_mport *mport, int mbox) memcpy(buf, (void *)virt_buf, RIO_MAX_MSG_SIZE); /* Clear the available buffer */ - msg_rx_ring.virt_buffer[buf_idx] = NULL; + priv->msg_rx_ring.virt_buffer[buf_idx] = NULL; out1: - imr = in_be32((void *)&msg_regs->imr); - out_be32((void *)&msg_regs->imr, imr | RIO_MSG_IMR_MI); + setbits32(&priv->msg_regs->imr, RIO_MSG_IMR_MI); out2: return buf; @@ -741,27 +771,27 @@ fsl_rio_dbell_handler(int irq, void *dev_instance) { int dsr; struct 
rio_mport *port = (struct rio_mport *)dev_instance; + struct rio_priv *priv = port->priv; - dsr = in_be32((void *)&msg_regs->dsr); + dsr = in_be32(&priv->msg_regs->dsr); if (dsr & DOORBELL_DSR_TE) { pr_info("RIO: doorbell reception error\n"); - out_be32((void *)&msg_regs->dsr, DOORBELL_DSR_TE); + out_be32(&priv->msg_regs->dsr, DOORBELL_DSR_TE); goto out; } if (dsr & DOORBELL_DSR_QFI) { pr_info("RIO: doorbell queue full\n"); - out_be32((void *)&msg_regs->dsr, DOORBELL_DSR_QFI); + out_be32(&priv->msg_regs->dsr, DOORBELL_DSR_QFI); goto out; } /* XXX Need to check/dispatch until queue empty */ if (dsr & DOORBELL_DSR_DIQI) { u32 dmsg = - (u32) dbell_ring.virt + - (in_be32((void *)&msg_regs->dqdpar) & 0xfff); - u32 dmr; + (u32) priv->dbell_ring.virt + + (in_be32(&priv->msg_regs->dqdpar) & 0xfff); struct rio_dbell *dbell; int found = 0; @@ -784,9 +814,8 @@ fsl_rio_dbell_handler(int irq, void *dev_instance) ("RIO: spurious doorbell, sid %2.2x tid %2.2x info %4.4x\n", DBELL_SID(dmsg), DBELL_TID(dmsg), DBELL_INF(dmsg)); } - dmr = in_be32((void *)&msg_regs->dmr); - out_be32((void *)&msg_regs->dmr, dmr | DOORBELL_DMR_DI); - out_be32((void *)&msg_regs->dsr, DOORBELL_DSR_DIQI); + setbits32(&priv->msg_regs->dmr, DOORBELL_DMR_DI); + out_be32(&priv->msg_regs->dsr, DOORBELL_DSR_DIQI); } out: @@ -803,12 +832,13 @@ fsl_rio_dbell_handler(int irq, void *dev_instance) */ static int fsl_rio_doorbell_init(struct rio_mport *mport) { + struct rio_priv *priv = mport->priv; int rc = 0; /* Map outbound doorbell window immediately after maintenance window */ - if (!(dbell_win = - (u32) ioremap(mport->iores.start + RIO_MAINT_WIN_SIZE, - RIO_DBELL_WIN_SIZE))) { + priv->dbell_win = ioremap(mport->iores.start + RIO_MAINT_WIN_SIZE, + RIO_DBELL_WIN_SIZE); + if (!priv->dbell_win) { printk(KERN_ERR "RIO: unable to map outbound doorbell window\n"); rc = -ENOMEM; @@ -816,37 +846,36 @@ static int fsl_rio_doorbell_init(struct rio_mport *mport) } /* Initialize inbound doorbells */ - if (!(dbell_ring.virt = 
dma_alloc_coherent(NULL, - 512 * DOORBELL_MESSAGE_SIZE, - &dbell_ring.phys, - GFP_KERNEL))) { + priv->dbell_ring.virt = dma_alloc_coherent(NULL, 512 * + DOORBELL_MESSAGE_SIZE, &priv->dbell_ring.phys, GFP_KERNEL); + if (!priv->dbell_ring.virt) { printk(KERN_ERR "RIO: unable allocate inbound doorbell ring\n"); rc = -ENOMEM; - iounmap((void *)dbell_win); + iounmap(priv->dbell_win); goto out; } /* Point dequeue/enqueue pointers at first entry in ring */ - out_be32((void *)&msg_regs->dqdpar, (u32) dbell_ring.phys); - out_be32((void *)&msg_regs->dqepar, (u32) dbell_ring.phys); + out_be32(&priv->msg_regs->dqdpar, (u32) priv->dbell_ring.phys); + out_be32(&priv->msg_regs->dqepar, (u32) priv->dbell_ring.phys); /* Clear interrupt status */ - out_be32((void *)&msg_regs->dsr, 0x00000091); + out_be32(&priv->msg_regs->dsr, 0x00000091); /* Hook up doorbell handler */ - if ((rc = - request_irq(MPC85xx_IRQ_RIO_BELL, fsl_rio_dbell_handler, 0, - "dbell_rx", (void *)mport) < 0)) { - iounmap((void *)dbell_win); + rc = request_irq(IRQ_RIO_BELL(mport), fsl_rio_dbell_handler, 0, + "dbell_rx", (void *)mport); + if (rc < 0) { + iounmap(priv->dbell_win); dma_free_coherent(NULL, 512 * DOORBELL_MESSAGE_SIZE, - dbell_ring.virt, dbell_ring.phys); + priv->dbell_ring.virt, priv->dbell_ring.phys); printk(KERN_ERR "MPC85xx RIO: unable to request inbound doorbell irq"); goto out; } /* Configure doorbells for snooping, 512 entries, and enable */ - out_be32((void *)&msg_regs->dmr, 0x00108161); + out_be32(&priv->msg_regs->dmr, 0x00108161); out: return rc; @@ -887,6 +916,8 @@ void fsl_rio_setup(int law_start, int law_size) { struct rio_ops *ops; struct rio_mport *port; + struct rio_priv *priv = NULL; + int rc; ops = kmalloc(sizeof(struct rio_ops), GFP_KERNEL); ops->lcread = fsl_local_config_read; @@ -895,9 +926,17 @@ void fsl_rio_setup(int law_start, int law_size) ops->cwrite = fsl_rio_config_write; ops->dsend = fsl_rio_doorbell_send; - port = kmalloc(sizeof(struct rio_mport), GFP_KERNEL); + port = 
kzalloc(sizeof(struct rio_mport), GFP_KERNEL); port->id = 0; port->index = 0; + + priv = kzalloc(sizeof(struct rio_priv), GFP_KERNEL); + if (!priv) { + printk(KERN_ERR "Can't alloc memory for 'priv'\n"); + rc = -ENOMEM; + goto err; + } + INIT_LIST_HEAD(&port->dbells); port->iores.start = law_start; port->iores.end = law_start + law_size; @@ -911,22 +950,32 @@ void fsl_rio_setup(int law_start, int law_size) port->ops = ops; port->host_deviceid = fsl_rio_get_hdid(port->id); + port->priv = priv; rio_register_mport(port); - regs_win = (u32) ioremap(RIO_REGS_BASE, 0x20000); - atmu_regs = (struct rio_atmu_regs *)(regs_win + RIO_ATMU_REGS_OFFSET); - maint_atmu_regs = atmu_regs + 1; - dbell_atmu_regs = atmu_regs + 2; - msg_regs = (struct rio_msg_regs *)(regs_win + RIO_MSG_REGS_OFFSET); + priv->regs_win = ioremap(RIO_REGS_BASE, 0x20000); + priv->atmu_regs = (struct rio_atmu_regs *)(priv->regs_win + + RIO_ATMU_REGS_OFFSET); + priv->maint_atmu_regs = priv->atmu_regs + 1; + priv->dbell_atmu_regs = priv->atmu_regs + 2; + priv->msg_regs = (struct rio_msg_regs *)(priv->regs_win + + RIO_MSG_REGS_OFFSET); /* Configure maintenance transaction window */ - out_be32((void *)&maint_atmu_regs->rowbar, 0x000c0000); - out_be32((void *)&maint_atmu_regs->rowar, 0x80077015); + out_be32(&priv->maint_atmu_regs->rowbar, 0x000c0000); + out_be32(&priv->maint_atmu_regs->rowar, 0x80077015); - maint_win = (u32) ioremap(law_start, RIO_MAINT_WIN_SIZE); + priv->maint_win = ioremap(law_start, RIO_MAINT_WIN_SIZE); /* Configure outbound doorbell window */ - out_be32((void *)&dbell_atmu_regs->rowbar, 0x000c0400); - out_be32((void *)&dbell_atmu_regs->rowar, 0x8004200b); + out_be32(&priv->dbell_atmu_regs->rowbar, 0x000c0400); + out_be32(&priv->dbell_atmu_regs->rowar, 0x8004200b); fsl_rio_doorbell_init(port); + + return; +err: + if (priv) + iounmap(priv->regs_win); + kfree(priv); + kfree(port); } diff --git a/drivers/rapidio/rio-access.c b/drivers/rapidio/rio-access.c index 8b56bbdd011e..a3824baca2e5 100644 
--- a/drivers/rapidio/rio-access.c +++ b/drivers/rapidio/rio-access.c @@ -48,7 +48,7 @@ int __rio_local_read_config_##size \ u32 data = 0; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->lcread(mport->id, offset, len, &data); \ + res = mport->ops->lcread(mport, mport->id, offset, len, &data); \ *value = (type)data; \ spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ @@ -71,7 +71,7 @@ int __rio_local_write_config_##size \ unsigned long flags; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->lcwrite(mport->id, offset, len, value); \ + res = mport->ops->lcwrite(mport, mport->id, offset, len, value);\ spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ } @@ -108,7 +108,7 @@ int rio_mport_read_config_##size \ u32 data = 0; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->cread(mport->id, destid, hopcount, offset, len, &data); \ + res = mport->ops->cread(mport, mport->id, destid, hopcount, offset, len, &data); \ *value = (type)data; \ spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ @@ -131,7 +131,7 @@ int rio_mport_write_config_##size \ unsigned long flags; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->cwrite(mport->id, destid, hopcount, offset, len, value); \ + res = mport->ops->cwrite(mport, mport->id, destid, hopcount, offset, len, value); \ spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ } @@ -166,7 +166,7 @@ int rio_mport_send_doorbell(struct rio_mport *mport, u16 destid, u16 data) unsigned long flags; spin_lock_irqsave(&rio_doorbell_lock, flags); - res = mport->ops->dsend(mport->id, destid, data); + res = mport->ops->dsend(mport, mport->id, destid, data); spin_unlock_irqrestore(&rio_doorbell_lock, flags); return res; diff --git a/include/linux/rio.h 
b/include/linux/rio.h index 68e3f6853fa6..258c453f43f6 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -163,6 +163,7 @@ struct rio_dbell { * @id: Port ID, unique among all ports * @index: Port index, unique among all port interfaces of the same type * @name: Port name string + * @priv: Master port private data */ struct rio_mport { struct list_head dbells; /* list of doorbell events */ @@ -178,6 +179,7 @@ struct rio_mport { unsigned char index; /* port index, unique among all port interfaces of the same type */ unsigned char name[40]; + void *priv; /* Master port private data */ }; /** @@ -229,13 +231,15 @@ struct rio_switch { * @dsend: Callback to send a doorbell message. */ struct rio_ops { - int (*lcread) (int index, u32 offset, int len, u32 * data); - int (*lcwrite) (int index, u32 offset, int len, u32 data); - int (*cread) (int index, u16 destid, u8 hopcount, u32 offset, int len, - u32 * data); - int (*cwrite) (int index, u16 destid, u8 hopcount, u32 offset, int len, - u32 data); - int (*dsend) (int index, u16 destid, u16 data); + int (*lcread) (struct rio_mport *mport, int index, u32 offset, int len, + u32 *data); + int (*lcwrite) (struct rio_mport *mport, int index, u32 offset, int len, + u32 data); + int (*cread) (struct rio_mport *mport, int index, u16 destid, + u8 hopcount, u32 offset, int len, u32 *data); + int (*cwrite) (struct rio_mport *mport, int index, u16 destid, + u8 hopcount, u32 offset, int len, u32 data); + int (*dsend) (struct rio_mport *mport, int index, u16 destid, u16 data); }; #define RIO_RESOURCE_MEM 0x00000100 -- cgit v1.2.3-71-gd317 From e042323607f5c14844b1c59aebbca8d1251c7d89 Mon Sep 17 00:00:00 2001 From: Zhang Wei Date: Fri, 18 Apr 2008 13:33:42 -0700 Subject: [RAPIDIO] Auto-probe the RapidIO system size The RapidIO system size will auto probe in RIO setup. The route table and rionet_active in rionet.c are changed to be allocated dynamically according to the size of the system. 
Signed-off-by: Zhang Wei Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/fsl_rio.c | 6 +++++ drivers/net/rionet.c | 16 +++++++++++-- drivers/rapidio/Kconfig | 8 ------- drivers/rapidio/rio-scan.c | 55 ++++++++++++++++++++++++++++++------------- drivers/rapidio/rio-sysfs.c | 3 ++- drivers/rapidio/rio.c | 2 +- drivers/rapidio/rio.h | 9 ++----- include/linux/rio.h | 14 +++++------ 8 files changed, 71 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index d2d14708e714..14c106cf4aa2 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -1007,6 +1007,12 @@ int fsl_rio_setup(struct of_device *dev) rio_register_mport(port); priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1); + + port->sys_size = (in_be32((priv->regs_win + RIO_PEF_CAR)) + & RIO_PEF_CTLS) >> 4; + dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n", + port->sys_size ? 65536 : 256); + priv->atmu_regs = (struct rio_atmu_regs *)(priv->regs_win + RIO_ATMU_REGS_OFFSET); priv->maint_atmu_regs = priv->atmu_regs + 1; diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index e7fd08adbbac..2b8fd68bc516 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -77,7 +77,7 @@ static int rionet_capable = 1; * could be made into a hash table to save memory depending * on system trade-offs. 
*/ -static struct rio_dev *rionet_active[RIO_MAX_ROUTE_ENTRIES]; +static struct rio_dev **rionet_active; #define is_rionet_capable(pef, src_ops, dst_ops) \ ((pef & RIO_PEF_INB_MBOX) && \ @@ -195,7 +195,8 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } if (eth->h_dest[0] & 0x01) { - for (i = 0; i < RIO_MAX_ROUTE_ENTRIES; i++) + for (i = 0; i < RIO_MAX_ROUTE_ENTRIES(rnet->mport->sys_size); + i++) if (rionet_active[i]) rionet_queue_tx_msg(skb, ndev, rionet_active[i]); @@ -385,6 +386,8 @@ static void rionet_remove(struct rio_dev *rdev) struct net_device *ndev = NULL; struct rionet_peer *peer, *tmp; + free_pages((unsigned long)rionet_active, rdev->net->hport->sys_size ? + __ilog2(sizeof(void *)) + 4 : 0); unregister_netdev(ndev); kfree(ndev); @@ -443,6 +446,15 @@ static int rionet_setup_netdev(struct rio_mport *mport) goto out; } + rionet_active = (struct rio_dev **)__get_free_pages(GFP_KERNEL, + mport->sys_size ? __ilog2(sizeof(void *)) + 4 : 0); + if (!rionet_active) { + rc = -ENOMEM; + goto out; + } + memset((void *)rionet_active, 0, sizeof(void *) * + RIO_MAX_ROUTE_ENTRIES(mport->sys_size)); + /* Set up private area */ rnet = (struct rionet_private *)ndev->priv; rnet->mport = mport; diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig index 4142115d298e..c32822ad84a4 100644 --- a/drivers/rapidio/Kconfig +++ b/drivers/rapidio/Kconfig @@ -1,14 +1,6 @@ # # RapidIO configuration # -config RAPIDIO_8_BIT_TRANSPORT - bool "8-bit transport addressing" - depends on RAPIDIO - ---help--- - By default, the kernel assumes a 16-bit addressed RapidIO - network. By selecting this option, the kernel will support - an 8-bit addressed network. 
- config RAPIDIO_DISC_TIMEOUT int "Discovery timeout duration (seconds)" depends on RAPIDIO diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 44420723a359..a926c896475e 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -73,7 +73,7 @@ static u16 rio_get_device_id(struct rio_mport *port, u16 destid, u8 hopcount) rio_mport_read_config_32(port, destid, hopcount, RIO_DID_CSR, &result); - return RIO_GET_DID(result); + return RIO_GET_DID(port->sys_size, result); } /** @@ -88,7 +88,7 @@ static u16 rio_get_device_id(struct rio_mport *port, u16 destid, u8 hopcount) static void rio_set_device_id(struct rio_mport *port, u16 destid, u8 hopcount, u16 did) { rio_mport_write_config_32(port, destid, hopcount, RIO_DID_CSR, - RIO_SET_DID(did)); + RIO_SET_DID(port->sys_size, did)); } /** @@ -100,7 +100,8 @@ static void rio_set_device_id(struct rio_mport *port, u16 destid, u8 hopcount, u */ static void rio_local_set_device_id(struct rio_mport *port, u16 did) { - rio_local_write_config_32(port, RIO_DID_CSR, RIO_SET_DID(did)); + rio_local_write_config_32(port, RIO_DID_CSR, RIO_SET_DID(port->sys_size, + did)); } /** @@ -350,8 +351,18 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rswitch->switchid = next_switchid; rswitch->hopcount = hopcount; rswitch->destid = destid; + rswitch->route_table = kzalloc(sizeof(u8)* + RIO_MAX_ROUTE_ENTRIES(port->sys_size), + GFP_KERNEL); + if (!rswitch->route_table) { + kfree(rdev); + rdev = NULL; + kfree(rswitch); + goto out; + } /* Initialize switch route table */ - for (rdid = 0; rdid < RIO_MAX_ROUTE_ENTRIES; rdid++) + for (rdid = 0; rdid < RIO_MAX_ROUTE_ENTRIES(port->sys_size); + rdid++) rswitch->route_table[rdid] = RIO_INVALID_ROUTE; rdev->rswitch = rswitch; sprintf(rio_name(rdev), "%02x:s:%04x", rdev->net->id, @@ -480,7 +491,7 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) { u32 result; - rio_mport_read_config_32(port, RIO_ANY_DESTID, hopcount, + 
rio_mport_read_config_32(port, RIO_ANY_DESTID(port->sys_size), hopcount, RIO_HOST_DID_LOCK_CSR, &result); return (u16) (result & 0xffff); @@ -571,14 +582,16 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, } /* Attempt to acquire device lock */ - rio_mport_write_config_32(port, RIO_ANY_DESTID, hopcount, + rio_mport_write_config_32(port, RIO_ANY_DESTID(port->sys_size), + hopcount, RIO_HOST_DID_LOCK_CSR, port->host_deviceid); while ((tmp = rio_get_host_deviceid_lock(port, hopcount)) < port->host_deviceid) { /* Delay a bit */ mdelay(1); /* Attempt to acquire device lock again */ - rio_mport_write_config_32(port, RIO_ANY_DESTID, hopcount, + rio_mport_write_config_32(port, RIO_ANY_DESTID(port->sys_size), + hopcount, RIO_HOST_DID_LOCK_CSR, port->host_deviceid); } @@ -590,7 +603,9 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, } /* Setup new RIO device */ - if ((rdev = rio_setup_device(net, port, RIO_ANY_DESTID, hopcount, 1))) { + rdev = rio_setup_device(net, port, RIO_ANY_DESTID(port->sys_size), + hopcount, 1); + if (rdev) { /* Add device to the global and bus/net specific list. 
*/ list_add_tail(&rdev->net_list, &net->devices); } else @@ -598,7 +613,8 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, if (rio_is_switch(rdev)) { next_switchid++; - sw_inport = rio_get_swpinfo_inport(port, RIO_ANY_DESTID, hopcount); + sw_inport = rio_get_swpinfo_inport(port, + RIO_ANY_DESTID(port->sys_size), hopcount); rio_route_add_entry(port, rdev->rswitch, RIO_GLOBAL_TABLE, port->host_deviceid, sw_inport); rdev->rswitch->route_table[port->host_deviceid] = sw_inport; @@ -612,7 +628,8 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, } num_ports = - rio_get_swpinfo_tports(port, RIO_ANY_DESTID, hopcount); + rio_get_swpinfo_tports(port, RIO_ANY_DESTID(port->sys_size), + hopcount); pr_debug( "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", rio_name(rdev), rdev->vid, rdev->did, num_ports); @@ -624,13 +641,15 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, cur_destid = next_destid; if (rio_sport_is_active - (port, RIO_ANY_DESTID, hopcount, port_num)) { + (port, RIO_ANY_DESTID(port->sys_size), hopcount, + port_num)) { pr_debug( "RIO: scanning device on port %d\n", port_num); rio_route_add_entry(port, rdev->rswitch, - RIO_GLOBAL_TABLE, - RIO_ANY_DESTID, port_num); + RIO_GLOBAL_TABLE, + RIO_ANY_DESTID(port->sys_size), + port_num); if (rio_enum_peer(net, port, hopcount + 1) < 0) return -1; @@ -735,7 +754,8 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, pr_debug( "RIO: scanning device on port %d\n", port_num); - for (ndestid = 0; ndestid < RIO_ANY_DESTID; + for (ndestid = 0; + ndestid < RIO_ANY_DESTID(port->sys_size); ndestid++) { rio_route_get_entry(port, rdev->rswitch, RIO_GLOBAL_TABLE, @@ -917,7 +937,9 @@ static void rio_build_route_tables(void) list_for_each_entry(rdev, &rio_devices, global_list) if (rio_is_switch(rdev)) - for (i = 0; i < RIO_MAX_ROUTE_ENTRIES; i++) { + for (i = 0; + i < RIO_MAX_ROUTE_ENTRIES(rdev->net->hport->sys_size); + i++) { if 
(rio_route_get_entry (rdev->net->hport, rdev->rswitch, RIO_GLOBAL_TABLE, i, &sport) < 0) @@ -981,7 +1003,8 @@ int rio_disc_mport(struct rio_mport *mport) del_timer_sync(&rio_enum_timer); pr_debug("done\n"); - if (rio_disc_peer(net, mport, RIO_ANY_DESTID, 0) < 0) { + if (rio_disc_peer(net, mport, RIO_ANY_DESTID(mport->sys_size), + 0) < 0) { printk(KERN_INFO "RIO: master port %d device has failed discovery\n", mport->id); diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 659e31164cf0..97a147f050d6 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -43,7 +43,8 @@ static ssize_t routes_show(struct device *dev, struct device_attribute *attr, ch if (!rdev->rswitch) goto out; - for (i = 0; i < RIO_MAX_ROUTE_ENTRIES; i++) { + for (i = 0; i < RIO_MAX_ROUTE_ENTRIES(rdev->net->hport->sys_size); + i++) { if (rdev->rswitch->route_table[i] == RIO_INVALID_ROUTE) continue; str += diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 80c5f1ba2e49..680661abbc4b 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -43,7 +43,7 @@ u16 rio_local_get_device_id(struct rio_mport *port) rio_local_read_config_32(port, RIO_DID_CSR, &result); - return (RIO_GET_DID(result)); + return (RIO_GET_DID(port->sys_size, result)); } /** diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index 80e3f03b5041..7786d02581f2 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -51,10 +51,5 @@ extern struct rio_route_ops __end_rio_route_ops[]; DECLARE_RIO_ROUTE_SECTION(.rio_route_ops, \ vid, did, add_hook, get_hook) -#ifdef CONFIG_RAPIDIO_8_BIT_TRANSPORT -#define RIO_GET_DID(x) ((x & 0x00ff0000) >> 16) -#define RIO_SET_DID(x) ((x & 0x000000ff) << 16) -#else -#define RIO_GET_DID(x) (x & 0xffff) -#define RIO_SET_DID(x) (x & 0xffff) -#endif +#define RIO_GET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x00ff0000) >> 16)) +#define RIO_SET_DID(size, x) (size ? 
(x & 0xffff) : ((x & 0x000000ff) << 16)) diff --git a/include/linux/rio.h b/include/linux/rio.h index 258c453f43f6..4a064bcd7c0c 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -23,7 +23,6 @@ #include #include -#define RIO_ANY_DESTID 0xff #define RIO_NO_HOPCOUNT -1 #define RIO_INVALID_DESTID 0xffff @@ -39,11 +38,8 @@ entry is invalid (no route exists for the device ID) */ -#ifdef CONFIG_RAPIDIO_8_BIT_TRANSPORT -#define RIO_MAX_ROUTE_ENTRIES (1 << 8) -#else -#define RIO_MAX_ROUTE_ENTRIES (1 << 16) -#endif +#define RIO_MAX_ROUTE_ENTRIES(size) (size ? (1 << 16) : (1 << 8)) +#define RIO_ANY_DESTID(size) (size ? 0xffff : 0xff) #define RIO_MAX_MBOX 4 #define RIO_MAX_MSG_SIZE 0x1000 @@ -178,6 +174,10 @@ struct rio_mport { unsigned char id; /* port ID, unique among all ports */ unsigned char index; /* port index, unique among all port interfaces of the same type */ + unsigned int sys_size; /* RapidIO common transport system size. + * 0 - Small size. 256 devices. + * 1 - Large size, 65536 devices. 
+ */ unsigned char name[40]; void *priv; /* Master port private data */ }; @@ -213,7 +213,7 @@ struct rio_switch { u16 switchid; u16 hopcount; u16 destid; - u8 route_table[RIO_MAX_ROUTE_ENTRIES]; + u8 *route_table; int (*add_entry) (struct rio_mport * mport, u16 destid, u8 hopcount, u16 table, u16 route_destid, u8 route_port); int (*get_entry) (struct rio_mport * mport, u16 destid, u8 hopcount, -- cgit v1.2.3-71-gd317 From 61b269179df582bb363f871e88f732fe8af62a5e Mon Sep 17 00:00:00 2001 From: Zhang Wei Date: Fri, 18 Apr 2008 13:33:44 -0700 Subject: [RAPIDIO] Add serial RapidIO controller support, which includes MPC8548, MPC8641 Signed-off-by: Zhang Wei Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/fsl_rio.c | 75 ++++++++++++++++++++++++++++++++++++------- include/linux/rio.h | 6 ++++ 2 files changed, 70 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 14c106cf4aa2..935822a25030 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -30,7 +31,12 @@ #define IRQ_RIO_RX(m) (((struct rio_priv *)(m->priv))->rxirq) #define RIO_ATMU_REGS_OFFSET 0x10c00 -#define RIO_MSG_REGS_OFFSET 0x11000 +#define RIO_P_MSG_REGS_OFFSET 0x11000 +#define RIO_S_MSG_REGS_OFFSET 0x13000 +#define RIO_ESCSR 0x158 +#define RIO_CCSR 0x15c +#define RIO_ISR_AACR 0x10120 +#define RIO_ISR_AACR_AA 0x1 /* Accept All ID */ #define RIO_MAINT_WIN_SIZE 0x400000 #define RIO_DBELL_WIN_SIZE 0x1000 @@ -69,7 +75,7 @@ struct rio_atmu_regs { u32 rowtar; - u32 pad1; + u32 rowtear; u32 rowbar; u32 pad2; u32 rowar; @@ -95,7 +101,15 @@ struct rio_msg_regs { u32 ifqdpar; u32 pad6; u32 ifqepar; - u32 pad7[250]; + u32 pad7[226]; + u32 odmr; + u32 odsr; + u32 res0[4]; + u32 oddpr; + u32 oddatr; + u32 res1[3]; + u32 odretcr; + u32 res2[12]; u32 dmr; u32 dsr; u32 pad8; @@ -175,8 +189,22 @@ static int 
fsl_rio_doorbell_send(struct rio_mport *mport, struct rio_priv *priv = mport->priv; pr_debug("fsl_doorbell_send: index %d destid %4.4x data %4.4x\n", index, destid, data); - out_be32(&priv->dbell_atmu_regs->rowtar, destid << 22); - out_be16(priv->dbell_win, data); + switch (mport->phy_type) { + case RIO_PHY_PARALLEL: + out_be32(&priv->dbell_atmu_regs->rowtar, destid << 22); + out_be16(priv->dbell_win, data); + break; + case RIO_PHY_SERIAL: + /* In the serial version silicons, such as MPC8548, MPC8641, + * below operations is must be. + */ + out_be32(&priv->msg_regs->odmr, 0x00000000); + out_be32(&priv->msg_regs->odretcr, 0x00000004); + out_be32(&priv->msg_regs->oddpr, destid << 16); + out_be32(&priv->msg_regs->oddatr, data); + out_be32(&priv->msg_regs->odmr, 0x00000001); + break; + } return 0; } @@ -342,11 +370,22 @@ rio_hw_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox, memset(priv->msg_tx_ring.virt_buffer[priv->msg_tx_ring.tx_slot] + len, 0, RIO_MAX_MSG_SIZE - len); - /* Set mbox field for message */ - desc->dport = mbox & 0x3; + switch (mport->phy_type) { + case RIO_PHY_PARALLEL: + /* Set mbox field for message */ + desc->dport = mbox & 0x3; - /* Enable EOMI interrupt, set priority, and set destid */ - desc->dattr = 0x28000000 | (rdev->destid << 2); + /* Enable EOMI interrupt, set priority, and set destid */ + desc->dattr = 0x28000000 | (rdev->destid << 2); + break; + case RIO_PHY_SERIAL: + /* Set mbox field for message, and set destid */ + desc->dport = (rdev->destid << 16) | (mbox & 0x3); + + /* Enable EOMI interrupt and priority */ + desc->dattr = 0x28000000; + break; + } /* Set transfer size aligned to next power of 2 (in double words) */ desc->dwcnt = is_power_of_2(len) ? 
len : 1 << get_bitmask_order(len); @@ -920,6 +959,7 @@ int fsl_rio_setup(struct of_device *dev) const u32 *dt_range, *cell; struct resource regs; int rlen; + u32 ccsr; u64 law_start, law_size; int paw, aw, sw; @@ -1008,6 +1048,14 @@ int fsl_rio_setup(struct of_device *dev) priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1); + /* Probe the master port phy type */ + ccsr = in_be32(priv->regs_win + RIO_CCSR); + port->phy_type = (ccsr & 1) ? RIO_PHY_SERIAL : RIO_PHY_PARALLEL; + dev_info(&dev->dev, "RapidIO PHY type: %s\n", + (port->phy_type == RIO_PHY_PARALLEL) ? "parallel" : + ((port->phy_type == RIO_PHY_SERIAL) ? "serial" : + "unknown")); + port->sys_size = (in_be32((priv->regs_win + RIO_PEF_CAR)) & RIO_PEF_CTLS) >> 4; dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n", @@ -1017,8 +1065,13 @@ int fsl_rio_setup(struct of_device *dev) + RIO_ATMU_REGS_OFFSET); priv->maint_atmu_regs = priv->atmu_regs + 1; priv->dbell_atmu_regs = priv->atmu_regs + 2; - priv->msg_regs = (struct rio_msg_regs *)(priv->regs_win - + RIO_MSG_REGS_OFFSET); + priv->msg_regs = (struct rio_msg_regs *)(priv->regs_win + + ((port->phy_type == RIO_PHY_SERIAL) ? + RIO_S_MSG_REGS_OFFSET : RIO_P_MSG_REGS_OFFSET)); + + /* Set to receive any dist ID for serial RapidIO controller. */ + if (port->phy_type == RIO_PHY_SERIAL) + out_be32((priv->regs_win + RIO_ISR_AACR), RIO_ISR_AACR_AA); /* Configure maintenance transaction window */ out_be32(&priv->maint_atmu_regs->rowbar, 0x000c0000); diff --git a/include/linux/rio.h b/include/linux/rio.h index 4a064bcd7c0c..cfb66bbc0f27 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -145,6 +145,11 @@ struct rio_dbell { void *dev_id; }; +enum rio_phy_type { + RIO_PHY_PARALLEL, + RIO_PHY_SERIAL, +}; + /** * struct rio_mport - RIO master port info * @dbells: List of doorbell events @@ -178,6 +183,7 @@ struct rio_mport { * 0 - Small size. 256 devices. * 1 - Large size, 65536 devices. 
*/ + enum rio_phy_type phy_type; /* RapidIO phy type */ unsigned char name[40]; void *priv; /* Master port private data */ }; -- cgit v1.2.3-71-gd317 From 75ad23bc0fcb4f992a5d06982bf0857ab1738e9e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 29 Apr 2008 14:48:33 +0200 Subject: block: make queue flags non-atomic We can save some atomic ops in the IO path, if we clearly define the rules of how to modify the queue flags. Signed-off-by: Jens Axboe --- block/blk-core.c | 39 ++++++++++++++++++++++++++------------- block/blk-merge.c | 6 +++--- block/blk-settings.c | 2 +- block/blk-tag.c | 8 ++++---- block/elevator.c | 13 ++++++++++--- drivers/block/loop.c | 2 +- drivers/block/ub.c | 2 +- drivers/md/dm-table.c | 7 +++++-- drivers/md/md.c | 3 ++- drivers/scsi/scsi_debug.c | 2 +- drivers/scsi/scsi_lib.c | 31 ++++++++++++++++++------------- drivers/scsi/scsi_transport_sas.c | 3 +-- include/linux/blkdev.h | 33 +++++++++++++++++++++++++++++---- 13 files changed, 102 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index e447799256d6..d2f23ec5ebfa 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -198,7 +198,8 @@ void blk_plug_device(struct request_queue *q) if (blk_queue_stopped(q)) return; - if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { + __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); } @@ -213,9 +214,10 @@ int blk_remove_plug(struct request_queue *q) { WARN_ON(!irqs_disabled()); - if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) return 0; + queue_flag_clear(QUEUE_FLAG_PLUGGED, q); del_timer(&q->unplug_timer); return 1; } @@ -311,15 +313,16 @@ void blk_start_queue(struct request_queue *q) { WARN_ON(!irqs_disabled()); - clear_bit(QUEUE_FLAG_STOPPED, 
&q->queue_flags); + queue_flag_clear(QUEUE_FLAG_STOPPED, q); /* * one level of recursion is ok and is much faster than kicking * the unplug handling */ - if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + queue_flag_set(QUEUE_FLAG_REENTER, q); q->request_fn(q); - clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_REENTER, q); } else { blk_plug_device(q); kblockd_schedule_work(&q->unplug_work); @@ -344,7 +347,7 @@ EXPORT_SYMBOL(blk_start_queue); void blk_stop_queue(struct request_queue *q) { blk_remove_plug(q); - set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_STOPPED, q); } EXPORT_SYMBOL(blk_stop_queue); @@ -373,11 +376,8 @@ EXPORT_SYMBOL(blk_sync_queue); * blk_run_queue - run a single device queue * @q: The queue to run */ -void blk_run_queue(struct request_queue *q) +void __blk_run_queue(struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); blk_remove_plug(q); /* @@ -385,15 +385,28 @@ void blk_run_queue(struct request_queue *q) * handling reinvoke the handler shortly if we already got there. 
*/ if (!elv_queue_empty(q)) { - if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + queue_flag_set(QUEUE_FLAG_REENTER, q); q->request_fn(q); - clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_REENTER, q); } else { blk_plug_device(q); kblockd_schedule_work(&q->unplug_work); } } +} +EXPORT_SYMBOL(__blk_run_queue); +/** + * blk_run_queue - run a single device queue + * @q: The queue to run + */ +void blk_run_queue(struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); @@ -406,7 +419,7 @@ void blk_put_queue(struct request_queue *q) void blk_cleanup_queue(struct request_queue *q) { mutex_lock(&q->sysfs_lock); - set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); + queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); mutex_unlock(&q->sysfs_lock); if (q->elevator) diff --git a/block/blk-merge.c b/block/blk-merge.c index b5c5c4a9e3f0..73b23562af20 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -55,7 +55,7 @@ void blk_recalc_rq_segments(struct request *rq) if (!rq->bio) return; - cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); + cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); hw_seg_size = seg_size = 0; phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; rq_for_each_segment(bv, rq, iter) { @@ -128,7 +128,7 @@ EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { - if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) + if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) return 0; if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) @@ -175,7 +175,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, int nsegs, cluster; nsegs = 0; - cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); + cluster = test_bit(QUEUE_FLAG_CLUSTER, 
&q->queue_flags); /* * for each bio in rq diff --git a/block/blk-settings.c b/block/blk-settings.c index 77b51dc37a3c..6089384ab064 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -287,7 +287,7 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) t->max_segment_size = min(t->max_segment_size, b->max_segment_size); t->hardsect_size = max(t->hardsect_size, b->hardsect_size); if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) - clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); + queue_flag_clear(QUEUE_FLAG_CLUSTER, t); } EXPORT_SYMBOL(blk_queue_stack_limits); diff --git a/block/blk-tag.c b/block/blk-tag.c index 4780a46ce234..e176ddbe599e 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -70,7 +70,7 @@ void __blk_queue_free_tags(struct request_queue *q) __blk_free_tags(bqt); q->queue_tags = NULL; - q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); + queue_flag_clear(QUEUE_FLAG_QUEUED, q); } /** @@ -98,7 +98,7 @@ EXPORT_SYMBOL(blk_free_tags); **/ void blk_queue_free_tags(struct request_queue *q) { - clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_QUEUED, q); } EXPORT_SYMBOL(blk_queue_free_tags); @@ -188,7 +188,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth, rc = blk_queue_resize_tags(q, depth); if (rc) return rc; - set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_QUEUED, q); return 0; } else atomic_inc(&tags->refcnt); @@ -197,7 +197,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth, * assign it, all done */ q->queue_tags = tags; - q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); + queue_flag_set(QUEUE_FLAG_QUEUED, q); INIT_LIST_HEAD(&q->tag_busy_list); return 0; fail: diff --git a/block/elevator.c b/block/elevator.c index 88318c383608..e8a90fe23424 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1070,7 +1070,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) */ spin_lock_irq(q->queue_lock); - 
set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); elv_drain_elevator(q); @@ -1104,7 +1104,10 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * finally exit old elevator and turn off BYPASS. */ elevator_exit(old_elevator); - clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + spin_lock_irq(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); + spin_unlock_irq(q->queue_lock); + return 1; fail_register: @@ -1115,7 +1118,11 @@ fail_register: elevator_exit(e); q->elevator = old_elevator; elv_register_queue(q); - clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + + spin_lock_irq(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); + spin_unlock_irq(q->queue_lock); + return 0; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f7f163557aa0..d3a25b027ff9 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -546,7 +546,7 @@ static void loop_unplug(struct request_queue *q) { struct loop_device *lo = q->queuedata; - clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); + queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q); blk_run_address_space(lo->lo_backing_file->f_mapping); } diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 27bfe72aab59..e322cce8c12d 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -2399,7 +2399,7 @@ static void ub_disconnect(struct usb_interface *intf) del_gendisk(lun->disk); /* * I wish I could do: - * set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); + * queue_flag_set(QUEUE_FLAG_DEAD, q); * As it is, we rely on our internal poisoning and let * the upper levels to spin furiously failing all the I/O. 
*/ diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 51be53344214..73326e7c54bf 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -873,10 +873,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) q->max_hw_sectors = t->limits.max_hw_sectors; q->seg_boundary_mask = t->limits.seg_boundary_mask; q->bounce_pfn = t->limits.bounce_pfn; + /* XXX: the below will probably go bug. must ensure there can be no + * concurrency on queue_flags, and use the unlocked versions... + */ if (t->limits.no_cluster) - q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER); + queue_flag_clear(QUEUE_FLAG_CLUSTER, q); else - q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER); + queue_flag_set(QUEUE_FLAG_CLUSTER, q); } diff --git a/drivers/md/md.c b/drivers/md/md.c index 87620b705bee..acd716b657b8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -282,7 +282,8 @@ static mddev_t * mddev_find(dev_t unit) kfree(new); return NULL; } - set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags); + /* Can be unlocked because the queue is new: no concurrency */ + queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue); blk_queue_make_request(new->queue, md_fail_request); diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 07103c399fe0..f6600bfb5bde 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1773,7 +1773,7 @@ static int scsi_debug_slave_alloc(struct scsi_device *sdp) if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %u>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); - set_bit(QUEUE_FLAG_BIDI, &sdp->request_queue->queue_flags); + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue); return 0; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 67f412bb4974..d545ad1cf47a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -536,6 +536,9 @@ static void scsi_run_queue(struct request_queue *q) !shost->host_blocked 
&& !shost->host_self_blocked && !((shost->can_queue > 0) && (shost->host_busy >= shost->can_queue))) { + + int flagset; + /* * As long as shost is accepting commands and we have * starved queues, call blk_run_queue. scsi_request_fn @@ -549,19 +552,20 @@ static void scsi_run_queue(struct request_queue *q) sdev = list_entry(shost->starved_list.next, struct scsi_device, starved_entry); list_del_init(&sdev->starved_entry); - spin_unlock_irqrestore(shost->host_lock, flags); - + spin_unlock(shost->host_lock); + + spin_lock(sdev->request_queue->queue_lock); + flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && + !test_bit(QUEUE_FLAG_REENTER, + &sdev->request_queue->queue_flags); + if (flagset) + queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); + __blk_run_queue(sdev->request_queue); + if (flagset) + queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); + spin_unlock(sdev->request_queue->queue_lock); - if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && - !test_and_set_bit(QUEUE_FLAG_REENTER, - &sdev->request_queue->queue_flags)) { - blk_run_queue(sdev->request_queue); - clear_bit(QUEUE_FLAG_REENTER, - &sdev->request_queue->queue_flags); - } else - blk_run_queue(sdev->request_queue); - - spin_lock_irqsave(shost->host_lock, flags); + spin_lock(shost->host_lock); if (unlikely(!list_empty(&sdev->starved_entry))) /* * sdev lost a race, and was put back on the @@ -1585,8 +1589,9 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segment_size(q, dma_get_max_seg_size(dev)); + /* New queue, no concurrency on queue_flags */ if (!shost->use_clustering) - clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); + queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); /* * set a reasonable default alignment on word boundaries: the diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 7899e3dda9bf..f4461d35ffb9 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -248,8 +248,7 
@@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) else q->queuedata = shost; - set_bit(QUEUE_FLAG_BIDI, &q->queue_flags); - + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c5065e3d2ca9..8ca481cd7d73 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -408,6 +408,30 @@ struct request_queue #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ +static inline void queue_flag_set_unlocked(unsigned int flag, + struct request_queue *q) +{ + __set_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_set(unsigned int flag, struct request_queue *q) +{ + WARN_ON_ONCE(!spin_is_locked(q->queue_lock)); + __set_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_clear_unlocked(unsigned int flag, + struct request_queue *q) +{ + __clear_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) +{ + WARN_ON_ONCE(!spin_is_locked(q->queue_lock)); + __clear_bit(flag, &q->queue_flags); +} + enum { /* * Hardbarrier is supported with one of the following methods. 
@@ -496,17 +520,17 @@ static inline int blk_queue_full(struct request_queue *q, int rw) static inline void blk_set_queue_full(struct request_queue *q, int rw) { if (rw == READ) - set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_READFULL, q); else - set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_WRITEFULL, q); } static inline void blk_clear_queue_full(struct request_queue *q, int rw) { if (rw == READ) - clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_READFULL, q); else - clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_WRITEFULL, q); } @@ -626,6 +650,7 @@ extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); +extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); -- cgit v1.2.3-71-gd317 From 2a4aa30c5f967eb6ae874c67fa6fceeee84815f9 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 29 Apr 2008 09:54:36 +0200 Subject: block: rename and export rq_init() This renames rq_init() to blk_rq_init() and exports it. Any path that hands the request to the block layer needs to call it to initialize the request. This is a preparation for large command support, which needs to initialize the request in a proper way (that is, just doing a memset() will not work).
Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-barrier.c | 4 ++-- block/blk-core.c | 5 +++-- block/blk.h | 1 - include/linux/blkdev.h | 1 + 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 47127ba09e45..66e55288178c 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -143,7 +143,7 @@ static void queue_flush(struct request_queue *q, unsigned which) end_io = post_flush_end_io; } - rq_init(q, rq); + blk_rq_init(q, rq); rq->cmd_flags = REQ_HARDBARRIER; rq->rq_disk = q->bar_rq.rq_disk; rq->end_io = end_io; @@ -165,7 +165,7 @@ static inline struct request *start_ordered(struct request_queue *q, blkdev_dequeue_request(rq); q->orig_bar_rq = rq; rq = &q->bar_rq; - rq_init(q, rq); + blk_rq_init(q, rq); if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) rq->cmd_flags |= REQ_RW; if (q->ordered & QUEUE_ORDERED_FUA) diff --git a/block/blk-core.c b/block/blk-core.c index d2f23ec5ebfa..fe0d1390b743 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -107,7 +107,7 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) } EXPORT_SYMBOL(blk_get_backing_dev_info); -void rq_init(struct request_queue *q, struct request *rq) +void blk_rq_init(struct request_queue *q, struct request *rq) { memset(rq, 0, sizeof(*rq)); @@ -120,6 +120,7 @@ void rq_init(struct request_queue *q, struct request *rq) rq->tag = -1; rq->ref_count = 1; } +EXPORT_SYMBOL(blk_rq_init); static void req_bio_endio(struct request *rq, struct bio *bio, unsigned int nbytes, int error) @@ -598,7 +599,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) if (!rq) return NULL; - rq_init(q, rq); + blk_rq_init(q, rq); /* * first three bits are identical in rq->cmd_flags and bio->bi_rw, diff --git a/block/blk.h b/block/blk.h index ec9120fb789a..59776ab4742a 100644 --- a/block/blk.h +++ b/block/blk.h @@ -10,7 +10,6 @@ extern struct kmem_cache 
*blk_requestq_cachep; extern struct kobj_type blk_queue_ktype; -void rq_init(struct request_queue *q, struct request *rq); void init_request_from_bio(struct request *req, struct bio *bio); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8ca481cd7d73..d17032c347c0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -607,6 +607,7 @@ extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern void register_disk(struct gendisk *dev); extern void generic_make_request(struct bio *bio); +extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern void blk_end_sync_rq(struct request *rq, int error); -- cgit v1.2.3-71-gd317 From d7e3c3249ef23b4617393c69fe464765b4ff1645 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 29 Apr 2008 09:54:39 +0200 Subject: block: add large command support This patch changes rq->cmd from the static array to a pointer to support large commands. We rarely handle large commands. So for optimization, a struct request still has a static array for a command. rq_init sets rq->cmd pointer to the static array. 
Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + include/linux/blkdev.h | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index e6fdb288be65..5d09f8c56024 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -117,6 +117,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->sector = rq->hard_sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); + rq->cmd = rq->__cmd; rq->tag = -1; rq->ref_count = 1; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d17032c347c0..08df1ea8bac4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -215,8 +215,9 @@ struct request { /* * when request is used as a packet command carrier */ - unsigned int cmd_len; - unsigned char cmd[BLK_MAX_CDB]; + unsigned short cmd_len; + unsigned char __cmd[BLK_MAX_CDB]; + unsigned char *cmd; unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ -- cgit v1.2.3-71-gd317 From ac9fafa1243640349aa481adf473db283a695766 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Tue, 29 Apr 2008 14:44:19 +0200 Subject: block: Skip I/O merges when disabled The block I/O + elevator + I/O scheduler code spend a lot of time trying to merge I/Os -- rightfully so under "normal" circumstances. However, if one were to know that the incoming I/O stream was /very/ random in nature, the cycles are wasted. This patch adds a per-request_queue tunable that (when set) disables merge attempts (beyond the simple one-hit cache check), thus freeing up a non-trivial amount of CPU cycles. Signed-off-by: Alan D. 
Brunelle Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 26 ++++++++++++++++++++++++++ block/elevator.c | 3 +++ include/linux/blkdev.h | 2 ++ 3 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index fc41d83be22b..e85c4013e8a2 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -135,6 +135,25 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) return queue_var_show(max_hw_sectors_kb, (page)); } +static ssize_t queue_nomerges_show(struct request_queue *q, char *page) +{ + return queue_var_show(blk_queue_nomerges(q), page); +} + +static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long nm; + ssize_t ret = queue_var_store(&nm, page, count); + + if (nm) + set_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags); + else + clear_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags); + + return ret; +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, @@ -170,6 +189,12 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { .show = queue_hw_sector_size_show, }; +static struct queue_sysfs_entry queue_nomerges_entry = { + .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR }, + .show = queue_nomerges_show, + .store = queue_nomerges_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -177,6 +202,7 @@ static struct attribute *default_attrs[] = { &queue_max_sectors_entry.attr, &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, + &queue_nomerges_entry.attr, NULL, }; diff --git a/block/elevator.c b/block/elevator.c index 7253fa05db0a..ac5310ef8270 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -488,6 +488,9 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) } } + if (blk_queue_nomerges(q)) + return ELEVATOR_NO_MERGE; + /* * See if our hash lookup can find a 
potential backmerge. */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 08df1ea8bac4..c09696a90d6a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -408,6 +408,7 @@ struct request_queue #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ static inline void queue_flag_set_unlocked(unsigned int flag, struct request_queue *q) @@ -476,6 +477,7 @@ enum { #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) +#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) -- cgit v1.2.3-71-gd317 From ab857d09386661145924c9403792234aeca4bdff Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Tue, 29 Apr 2008 00:58:23 -0700 Subject: mm: fix misleading __GFP_REPEAT related comments The definition and use of __GFP_REPEAT, __GFP_NOFAIL and __GFP_NORETRY in the core VM have somewhat differing comments as to their actual semantics. Annoyingly, the flags definition has inline and header comments, which might be interpreted as not being equivalent. Just add references to the header comments in the inline ones so they don't go out of sync in the future. In their use in __alloc_pages() clarify that the current implementation treats low-order allocations and __GFP_REPEAT allocations as distinct cases. 
To clarify, the flags' semantics are: __GFP_NORETRY means try no harder than one run through __alloc_pages __GFP_REPEAT means __GFP_NOFAIL __GFP_NOFAIL means repeat forever order <= PAGE_ALLOC_COSTLY_ORDER means __GFP_NOFAIL Signed-off-by: Nishanth Aravamudan Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 +++--- mm/page_alloc.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c37653b6843f..b414be387180 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -40,9 +40,9 @@ struct vm_area_struct; #define __GFP_FS ((__force gfp_t)0x80u) /* Can call down to low-level FS? */ #define __GFP_COLD ((__force gfp_t)0x100u) /* Cache-cold page required */ #define __GFP_NOWARN ((__force gfp_t)0x200u) /* Suppress page allocation failure warning */ -#define __GFP_REPEAT ((__force gfp_t)0x400u) /* Retry the allocation. Might fail */ -#define __GFP_NOFAIL ((__force gfp_t)0x800u) /* Retry for ever. Cannot fail */ -#define __GFP_NORETRY ((__force gfp_t)0x1000u)/* Do not retry. Might fail */ +#define __GFP_REPEAT ((__force gfp_t)0x400u) /* See above */ +#define __GFP_NOFAIL ((__force gfp_t)0x800u) /* See above */ +#define __GFP_NORETRY ((__force gfp_t)0x1000u)/* See above */ #define __GFP_COMP ((__force gfp_t)0x4000u)/* Add compound page metadata */ #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 88eb59dd7ac6..6965be064a31 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1611,8 +1611,9 @@ nofail_alloc: * Don't let big-order allocations loop unless the caller explicitly * requests that. Wait for some write requests to complete then retry. 
* - * In this implementation, __GFP_REPEAT means __GFP_NOFAIL for order - * <= 3, but that may not be true in other implementations. + * In this implementation, either order <= PAGE_ALLOC_COSTLY_ORDER or + * __GFP_REPEAT mean __GFP_NOFAIL, but that may not be true in other + * implementations. */ do_retry = 0; if (!(gfp_mask & __GFP_NORETRY)) { -- cgit v1.2.3-71-gd317 From ede9c697bc7513f210103fa77a9031e89726ae40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Apr 2008 00:58:35 -0700 Subject: Avoid divides in BITS_TO_LONGS BITS_PER_LONG is a signed value (32 or 64) DIV_ROUND_UP(nr, BITS_PER_LONG) performs signed arithmetic if "nr" is signed too. Converting BITS_TO_LONGS(nr) to DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) makes sure the compiler can perform a right shift, even if "nr" is a signed value, instead of an expensive integer divide. Applying this patch saves 141 bytes on x86 when CONFIG_CC_OPTIMIZE_FOR_SIZE=y and speeds up bitmap operations. Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 48bde600a2db..8340a3aba49a 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -6,8 +6,8 @@ #define BIT(nr) (1UL << (nr)) #define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) #define BITS_PER_BYTE 8 +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) #endif /* -- cgit v1.2.3-71-gd317 From bd3feb13e15a4859f629c9a076554e260c1d1397 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 29 Apr 2008 00:58:37 -0700 Subject: fs/coda: remove static inline forward declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They're defined later on in the same file with bodies and
nothing in between needs them. Signed-off-by: Ilpo Järvinen Reviewed-by: Pekka Enberg Acked-by: Jan Harkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/coda_linux.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index 1c47a34aa794..31b75311e2ca 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -43,9 +43,6 @@ int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); /* this file: heloers */ -static __inline__ struct CodaFid *coda_i2f(struct inode *); -static __inline__ char *coda_i2s(struct inode *); -static __inline__ void coda_flag_inode(struct inode *, int flag); char *coda_f2s(struct CodaFid *f); int coda_isroot(struct inode *i); int coda_iscontrol(const char *name, size_t length); -- cgit v1.2.3-71-gd317 From 95b570c9cef3b12356454c7112571b7e406b4b51 Mon Sep 17 00:00:00 2001 From: Nur Hussein Date: Tue, 29 Apr 2008 00:58:39 -0700 Subject: Taint kernel after WARN_ON(condition) The kernel is set to tainted within the warn_on_slowpath() function, and whenever a warning occurs the new taint flag 'W' is set. This is useful to know if a warning occurred before a BUG by preserving the warning as a flag in the taint state. This does not work on architectures where WARN_ON has its own definition. These archs are: 1. s390 2. superh 3. avr32 4. parisc The maintainers of these architectures have been added in the Cc: list in this email to alert them to the situation. The documentation in oops-tracing.txt has been updated to include the new flag.
Signed-off-by: Nur Hussein Cc: Arjan van de Ven Cc: "Randy.Dunlap" Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: Haavard Skinnemoen Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/oops-tracing.txt | 4 ++++ include/linux/kernel.h | 1 + kernel/panic.c | 8 ++++++-- 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt index 7f60dfe642ca..b152e81da592 100644 --- a/Documentation/oops-tracing.txt +++ b/Documentation/oops-tracing.txt @@ -253,6 +253,10 @@ characters, each representing a particular tainted value. 8: 'D' if the kernel has died recently, i.e. there was an OOPS or BUG. + 9: 'A' if the ACPI table has been overridden. + + 10: 'W' if a warning has previously been issued by the kernel. + The primary reason for the 'Tainted: ' string is to tell kernel debuggers if this is a clean kernel or if anything unusual has occurred. Tainting is permanent: even if an offending module is diff --git a/include/linux/kernel.h b/include/linux/kernel.h index cd6d02cf854d..28caa53dd1f7 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -255,6 +255,7 @@ extern enum system_states { #define TAINT_USER (1<<6) #define TAINT_DIE (1<<7) #define TAINT_OVERRIDDEN_ACPI_TABLE (1<<8) +#define TAINT_WARN (1<<9) extern void dump_stack(void) __cold; diff --git a/kernel/panic.c b/kernel/panic.c index 24af9f8bac99..425567f45b9f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -153,6 +153,8 @@ EXPORT_SYMBOL(panic); * 'M' - System experienced a machine check exception. * 'B' - System has hit bad_page. * 'U' - Userspace-defined naughtiness. + * 'A' - ACPI table overridden. + * 'W' - Taint on warning. * * The string is overwritten by the next call to print_taint(). 
*/ @@ -161,7 +163,7 @@ const char *print_tainted(void) { static char buf[20]; if (tainted) { - snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c", + snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c%c", tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', tainted & TAINT_FORCED_MODULE ? 'F' : ' ', tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', @@ -170,7 +172,8 @@ const char *print_tainted(void) tainted & TAINT_BAD_PAGE ? 'B' : ' ', tainted & TAINT_USER ? 'U' : ' ', tainted & TAINT_DIE ? 'D' : ' ', - tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' '); + tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' ', + tainted & TAINT_WARN ? 'W' : ' '); } else snprintf(buf, sizeof(buf), "Not tainted"); @@ -312,6 +315,7 @@ void warn_on_slowpath(const char *file, int line) print_modules(); dump_stack(); print_oops_end_marker(); + add_taint(TAINT_WARN); } EXPORT_SYMBOL(warn_on_slowpath); #endif -- cgit v1.2.3-71-gd317 From 679c9cd4acc2cf2872171813752eab3320273339 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Tue, 29 Apr 2008 00:58:42 -0700 Subject: add RUSAGE_THREAD Add the RUSAGE_THREAD option for the getrusage system call. This is essentially Roland's patch from http://lkml.org/lkml/2008/1/18/589, but the line about RUSAGE_LWP has been removed, as suggested by Ulrich and Christoph.
Signed-off-by: Roland McGrath Signed-off-by: Sripathi Kodi Cc: Ingo Molnar Cc: Michael Kerrisk Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/resource.h | 1 + kernel/sys.c | 31 ++++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/resource.h b/include/linux/resource.h index ae13db714742..aaa423a6f3d9 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -19,6 +19,7 @@ struct task_struct; #define RUSAGE_SELF 0 #define RUSAGE_CHILDREN (-1) #define RUSAGE_BOTH (-2) /* sys_wait4() uses this */ +#define RUSAGE_THREAD 1 /* only the calling thread */ struct rusage { struct timeval ru_utime; /* user time used */ diff --git a/kernel/sys.c b/kernel/sys.c index f2a451366953..e423d0d9e6ff 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1545,6 +1545,19 @@ out: * */ +static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, + cputime_t *utimep, cputime_t *stimep) +{ + *utimep = cputime_add(*utimep, t->utime); + *stimep = cputime_add(*stimep, t->stime); + r->ru_nvcsw += t->nvcsw; + r->ru_nivcsw += t->nivcsw; + r->ru_minflt += t->min_flt; + r->ru_majflt += t->maj_flt; + r->ru_inblock += task_io_get_inblock(t); + r->ru_oublock += task_io_get_oublock(t); +} + static void k_getrusage(struct task_struct *p, int who, struct rusage *r) { struct task_struct *t; @@ -1554,6 +1567,11 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) memset((char *) r, 0, sizeof *r); utime = stime = cputime_zero; + if (who == RUSAGE_THREAD) { + accumulate_thread_rusage(p, r, &utime, &stime); + goto out; + } + rcu_read_lock(); if (!lock_task_sighand(p, &flags)) { rcu_read_unlock(); @@ -1586,14 +1604,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += p->signal->oublock; t = p; do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - 
r->ru_nvcsw += t->nvcsw; - r->ru_nivcsw += t->nivcsw; - r->ru_minflt += t->min_flt; - r->ru_majflt += t->maj_flt; - r->ru_inblock += task_io_get_inblock(t); - r->ru_oublock += task_io_get_oublock(t); + accumulate_thread_rusage(t, r, &utime, &stime); t = next_thread(t); } while (t != p); break; @@ -1605,6 +1616,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) unlock_task_sighand(p, &flags); rcu_read_unlock(); +out: cputime_to_timeval(utime, &r->ru_utime); cputime_to_timeval(stime, &r->ru_stime); } @@ -1618,7 +1630,8 @@ int getrusage(struct task_struct *p, int who, struct rusage __user *ru) asmlinkage long sys_getrusage(int who, struct rusage __user *ru) { - if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) + if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && + who != RUSAGE_THREAD) return -EINVAL; return getrusage(current, who, ru); } -- cgit v1.2.3-71-gd317 From f718e31819857825315300ea3c2dbc3f26ff3b0e Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 29 Apr 2008 00:58:47 -0700 Subject: cpu: fix section mismatch warnings in hotcpu_register Fix following warnings: WARNING: vmlinux.o(.data+0x5020): Section mismatch in reference from the variable cpu_vsyscall_notifier_nb.12876 to the function .cpuinit.text:cpu_vsyscall_notifier() WARNING: vmlinux.o(.data+0x9ce0): Section mismatch in reference from the variable profile_cpu_callback_nb.17654 to the function .devinit.text:profile_cpu_callback() WARNING: vmlinux.o(.data+0xd380): Section mismatch in reference from the variable workqueue_cpu_callback_nb.15004 to the function .devinit.text:workqueue_cpu_callback() WARNING: vmlinux.o(.data+0x11d00): Section mismatch in reference from the variable relay_hotcpu_callback_nb.19626 to the function .cpuinit.text:relay_hotcpu_callback() WARNING: vmlinux.o(.data+0x12970): Section mismatch in reference from the variable cpu_callback_nb.24694 to the function .devinit.text:cpu_callback() WARNING: vmlinux.o(.data+0x3fee0): Section mismatch in reference 
from the variable percpu_counter_hotcpu_callback_nb.10903 to the function .cpuinit.text:percpu_counter_hotcpu_callback() WARNING: vmlinux.o(.data+0x74ce0): Section mismatch in reference from the variable topology_cpu_callback_nb.12506 to the function .cpuinit.text:topology_cpu_callback() Functions used as argument are by definition only used in HOTPLUG_CPU situations, so they are annotated __cpuinit. Annotate the static variable used by hotcpu_register with __cpuinitdata to match this definition. Signed-off-by: Sam Ravnborg Cc: Gautham R Shenoy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f212fa98283e..7464ba3b4333 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -108,7 +108,7 @@ static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) extern void get_online_cpus(void); extern void put_online_cpus(void); #define hotcpu_notifier(fn, pri) { \ - static struct notifier_block fn##_nb = \ + static struct notifier_block fn##_nb __cpuinitdata = \ { .notifier_call = fn, .priority = pri }; \ register_cpu_notifier(&fn##_nb); \ } -- cgit v1.2.3-71-gd317 From 6b09ae66922ca198e5830c0a4d74400a507a9170 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:54 -0700 Subject: make __put_super() static Make the needlessly global __put_super() static. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/super.c | 2 +- include/linux/fs.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index a5a4aca7e22f..453877c5697b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -117,7 +117,7 @@ static inline void destroy_super(struct super_block *s) * Drop a superblock's refcount. Returns non-zero if the superblock was * destroyed. The caller must hold sb_lock.
*/ -int __put_super(struct super_block *sb) +static int __put_super(struct super_block *sb) { int ret = 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c925747bc49..1de9d72178e1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1521,7 +1521,6 @@ extern int get_sb_pseudo(struct file_system_type *, char *, const struct super_operations *ops, unsigned long, struct vfsmount *mnt); extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); -int __put_super(struct super_block *sb); int __put_super_and_need_restart(struct super_block *sb); void unnamed_dev_init(void); -- cgit v1.2.3-71-gd317 From 67cde595374dd0e4e4a537dbf9dff70fd3d7bd7b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:55 -0700 Subject: make vfs_ioctl() static Make the needlessly global vfs_ioctl() static. Signed-off-by: Adrian Bunk Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ioctl.c | 4 ++-- include/linux/fs.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/ioctl.c b/fs/ioctl.c index f32fbde2175e..7db32b3382d3 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -28,8 +28,8 @@ * * Returns 0 on success, -errno on error. 
*/ -long vfs_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) +static long vfs_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { int error = -ENOTTY; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1de9d72178e1..a1ba005d08e7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1964,7 +1964,6 @@ extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); -extern long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); -- cgit v1.2.3-71-gd317 From f11b00f3bd89c91c684d56b2082d1b0241ff20ae Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:56 -0700 Subject: fs/fs-writeback.c: make 2 functions static Make the following needlessly global functions static: - writeback_acquire() - writeback_release() Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 78 ++++++++++++++++++++++----------------------- include/linux/backing-dev.h | 2 -- 2 files changed, 39 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 06557679ca41..ae45f77765c0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -25,6 +25,45 @@ #include #include "internal.h" + +/** + * writeback_acquire - attempt to get exclusive writeback access to a device + * @bdi: the device's backing_dev_info structure + * + * It is a waste of resources to have more than one pdflush thread blocked on + * a single request queue. Exclusion at the request_queue level is obtained + * via a flag in the request_queue's backing_dev_info.state. + * + * Non-request_queue-backed address_spaces will share default_backing_dev_info, + * unless they implement their own. 
Which is somewhat inefficient, as this + * may prevent concurrent writeback against multiple devices. + */ +static int writeback_acquire(struct backing_dev_info *bdi) +{ + return !test_and_set_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_in_progress - determine whether there is writeback in progress + * @bdi: the device's backing_dev_info structure. + * + * Determine whether there is writeback in progress against a backing device. + */ +int writeback_in_progress(struct backing_dev_info *bdi) +{ + return test_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_release - relinquish exclusive writeback access against a device. + * @bdi: the device's backing_dev_info structure + */ +static void writeback_release(struct backing_dev_info *bdi) +{ + BUG_ON(!writeback_in_progress(bdi)); + clear_bit(BDI_pdflush, &bdi->state); +} + /** * __mark_inode_dirty - internal function * @inode: inode to mark @@ -747,43 +786,4 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int return err; } - EXPORT_SYMBOL(generic_osync_inode); - -/** - * writeback_acquire - attempt to get exclusive writeback access to a device - * @bdi: the device's backing_dev_info structure - * - * It is a waste of resources to have more than one pdflush thread blocked on - * a single request queue. Exclusion at the request_queue level is obtained - * via a flag in the request_queue's backing_dev_info.state. - * - * Non-request_queue-backed address_spaces will share default_backing_dev_info, - * unless they implement their own. Which is somewhat inefficient, as this - * may prevent concurrent writeback against multiple devices. - */ -int writeback_acquire(struct backing_dev_info *bdi) -{ - return !test_and_set_bit(BDI_pdflush, &bdi->state); -} - -/** - * writeback_in_progress - determine whether there is writeback in progress - * @bdi: the device's backing_dev_info structure. - * - * Determine whether there is writeback in progress against a backing device. 
- */ -int writeback_in_progress(struct backing_dev_info *bdi) -{ - return test_bit(BDI_pdflush, &bdi->state); -} - -/** - * writeback_release - relinquish exclusive writeback access against a device. - * @bdi: the device's backing_dev_info structure - */ -void writeback_release(struct backing_dev_info *bdi) -{ - BUG_ON(!writeback_in_progress(bdi)); - clear_bit(BDI_pdflush, &bdi->state); -} diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 48a62baace58..b66fa2bdfd9c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -156,9 +156,7 @@ static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi) extern struct backing_dev_info default_backing_dev_info; void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page); -int writeback_acquire(struct backing_dev_info *bdi); int writeback_in_progress(struct backing_dev_info *bdi); -void writeback_release(struct backing_dev_info *bdi); static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits) { -- cgit v1.2.3-71-gd317 From 07d45da616f8514651360b502314fc9554223a03 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:57 -0700 Subject: fs/drop_caches.c: make 2 functions static Make the following needlessly global functions static: - drop_pagecache() - drop_slab() Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/drop_caches.c | 4 ++-- include/linux/mm.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 59375efcf39d..e2c6b6500c78 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -25,7 +25,7 @@ static void drop_pagecache_sb(struct super_block *sb) spin_unlock(&inode_lock); } -void drop_pagecache(void) +static void drop_pagecache(void) { struct super_block *sb; @@ -45,7 +45,7 @@ restart: spin_unlock(&sb_lock); } -void drop_slab(void) +static void drop_slab(void) { int nr_objects; diff 
--git a/include/linux/mm.h b/include/linux/mm.h index 8b7f4a5d4f6a..fef602d82722 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1230,8 +1230,6 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages); -void drop_pagecache(void); -void drop_slab(void); #ifndef CONFIG_MMU #define randomize_va_space 0 -- cgit v1.2.3-71-gd317 From d5470b596abdd566339b2417e807b1198be64b97 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:58:57 -0700 Subject: fs/aio.c: make 3 functions static Make the following needlessly global functions static: - __put_ioctx() - lookup_ioctx() - io_submit_one() Signed-off-by: Adrian Bunk Cc: Zach Brown Cc: Benjamin LaHaise Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 67 +++++++++++++++++++++++++++++++---------------------- include/linux/aio.h | 19 --------------- 2 files changed, 39 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index ae94e1dea266..81c01290939b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -191,6 +191,43 @@ static int aio_setup_ring(struct kioctx *ctx) kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ } while(0) + +/* __put_ioctx + * Called when the last user of an aio context has gone away, + * and the struct needs to be freed. 
+ */ +static void __put_ioctx(struct kioctx *ctx) +{ + unsigned nr_events = ctx->max_reqs; + + BUG_ON(ctx->reqs_active); + + cancel_delayed_work(&ctx->wq); + cancel_work_sync(&ctx->wq.work); + aio_free_ring(ctx); + mmdrop(ctx->mm); + ctx->mm = NULL; + pr_debug("__put_ioctx: freeing %p\n", ctx); + kmem_cache_free(kioctx_cachep, ctx); + + if (nr_events) { + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - nr_events > aio_nr); + aio_nr -= nr_events; + spin_unlock(&aio_nr_lock); + } +} + +#define get_ioctx(kioctx) do { \ + BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ + atomic_inc(&(kioctx)->users); \ +} while (0) +#define put_ioctx(kioctx) do { \ + BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ + if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ + __put_ioctx(kioctx); \ +} while (0) + /* ioctx_alloc * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. */ @@ -361,32 +398,6 @@ void exit_aio(struct mm_struct *mm) } } -/* __put_ioctx - * Called when the last user of an aio context has gone away, - * and the struct needs to be freed. - */ -void __put_ioctx(struct kioctx *ctx) -{ - unsigned nr_events = ctx->max_reqs; - - BUG_ON(ctx->reqs_active); - - cancel_delayed_work(&ctx->wq); - cancel_work_sync(&ctx->wq.work); - aio_free_ring(ctx); - mmdrop(ctx->mm); - ctx->mm = NULL; - pr_debug("__put_ioctx: freeing %p\n", ctx); - kmem_cache_free(kioctx_cachep, ctx); - - if (nr_events) { - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - nr_events > aio_nr); - aio_nr -= nr_events; - spin_unlock(&aio_nr_lock); - } -} - /* aio_get_req * Allocate a slot for an aio request. Increments the users count * of the kioctx so that the kioctx stays around until all requests are @@ -545,7 +556,7 @@ int aio_put_req(struct kiocb *req) /* Lookup an ioctx id. ioctx_list is lockless for reads. * FIXME: this is O(n) and is only suitable for development. 
*/ -struct kioctx *lookup_ioctx(unsigned long ctx_id) +static struct kioctx *lookup_ioctx(unsigned long ctx_id) { struct kioctx *ioctx; struct mm_struct *mm; @@ -1552,7 +1563,7 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode, return 1; } -int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, +static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb) { struct kiocb *req; diff --git a/include/linux/aio.h b/include/linux/aio.h index 0d0b7f629bd3..b51ddd28444e 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -209,27 +209,8 @@ extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); extern int aio_put_req(struct kiocb *iocb); extern void kick_iocb(struct kiocb *iocb); extern int aio_complete(struct kiocb *iocb, long res, long res2); -extern void __put_ioctx(struct kioctx *ctx); struct mm_struct; extern void exit_aio(struct mm_struct *mm); -extern struct kioctx *lookup_ioctx(unsigned long ctx_id); -extern int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb); - -/* semi private, but used by the 32bit emulations: */ -struct kioctx *lookup_ioctx(unsigned long ctx_id); -int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb); - -#define get_ioctx(kioctx) do { \ - BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ - atomic_inc(&(kioctx)->users); \ -} while (0) -#define put_ioctx(kioctx) do { \ - BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ - if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ - __put_ioctx(kioctx); \ -} while (0) #define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait) -- cgit v1.2.3-71-gd317 From 946a57b526a16e5662235cb8f573337bc8ecdc48 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:59:00 -0700 Subject: remove generic_commit_write() Remove the obsolete and no longer used generic_commit_write(). 
Signed-off-by: Adrian Bunk Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 18 ------------------ include/linux/buffer_head.h | 1 - 2 files changed, 19 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 3db4a26adc44..22ed55198f3d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2328,23 +2328,6 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) return 0; } -int generic_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - __block_commit_write(inode,page,from,to); - /* - * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_mutex. - */ - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} - /* * block_page_mkwrite() is not allowed to change the file size as it gets * called from a page fault handler when a page is first dirtied. 
Hence we must @@ -3315,7 +3298,6 @@ EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(file_fsync); EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); -EXPORT_SYMBOL(generic_commit_write); EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(invalidate_bdev); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 932eb02a2753..82aa36c53ea7 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -225,7 +225,6 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, get_block_t get_block); void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); -int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); int file_fsync(struct file *, struct dentry *, int); int nobh_write_begin(struct file *, struct address_space *, -- cgit v1.2.3-71-gd317 From 58b250daff6a24518813975143c8433d9d5b684f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:59:02 -0700 Subject: remove mca_is_adapter_used() Remove the no longer used mca_is_adapter_used(). 
Signed-off-by: Adrian Bunk Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mca/mca-legacy.c | 18 ------------------ include/linux/mca-legacy.h | 1 - 2 files changed, 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/mca/mca-legacy.c b/drivers/mca/mca-legacy.c index 0c7bfa74c8ef..494f0c2001f5 100644 --- a/drivers/mca/mca-legacy.c +++ b/drivers/mca/mca-legacy.c @@ -281,24 +281,6 @@ void mca_set_adapter_name(int slot, char* name) } EXPORT_SYMBOL(mca_set_adapter_name); -/** - * mca_is_adapter_used - check if claimed by driver - * @slot: slot to check - * - * Returns 1 if the slot has been claimed by a driver - */ - -int mca_is_adapter_used(int slot) -{ - struct mca_device *mca_dev = mca_find_device_by_slot(slot); - - if(!mca_dev) - return 0; - - return mca_device_claimed(mca_dev); -} -EXPORT_SYMBOL(mca_is_adapter_used); - /** * mca_mark_as_used - claim an MCA device * @slot: slot to claim diff --git a/include/linux/mca-legacy.h b/include/linux/mca-legacy.h index f2bb770e530a..7a3aea845902 100644 --- a/include/linux/mca-legacy.h +++ b/include/linux/mca-legacy.h @@ -34,7 +34,6 @@ extern int mca_find_adapter(int id, int start); extern int mca_find_unused_adapter(int id, int start); -extern int mca_is_adapter_used(int slot); extern int mca_mark_as_used(int slot); extern void mca_mark_as_unused(int slot); -- cgit v1.2.3-71-gd317 From eb0f1c442d7cf1f7cb746c26c6120bb42e69c49c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:59:12 -0700 Subject: proper __do_softirq() prototype Add a proper prototype for __do_softirq() in include/linux/interrupt.h Signed-off-by: Adrian Bunk Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kernel/irq.c | 2 -- arch/sh/kernel/irq.c | 2 -- arch/x86/kernel/irq_32.c | 2 -- include/asm-powerpc/irq.h | 2 -- include/linux/interrupt.h | 1 + 5 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git 
a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index c36d8123ca14..c59a86dca584 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -60,8 +60,6 @@ init_IRQ(void) /* * Switch to the asynchronous interrupt stack for softirq execution. */ -extern void __do_softirq(void); - asmlinkage void do_softirq(void) { unsigned long flags, old, new; diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 9bf19b00696a..a2a99e487e33 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -200,8 +200,6 @@ void irq_ctx_exit(int cpu) hardirq_ctx[cpu] = NULL; } -extern asmlinkage void __do_softirq(void); - asmlinkage void do_softirq(void) { unsigned long flags; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 00bda7bcda63..147352df28b9 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -190,8 +190,6 @@ void irq_ctx_exit(int cpu) hardirq_ctx[cpu] = NULL; } -extern asmlinkage void __do_softirq(void); - asmlinkage void do_softirq(void) { unsigned long flags; diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h index b5c03127a9b9..5089deb8fec3 100644 --- a/include/asm-powerpc/irq.h +++ b/include/asm-powerpc/irq.h @@ -619,8 +619,6 @@ struct pt_regs; #define __ARCH_HAS_DO_SOFTIRQ -extern void __do_softirq(void); - #ifdef CONFIG_IRQSTACKS /* * Per-cpu stacks for handling hard and soft interrupts. 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b5fef13148bd..f1fc7470d26c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -289,6 +289,7 @@ struct softirq_action }; asmlinkage void do_softirq(void); +asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) -- cgit v1.2.3-71-gd317 From 7e4e8e689fe90dd94bd76f9706d6cce580941ed5 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 29 Apr 2008 00:59:13 -0700 Subject: Misc: phantom, add compat ioctl Openhaptics uses pointers in _IOC() macros, implement compat for them. Also add _IOC alternatives which are not 32/64 bit dependent (structures passed through aren't yet) -- libphantom will use them. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/phantom.c | 24 ++++++++++++++++++++---- include/linux/phantom.h | 5 ++++- 2 files changed, 24 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c index 7fa61e907e1c..5447a603686a 100644 --- a/drivers/misc/phantom.c +++ b/drivers/misc/phantom.c @@ -12,6 +12,7 @@ * or alternatively, you might use OpenHaptics provided by Sensable. 
*/ +#include #include #include #include @@ -91,11 +92,8 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, unsigned long flags; unsigned int i; - if (_IOC_TYPE(cmd) != PH_IOC_MAGIC || - _IOC_NR(cmd) > PH_IOC_MAXNR) - return -ENOTTY; - switch (cmd) { + case PHN_SETREG: case PHN_SET_REG: if (copy_from_user(&r, argp, sizeof(r))) return -EFAULT; @@ -126,6 +124,7 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, phantom_status(dev, dev->status & ~PHB_RUNNING); spin_unlock_irqrestore(&dev->regs_lock, flags); break; + case PHN_SETREGS: case PHN_SET_REGS: if (copy_from_user(&rs, argp, sizeof(rs))) return -EFAULT; @@ -143,6 +142,7 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, } spin_unlock_irqrestore(&dev->regs_lock, flags); break; + case PHN_GETREG: case PHN_GET_REG: if (copy_from_user(&r, argp, sizeof(r))) return -EFAULT; @@ -155,6 +155,7 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, if (copy_to_user(argp, &r, sizeof(r))) return -EFAULT; break; + case PHN_GETREGS: case PHN_GET_REGS: { u32 m; @@ -191,6 +192,20 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, return 0; } +#ifdef CONFIG_COMPAT +static long phantom_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + if (_IOC_NR(cmd) <= 3 && _IOC_SIZE(cmd) == sizeof(compat_uptr_t)) { + cmd &= ~(_IOC_SIZEMASK << _IOC_SIZESHIFT); + cmd |= sizeof(void *) << _IOC_SIZESHIFT; + } + return phantom_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#else +#define phantom_compat_ioctl NULL +#endif + static int phantom_open(struct inode *inode, struct file *file) { struct phantom_device *dev = container_of(inode->i_cdev, @@ -253,6 +268,7 @@ static struct file_operations phantom_file_ops = { .open = phantom_open, .release = phantom_release, .unlocked_ioctl = phantom_ioctl, + .compat_ioctl = phantom_compat_ioctl, .poll = phantom_poll, }; diff --git a/include/linux/phantom.h b/include/linux/phantom.h index 
96f4048a6cc3..a341e2162b45 100644 --- a/include/linux/phantom.h +++ b/include/linux/phantom.h @@ -34,7 +34,10 @@ struct phm_regs { * use improved registers update (no more phantom switchoffs when using * libphantom) */ #define PHN_NOT_OH _IO (PH_IOC_MAGIC, 4) -#define PH_IOC_MAXNR 4 +#define PHN_GETREG _IOWR(PH_IOC_MAGIC, 5, struct phm_reg) +#define PHN_SETREG _IOW(PH_IOC_MAGIC, 6, struct phm_reg) +#define PHN_GETREGS _IOWR(PH_IOC_MAGIC, 7, struct phm_regs) +#define PHN_SETREGS _IOW(PH_IOC_MAGIC, 8, struct phm_regs) #define PHN_CONTROL 0x6 /* control byte in iaddr space */ #define PHN_CTL_AMP 0x1 /* switch after torques change */ -- cgit v1.2.3-71-gd317 From 6e5e8c5085190b30b6fa42a4b75a88c10846b5f2 Mon Sep 17 00:00:00 2001 From: jan sonnek Date: Tue, 29 Apr 2008 00:59:15 -0700 Subject: Misc: phantom, consistent whitespace Make it consistent with the rest of the header. Signed-off-by: jan sonnek Cc: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/phantom.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phantom.h b/include/linux/phantom.h index a341e2162b45..02268c54c250 100644 --- a/include/linux/phantom.h +++ b/include/linux/phantom.h @@ -27,13 +27,13 @@ struct phm_regs { #define PH_IOC_MAGIC 'p' #define PHN_GET_REG _IOWR(PH_IOC_MAGIC, 0, struct phm_reg *) -#define PHN_SET_REG _IOW (PH_IOC_MAGIC, 1, struct phm_reg *) +#define PHN_SET_REG _IOW(PH_IOC_MAGIC, 1, struct phm_reg *) #define PHN_GET_REGS _IOWR(PH_IOC_MAGIC, 2, struct phm_regs *) -#define PHN_SET_REGS _IOW (PH_IOC_MAGIC, 3, struct phm_regs *) +#define PHN_SET_REGS _IOW(PH_IOC_MAGIC, 3, struct phm_regs *) /* this ioctl tells the driver, that the caller is not OpenHaptics and might * use improved registers update (no more phantom switchoffs when using * libphantom) */ -#define PHN_NOT_OH _IO (PH_IOC_MAGIC, 4) +#define PHN_NOT_OH _IO(PH_IOC_MAGIC, 4) #define PHN_GETREG _IOWR(PH_IOC_MAGIC, 5, struct 
phm_reg) #define PHN_SETREG _IOW(PH_IOC_MAGIC, 6, struct phm_reg) #define PHN_GETREGS _IOWR(PH_IOC_MAGIC, 7, struct phm_regs) -- cgit v1.2.3-71-gd317 From ecd0fa9825a1270e31fb48bc9edcfb28918b6c51 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 29 Apr 2008 00:59:15 -0700 Subject: Remove the macro get_personality Remove the macro get_personality, use ->personality instead. Cc: Christoph Hellwig Cc: David Howells Cc: Bryan Wu Signed-off-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/blackfin/kernel/signal.c | 2 +- arch/frv/kernel/signal.c | 4 ++-- include/linux/personality.h | 4 ---- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c index d1fa24401dc6..cb9d883d493c 100644 --- a/arch/blackfin/kernel/signal.c +++ b/arch/blackfin/kernel/signal.c @@ -212,7 +212,7 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t * info, /* Set up registers for signal handler */ wrusp((unsigned long)frame); - if (get_personality & FDPIC_FUNCPTRS) { + if (current->personality & FDPIC_FUNCPTRS) { struct fdpic_func_descriptor __user *funcptr = (struct fdpic_func_descriptor *) ka->sa.sa_handler; __get_user(regs->pc, &funcptr->text); diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index d64bcaff54cd..3bdb368292a8 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -297,7 +297,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set) __frame->lr = (unsigned long) &frame->retcode; __frame->gr8 = sig; - if (get_personality & FDPIC_FUNCPTRS) { + if (current->personality & FDPIC_FUNCPTRS) { struct fdpic_func_descriptor __user *funcptr = (struct fdpic_func_descriptor __user *) ka->sa.sa_handler; __get_user(__frame->pc, &funcptr->text); @@ -396,7 +396,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, __frame->gr8 = sig; __frame->gr9 = (unsigned long) &frame->info; - 
if (get_personality & FDPIC_FUNCPTRS) { + if (current->personality & FDPIC_FUNCPTRS) { struct fdpic_func_descriptor __user *funcptr = (struct fdpic_func_descriptor __user *) ka->sa.sa_handler; __get_user(__frame->pc, &funcptr->text); diff --git a/include/linux/personality.h b/include/linux/personality.h index 012cd558189b..a84e9ff9b27e 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -105,10 +105,6 @@ struct exec_domain { */ #define personality(pers) (pers & PER_MASK) -/* - * Personality of the currently running process. - */ -#define get_personality (current->personality) /* * Change personality of the currently running process. -- cgit v1.2.3-71-gd317 From 175a06ae300188af8a61db68a78e1af44dc7d44f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 29 Apr 2008 00:59:17 -0700 Subject: exec: remove argv_len from struct linux_binprm I noticed that 2.6.24.2 calculates bprm->argv_len at do_execve(). But it doesn't update bprm->argv_len after "remove_arg_zero() + copy_strings_kernel()" at load_script() etc. audit_bprm() is called from search_binary_handler() and search_binary_handler() is called from load_script() etc. Thus, I think the condition check if (bprm->argv_len > (audit_argv_kb << 10)) return -E2BIG; in audit_bprm() might return the wrong result when strlen(removed_arg) != strlen(spliced_args). Why not update bprm->argv_len at load_script() etc.? By the way, 2.6.25-rc3 seems not to be doing the condition check. Is the field bprm->argv_len no longer needed?
Signed-off-by: Tetsuo Handa Cc: Ollie Wild Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 3 --- include/linux/binfmts.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index b152029f18f6..7768453dc986 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1268,7 +1268,6 @@ int do_execve(char * filename, { struct linux_binprm *bprm; struct file *file; - unsigned long env_p; struct files_struct *displaced; int retval; @@ -1321,11 +1320,9 @@ int do_execve(char * filename, if (retval < 0) goto out; - env_p = bprm->p; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out; - bprm->argv_len = env_p - bprm->p; retval = search_binary_handler(bprm,regs); if (retval >= 0) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index b7fc55ec8d48..1dd756731c95 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -48,7 +48,6 @@ struct linux_binprm{ unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; - unsigned long argv_len; }; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 -- cgit v1.2.3-71-gd317 From 7d195a5409120277b800c42e846ee29cc667b777 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 00:59:18 -0700 Subject: proper extern for late_time_init Add a proper extern for late_time_init in include/linux/init.h Signed-off-by: Adrian Bunk Acked-by: Ingo Molnar Cc: Thomas Gleixner Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/pmc-sierra/yosemite/setup.c | 3 --- arch/ppc/platforms/sbc82xx.c | 2 -- arch/um/kernel/time.c | 3 +-- arch/x86/kernel/time_32.c | 1 - include/asm-x86/time.h | 1 - include/linux/init.h | 2 ++ 6 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c index 855977ca51cd..6537d90a25bb 100644 --- a/arch/mips/pmc-sierra/yosemite/setup.c +++ 
b/arch/mips/pmc-sierra/yosemite/setup.c @@ -143,9 +143,6 @@ void __init plat_time_init(void) mips_hpt_frequency = 33000000 * 3 * 5; } -/* No other usable initialization hook than this ... */ -extern void (*late_time_init)(void); - unsigned long ocd_base; EXPORT_SYMBOL(ocd_base); diff --git a/arch/ppc/platforms/sbc82xx.c b/arch/ppc/platforms/sbc82xx.c index 0df6aacb8237..24f6e0694ac1 100644 --- a/arch/ppc/platforms/sbc82xx.c +++ b/arch/ppc/platforms/sbc82xx.c @@ -30,8 +30,6 @@ static void (*callback_init_IRQ)(void); extern unsigned char __res[sizeof(bd_t)]; -extern void (*late_time_init)(void); - #ifdef CONFIG_GEN_RTC TODC_ALLOC(); diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index e066e84493b1..0d0cea2ac98d 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -109,8 +110,6 @@ static void __init setup_itimer(void) clockevents_register_device(&itimer_clockevent); } -extern void (*late_time_init)(void); - void __init time_init(void) { long long nsecs; diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 1a89e93f3f1c..2ff21f398934 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -115,7 +115,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ void __init hpet_time_init(void) { diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h index 68779b048a3e..bce72d7a958c 100644 --- a/include/asm-x86/time.h +++ b/include/asm-x86/time.h @@ -1,7 +1,6 @@ #ifndef _ASMX86_TIME_H #define _ASMX86_TIME_H -extern void (*late_time_init)(void); extern void hpet_time_init(void); #include diff --git a/include/linux/init.h b/include/linux/init.h index fb58c0493cf2..21d658cdfa27 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -147,6 +147,8 @@ extern unsigned int reset_devices; void setup_arch(char **); void 
prepare_namespace(void); +extern void (*late_time_init)(void); + #endif #ifndef MODULE -- cgit v1.2.3-71-gd317 From 3a2e7f47d71e1df86acc1dda6826890b6546a4e1 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 00:59:24 -0700 Subject: binfmt_misc.c: avoid potential kernel stack overflow This can be triggered with root help only, but... Register the ':text:E::txt::/root/cat.txt:' rule in binfmt_misc (by root) and try launching the cat.txt file (by anyone) :) The result is - the endless recursion in the load_misc_binary -> open_exec -> load_misc_binary chain and stack overflow. There's a similar problem with binfmt_script, and there's a sh_bang member on linux_binprm structure to handle this, but simply raising this in binfmt_misc may break some setups when the interpreter of some misc binaries is a script. So the proposal is to turn sh_bang into a bit, add a new one (the misc_bang) and raise it in load_misc_binary. After this, even if we set up the misc -> script -> misc loop for binfmts one of them will step on its own bang and exit. Signed-off-by: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_em86.c | 2 +- fs/binfmt_misc.c | 6 ++++++ fs/binfmt_script.c | 2 +- include/linux/binfmts.h | 3 ++- 4 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index f95ae9789c91..f9c88d0c8ced 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) return -ENOEXEC; } - bprm->sh_bang++; /* Well, the bang-shell is implicit... */ + bprm->sh_bang = 1; /* Well, the bang-shell is implicit...
*/ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index dbf0ac0523de..7191306367c5 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -115,6 +115,12 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!enabled) goto _ret; + retval = -ENOEXEC; + if (bprm->misc_bang) + goto _ret; + + bprm->misc_bang = 1; + /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index ab33939b12a7..9e3963f7ebf1 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -29,7 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) * Sorta complicated, but hopefully it will work. -TYT */ - bprm->sh_bang++; + bprm->sh_bang = 1; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 1dd756731c95..b512e48f6d8e 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -34,7 +34,8 @@ struct linux_binprm{ #endif struct mm_struct *mm; unsigned long p; /* current top of mem */ - int sh_bang; + unsigned int sh_bang:1, + misc_bang:1; struct file * file; int e_uid, e_gid; kernel_cap_t cap_inheritable, cap_permitted; -- cgit v1.2.3-71-gd317 From aab3c3b01d1848a5e8a1ddec4e5656fc4de04982 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 29 Apr 2008 00:59:25 -0700 Subject: Remove superfluous include of string.h from percpu.h There's nothing in percpu.h that requires an explicit inclusion of string.h. Signed-off-by: Robert P. J. 
Day Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1ac969724bb2..d746a2abb322 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -4,7 +4,6 @@ #include #include /* For kmalloc() */ #include -#include /* For memset() */ #include #include -- cgit v1.2.3-71-gd317 From 1a6924f93d0d511da5b34189563c5e31ffe5df2e Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 29 Apr 2008 00:59:28 -0700 Subject: kbuild: remove duplicate, conflicting entry for oom.h oom.h is already tagged for unifdef'ing, so its entry as a simple exportable header should be deleted. Signed-off-by: Robert P. J. Day Cc: Sam Ravnborg Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index bda6f04791d4..0634e5a8d3ed 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -117,7 +117,6 @@ header-y += nfs2.h header-y += nfs4_mount.h header-y += nfs_mount.h header-y += nl80211.h -header-y += oom.h header-y += param.h header-y += pci_regs.h header-y += pfkeyv2.h -- cgit v1.2.3-71-gd317 From 86735118459b46422e20d3b73ee732b1f1f780b1 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 29 Apr 2008 00:59:28 -0700 Subject: kbuild: move files that don't check __KERNEL__ Move files that don't check __KERNEL__ from unifdef-y to header-y. Signed-off-by: Robert P. J. 
Day Cc: Sam Ravnborg Cc: David Woodhouse Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 0634e5a8d3ed..78fade0a1e35 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -20,6 +20,7 @@ header-y += affs_hardblocks.h header-y += aio_abi.h header-y += arcfb.h header-y += atmapi.h +header-y += atmarp.h header-y += atmbr2684.h header-y += atmclip.h header-y += atm_eni.h @@ -48,6 +49,7 @@ header-y += coff.h header-y += comstats.h header-y += const.h header-y += cgroupstats.h +header-y += cramfs_fs.h header-y += cycx_cfm.h header-y += dlmconstants.h header-y += dlm_device.h @@ -70,10 +72,12 @@ header-y += firewire-constants.h header-y += fuse.h header-y += genetlink.h header-y += gen_stats.h +header-y += gfs2_ondisk.h header-y += gigaset_dev.h header-y += hysdn_if.h header-y += i2o-dev.h header-y += i8k.h +header-y += if_addrlabel.h header-y += if_arcnet.h header-y += if_bonding.h header-y += if_cablemodem.h @@ -91,6 +95,7 @@ header-y += if_tunnel.h header-y += in6.h header-y += in_route.h header-y += ioctl.h +header-y += ip6_tunnel.h header-y += ipmi_msgdefs.h header-y += ipsec.h header-y += ipx.h @@ -165,7 +170,6 @@ unifdef-y += adfs_fs.h unifdef-y += agpgart.h unifdef-y += apm_bios.h unifdef-y += atalk.h -unifdef-y += atmarp.h unifdef-y += atmdev.h unifdef-y += atm.h unifdef-y += atm_tcp.h @@ -181,7 +185,6 @@ unifdef-y += cm4000_cs.h unifdef-y += cn_proc.h unifdef-y += coda.h unifdef-y += connector.h -unifdef-y += cramfs_fs.h unifdef-y += cuda.h unifdef-y += cyclades.h unifdef-y += dccp.h @@ -204,7 +207,6 @@ unifdef-y += futex.h unifdef-y += fs.h unifdef-y += gameport.h unifdef-y += generic_serial.h -unifdef-y += gfs2_ondisk.h unifdef-y += hayesesp.h unifdef-y += hdlcdrv.h unifdef-y += hdlc.h @@ -218,7 +220,6 @@ unifdef-y += i2c-dev.h unifdef-y += 
icmp.h unifdef-y += icmpv6.h unifdef-y += if_addr.h -unifdef-y += if_addrlabel.h unifdef-y += if_arp.h unifdef-y += if_bridge.h unifdef-y += if_ec.h @@ -242,7 +243,6 @@ unifdef-y += ipc.h unifdef-y += ipmi.h unifdef-y += ipv6.h unifdef-y += ipv6_route.h -unifdef-y += ip6_tunnel.h unifdef-y += isdn.h unifdef-y += isdnif.h unifdef-y += isdn_divertif.h -- cgit v1.2.3-71-gd317 From 95d8c365b2df2adb904963333a93b15414403ed1 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 29 Apr 2008 00:59:29 -0700 Subject: lists: add "const" qualifier to first arg of list_splice() operations Since neither the list_splice() nor __list_splice() routines modify their first argument, might as well declare them "const". [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Robert P. J. Day Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index b4a939b6b625..7627508f1b74 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -328,7 +328,7 @@ static inline int list_is_singular(const struct list_head *head) return !list_empty(head) && (head->next == head->prev); } -static inline void __list_splice(struct list_head *list, +static inline void __list_splice(const struct list_head *list, struct list_head *head) { struct list_head *first = list->next; @@ -347,7 +347,8 @@ static inline void __list_splice(struct list_head *list, * @list: the new list to add. * @head: the place to add it in the first list. 
*/ -static inline void list_splice(struct list_head *list, struct list_head *head) +static inline void list_splice(const struct list_head *list, + struct list_head *head) { if (!list_empty(list)) __list_splice(list, head); -- cgit v1.2.3-71-gd317 From 76308da18910e556747f6d100147f42ccc78662c Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 29 Apr 2008 00:59:33 -0700 Subject: smb.h: uses struct timespec but didn't include linux/time.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/smb.h b/include/linux/smb.h index f098dff93f6b..caa43b2370cb 100644 --- a/include/linux/smb.h +++ b/include/linux/smb.h @@ -11,6 +11,7 @@ #include #include +#include enum smb_protocol { SMB_PROTOCOL_NONE, -- cgit v1.2.3-71-gd317 From 8f0cfa52a1d4ffacd8e7de906d19662f5da58d58 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 00:59:41 -0700 Subject: xattr: add missing consts to function arguments Add missing consts to xattr function arguments. 
Signed-off-by: David Howells Cc: Andreas Gruenbacher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/xattr.c | 41 ++++++++++++++++++----------------- include/linux/security.h | 43 ++++++++++++++++++++----------------- include/linux/syscalls.h | 30 ++++++++++++++------------ include/linux/xattr.h | 6 +++--- security/commoncap.c | 6 +++--- security/dummy.c | 13 +++++------ security/security.c | 12 +++++------ security/selinux/hooks.c | 14 ++++++------ security/selinux/include/security.h | 2 +- security/selinux/ss/services.c | 4 ++-- security/smack/smack_lsm.c | 12 +++++------ 11 files changed, 96 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/fs/xattr.c b/fs/xattr.c index 89a942f07e1b..4706a8b1f495 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -67,7 +67,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) } int -vfs_setxattr(struct dentry *dentry, char *name, void *value, +vfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; @@ -131,7 +131,7 @@ out_noalloc: EXPORT_SYMBOL_GPL(xattr_getsecurity); ssize_t -vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) +vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; int error; @@ -187,7 +187,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size) EXPORT_SYMBOL_GPL(vfs_listxattr); int -vfs_removexattr(struct dentry *dentry, char *name) +vfs_removexattr(struct dentry *dentry, const char *name) { struct inode *inode = dentry->d_inode; int error; @@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); * Extended attribute SET operations */ static long -setxattr(struct dentry *d, char __user *name, void __user *value, +setxattr(struct dentry *d, const char __user *name, const void __user *value, size_t size, int flags) { int error; @@ -252,8 +252,8 @@ setxattr(struct dentry *d, char __user 
*name, void __user *value, } asmlinkage long -sys_setxattr(char __user *path, char __user *name, void __user *value, - size_t size, int flags) +sys_setxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags) { struct nameidata nd; int error; @@ -271,8 +271,8 @@ sys_setxattr(char __user *path, char __user *name, void __user *value, } asmlinkage long -sys_lsetxattr(char __user *path, char __user *name, void __user *value, - size_t size, int flags) +sys_lsetxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags) { struct nameidata nd; int error; @@ -290,7 +290,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value, } asmlinkage long -sys_fsetxattr(int fd, char __user *name, void __user *value, +sys_fsetxattr(int fd, const char __user *name, const void __user *value, size_t size, int flags) { struct file *f; @@ -315,7 +315,8 @@ sys_fsetxattr(int fd, char __user *name, void __user *value, * Extended attribute GET operations */ static ssize_t -getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) +getxattr(struct dentry *d, const char __user *name, void __user *value, + size_t size) { ssize_t error; void *kvalue = NULL; @@ -349,8 +350,8 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) } asmlinkage ssize_t -sys_getxattr(char __user *path, char __user *name, void __user *value, - size_t size) +sys_getxattr(const char __user *path, const char __user *name, + void __user *value, size_t size) { struct nameidata nd; ssize_t error; @@ -364,7 +365,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value, } asmlinkage ssize_t -sys_lgetxattr(char __user *path, char __user *name, void __user *value, +sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, size_t size) { struct nameidata nd; @@ -379,7 +380,7 @@ sys_lgetxattr(char __user *path, char __user 
*name, void __user *value, } asmlinkage ssize_t -sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size) +sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size) { struct file *f; ssize_t error = -EBADF; @@ -424,7 +425,7 @@ listxattr(struct dentry *d, char __user *list, size_t size) } asmlinkage ssize_t -sys_listxattr(char __user *path, char __user *list, size_t size) +sys_listxattr(const char __user *path, char __user *list, size_t size) { struct nameidata nd; ssize_t error; @@ -438,7 +439,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size) } asmlinkage ssize_t -sys_llistxattr(char __user *path, char __user *list, size_t size) +sys_llistxattr(const char __user *path, char __user *list, size_t size) { struct nameidata nd; ssize_t error; @@ -470,7 +471,7 @@ sys_flistxattr(int fd, char __user *list, size_t size) * Extended attribute REMOVE operations */ static long -removexattr(struct dentry *d, char __user *name) +removexattr(struct dentry *d, const char __user *name) { int error; char kname[XATTR_NAME_MAX + 1]; @@ -485,7 +486,7 @@ removexattr(struct dentry *d, char __user *name) } asmlinkage long -sys_removexattr(char __user *path, char __user *name) +sys_removexattr(const char __user *path, const char __user *name) { struct nameidata nd; int error; @@ -503,7 +504,7 @@ sys_removexattr(char __user *path, char __user *name) } asmlinkage long -sys_lremovexattr(char __user *path, char __user *name) +sys_lremovexattr(const char __user *path, const char __user *name) { struct nameidata nd; int error; @@ -521,7 +522,7 @@ sys_lremovexattr(char __user *path, char __user *name) } asmlinkage long -sys_fremovexattr(int fd, char __user *name) +sys_fremovexattr(int fd, const char __user *name) { struct file *f; struct dentry *dentry; diff --git a/include/linux/security.h b/include/linux/security.h index d0a28fd1747a..3ebcdd00b17d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,8 +53,9 @@ 
extern void cap_capset_set(struct task_struct *target, kernel_cap_t *effective, extern int cap_bprm_set_security(struct linux_binprm *bprm); extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); -extern int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, size_t size, int flags); -extern int cap_inode_removexattr(struct dentry *dentry, char *name); +extern int cap_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); extern int cap_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); @@ -1362,13 +1363,13 @@ struct security_operations { int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); void (*inode_delete) (struct inode *inode); - int (*inode_setxattr) (struct dentry *dentry, char *name, void *value, - size_t size, int flags); - void (*inode_post_setxattr) (struct dentry *dentry, char *name, void *value, - size_t size, int flags); - int (*inode_getxattr) (struct dentry *dentry, char *name); + int (*inode_setxattr) (struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); + void (*inode_post_setxattr) (struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); + int (*inode_getxattr) (struct dentry *dentry, const char *name); int (*inode_listxattr) (struct dentry *dentry); - int (*inode_removexattr) (struct dentry *dentry, char *name); + int (*inode_removexattr) (struct dentry *dentry, const char *name); int (*inode_need_killpriv) (struct dentry *dentry); int (*inode_killpriv) (struct dentry *dentry); int (*inode_getsecurity) (const struct inode *inode, const char *name, void 
**buffer, bool alloc); @@ -1633,13 +1634,13 @@ int security_inode_permission(struct inode *inode, int mask, struct nameidata *n int security_inode_setattr(struct dentry *dentry, struct iattr *attr); int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); void security_inode_delete(struct inode *inode); -int security_inode_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags); -void security_inode_post_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags); -int security_inode_getxattr(struct dentry *dentry, char *name); +int security_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +void security_inode_post_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +int security_inode_getxattr(struct dentry *dentry, const char *name); int security_inode_listxattr(struct dentry *dentry); -int security_inode_removexattr(struct dentry *dentry, char *name); +int security_inode_removexattr(struct dentry *dentry, const char *name); int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct dentry *dentry); int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc); @@ -2041,17 +2042,18 @@ static inline int security_inode_getattr(struct vfsmount *mnt, static inline void security_inode_delete(struct inode *inode) { } -static inline int security_inode_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +static inline int security_inode_setxattr(struct dentry *dentry, + const char *name, const void *value, size_t size, int flags) { return cap_inode_setxattr(dentry, name, value, size, flags); } -static inline void security_inode_post_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +static inline void security_inode_post_setxattr(struct dentry *dentry, + const char *name, 
const void *value, size_t size, int flags) { } -static inline int security_inode_getxattr(struct dentry *dentry, char *name) +static inline int security_inode_getxattr(struct dentry *dentry, + const char *name) { return 0; } @@ -2061,7 +2063,8 @@ static inline int security_inode_listxattr(struct dentry *dentry) return 0; } -static inline int security_inode_removexattr(struct dentry *dentry, char *name) +static inline int security_inode_removexattr(struct dentry *dentry, + const char *name) { return cap_inode_removexattr(dentry, name); } diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8df6d1382ac8..0522f368f9d7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -240,26 +240,28 @@ asmlinkage long sys_truncate64(const char __user *path, loff_t length); asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); #endif -asmlinkage long sys_setxattr(char __user *path, char __user *name, - void __user *value, size_t size, int flags); -asmlinkage long sys_lsetxattr(char __user *path, char __user *name, - void __user *value, size_t size, int flags); -asmlinkage long sys_fsetxattr(int fd, char __user *name, void __user *value, - size_t size, int flags); -asmlinkage ssize_t sys_getxattr(char __user *path, char __user *name, +asmlinkage long sys_setxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags); +asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags); +asmlinkage long sys_fsetxattr(int fd, const char __user *name, + const void __user *value, size_t size, int flags); +asmlinkage ssize_t sys_getxattr(const char __user *path, const char __user *name, void __user *value, size_t size); -asmlinkage ssize_t sys_lgetxattr(char __user *path, char __user *name, +asmlinkage ssize_t sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, size_t size); -asmlinkage ssize_t 
sys_fgetxattr(int fd, char __user *name, +asmlinkage ssize_t sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size); -asmlinkage ssize_t sys_listxattr(char __user *path, char __user *list, +asmlinkage ssize_t sys_listxattr(const char __user *path, char __user *list, size_t size); -asmlinkage ssize_t sys_llistxattr(char __user *path, char __user *list, +asmlinkage ssize_t sys_llistxattr(const char __user *path, char __user *list, size_t size); asmlinkage ssize_t sys_flistxattr(int fd, char __user *list, size_t size); -asmlinkage long sys_removexattr(char __user *path, char __user *name); -asmlinkage long sys_lremovexattr(char __user *path, char __user *name); -asmlinkage long sys_fremovexattr(int fd, char __user *name); +asmlinkage long sys_removexattr(const char __user *path, + const char __user *name); +asmlinkage long sys_lremovexattr(const char __user *path, + const char __user *name); +asmlinkage long sys_fremovexattr(int fd, const char __user *name); asmlinkage unsigned long sys_brk(unsigned long brk); asmlinkage long sys_mprotect(unsigned long start, size_t len, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index df6b95d2218e..d131e352cfe1 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -47,10 +47,10 @@ struct xattr_handler { }; ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); -ssize_t vfs_getxattr(struct dentry *, char *, void *, size_t); +ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); -int vfs_setxattr(struct dentry *, char *, void *, size_t, int); -int vfs_removexattr(struct dentry *, char *); +int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); +int vfs_removexattr(struct dentry *, const char *); ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t 
buffer_size); diff --git a/security/commoncap.c b/security/commoncap.c index e8c3f5e46705..5edabc7542ae 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -383,8 +383,8 @@ int cap_bprm_secureexec (struct linux_binprm *bprm) current->egid != current->gid); } -int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, - size_t size, int flags) +int cap_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (!strcmp(name, XATTR_NAME_CAPS)) { if (!capable(CAP_SETFCAP)) @@ -397,7 +397,7 @@ int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, return 0; } -int cap_inode_removexattr(struct dentry *dentry, char *name) +int cap_inode_removexattr(struct dentry *dentry, const char *name) { if (!strcmp(name, XATTR_NAME_CAPS)) { if (!capable(CAP_SETFCAP)) diff --git a/security/dummy.c b/security/dummy.c index 58d4dd1af5c7..26ee06ef0e93 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -365,8 +365,8 @@ static void dummy_inode_delete (struct inode *ino) return; } -static int dummy_inode_setxattr (struct dentry *dentry, char *name, void *value, - size_t size, int flags) +static int dummy_inode_setxattr (struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (!strncmp(name, XATTR_SECURITY_PREFIX, sizeof(XATTR_SECURITY_PREFIX) - 1) && @@ -375,12 +375,13 @@ static int dummy_inode_setxattr (struct dentry *dentry, char *name, void *value, return 0; } -static void dummy_inode_post_setxattr (struct dentry *dentry, char *name, void *value, - size_t size, int flags) +static void dummy_inode_post_setxattr (struct dentry *dentry, const char *name, + const void *value, size_t size, + int flags) { } -static int dummy_inode_getxattr (struct dentry *dentry, char *name) +static int dummy_inode_getxattr (struct dentry *dentry, const char *name) { return 0; } @@ -390,7 +391,7 @@ static int dummy_inode_listxattr (struct dentry *dentry) return 0; } -static int 
dummy_inode_removexattr (struct dentry *dentry, char *name) +static int dummy_inode_removexattr (struct dentry *dentry, const char *name) { if (!strncmp(name, XATTR_SECURITY_PREFIX, sizeof(XATTR_SECURITY_PREFIX) - 1) && diff --git a/security/security.c b/security/security.c index d5cb5898d967..a809035441ab 100644 --- a/security/security.c +++ b/security/security.c @@ -491,23 +491,23 @@ void security_inode_delete(struct inode *inode) security_ops->inode_delete(inode); } -int security_inode_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +int security_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (unlikely(IS_PRIVATE(dentry->d_inode))) return 0; return security_ops->inode_setxattr(dentry, name, value, size, flags); } -void security_inode_post_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +void security_inode_post_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (unlikely(IS_PRIVATE(dentry->d_inode))) return; security_ops->inode_post_setxattr(dentry, name, value, size, flags); } -int security_inode_getxattr(struct dentry *dentry, char *name) +int security_inode_getxattr(struct dentry *dentry, const char *name) { if (unlikely(IS_PRIVATE(dentry->d_inode))) return 0; @@ -521,7 +521,7 @@ int security_inode_listxattr(struct dentry *dentry) return security_ops->inode_listxattr(dentry); } -int security_inode_removexattr(struct dentry *dentry, char *name) +int security_inode_removexattr(struct dentry *dentry, const char *name) { if (unlikely(IS_PRIVATE(dentry->d_inode))) return 0; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 04acb5af8317..047365ac9faa 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2619,7 +2619,7 @@ static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) return dentry_has_perm(current, mnt, dentry, 
FILE__GETATTR); } -static int selinux_inode_setotherxattr(struct dentry *dentry, char *name) +static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name) { if (!strncmp(name, XATTR_SECURITY_PREFIX, sizeof XATTR_SECURITY_PREFIX - 1)) { @@ -2638,7 +2638,8 @@ static int selinux_inode_setotherxattr(struct dentry *dentry, char *name) return dentry_has_perm(current, NULL, dentry, FILE__SETATTR); } -static int selinux_inode_setxattr(struct dentry *dentry, char *name, void *value, size_t size, int flags) +static int selinux_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { struct task_security_struct *tsec = current->security; struct inode *inode = dentry->d_inode; @@ -2687,8 +2688,9 @@ static int selinux_inode_setxattr(struct dentry *dentry, char *name, void *value &ad); } -static void selinux_inode_post_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, + int flags) { struct inode *inode = dentry->d_inode; struct inode_security_struct *isec = inode->i_security; @@ -2711,7 +2713,7 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, char *name, return; } -static int selinux_inode_getxattr(struct dentry *dentry, char *name) +static int selinux_inode_getxattr(struct dentry *dentry, const char *name) { return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); } @@ -2721,7 +2723,7 @@ static int selinux_inode_listxattr(struct dentry *dentry) return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); } -static int selinux_inode_removexattr(struct dentry *dentry, char *name) +static int selinux_inode_removexattr(struct dentry *dentry, const char *name) { if (strcmp(name, XATTR_NAME_SELINUX)) return selinux_inode_setotherxattr(dentry, name); diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 
6445b6440648..cdb14add27d2 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -93,7 +93,7 @@ int security_change_sid(u32 ssid, u32 tsid, int security_sid_to_context(u32 sid, char **scontext, u32 *scontext_len); -int security_context_to_sid(char *scontext, u32 scontext_len, +int security_context_to_sid(const char *scontext, u32 scontext_len, u32 *out_sid); int security_context_to_sid_default(char *scontext, u32 scontext_len, diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 2daaddbb301d..25cac5a2aa8e 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -708,7 +708,7 @@ out: } -static int security_context_to_sid_core(char *scontext, u32 scontext_len, +static int security_context_to_sid_core(const char *scontext, u32 scontext_len, u32 *sid, u32 def_sid, gfp_t gfp_flags) { char *scontext2; @@ -835,7 +835,7 @@ out: * Returns -%EINVAL if the context is invalid, -%ENOMEM if insufficient * memory is available, or 0 on success. 
*/ -int security_context_to_sid(char *scontext, u32 scontext_len, u32 *sid) +int security_context_to_sid(const char *scontext, u32 scontext_len, u32 *sid) { return security_context_to_sid_core(scontext, scontext_len, sid, SECSID_NULL, GFP_KERNEL); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 77ec16a3b68b..5d2ec5650e61 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -574,8 +574,8 @@ static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) * * Returns 0 if access is permitted, an error code otherwise */ -static int smack_inode_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +static int smack_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { int rc = 0; @@ -604,8 +604,8 @@ static int smack_inode_setxattr(struct dentry *dentry, char *name, * Set the pointer in the inode blob to the entry found * in the master label list. */ -static void smack_inode_post_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +static void smack_inode_post_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { struct inode_smack *isp; char *nsp; @@ -641,7 +641,7 @@ static void smack_inode_post_setxattr(struct dentry *dentry, char *name, * * Returns 0 if access is permitted, an error code otherwise */ -static int smack_inode_getxattr(struct dentry *dentry, char *name) +static int smack_inode_getxattr(struct dentry *dentry, const char *name) { return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); } @@ -655,7 +655,7 @@ static int smack_inode_getxattr(struct dentry *dentry, char *name) * * Returns 0 if access is permitted, an error code otherwise */ -static int smack_inode_removexattr(struct dentry *dentry, char *name) +static int smack_inode_removexattr(struct dentry *dentry, const char *name) { int rc = 0; -- cgit v1.2.3-71-gd317 From 
5f97a5a8799b8d7d0afdb9d68a50a4e0e8298a05 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Tue, 29 Apr 2008 00:59:43 -0700 Subject: isolate ratelimit from printk.c for other use Due to the rcupreempt.h WARN_ON triggered, I got 2G syslog file. For some serious complaining of kernel, we need repeat the warnings, so here I isolate the ratelimit part of printk.c to a standalone file. Signed-off-by: Dave Young Acked-by: Paul E. McKenney Tested-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 + kernel/printk.c | 26 +------------------------ lib/Makefile | 2 +- lib/ratelimit.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 26 deletions(-) create mode 100644 lib/ratelimit.c (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 28caa53dd1f7..ad5d05efcd1a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -188,6 +188,7 @@ extern int log_buf_copy(char *dest, int idx, int len); extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; extern int printk_ratelimit(void); +extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst); extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); diff --git a/kernel/printk.c b/kernel/printk.c index bdd4ea8c3f2b..d3f9c0f788bf 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1287,31 +1287,7 @@ void tty_write_message(struct tty_struct *tty, char *msg) */ int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) { - static DEFINE_SPINLOCK(ratelimit_lock); - static unsigned toks = 10 * 5 * HZ; - static unsigned long last_msg; - static int missed; - unsigned long flags; - unsigned long now = jiffies; - - spin_lock_irqsave(&ratelimit_lock, flags); - toks += now - last_msg; - last_msg = now; - if (toks > (ratelimit_burst * 
ratelimit_jiffies)) - toks = ratelimit_burst * ratelimit_jiffies; - if (toks >= ratelimit_jiffies) { - int lost = missed; - - missed = 0; - toks -= ratelimit_jiffies; - spin_unlock_irqrestore(&ratelimit_lock, flags); - if (lost) - printk(KERN_WARNING "printk: %d messages suppressed.\n", lost); - return 1; - } - missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 0; + return __ratelimit(ratelimit_jiffies, ratelimit_burst); } EXPORT_SYMBOL(__printk_ratelimit); diff --git a/lib/Makefile b/lib/Makefile index 2d7001b7f5a4..0ae4eb047aac 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -6,7 +6,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o + proportions.o prio_heap.o ratelimit.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/ratelimit.c b/lib/ratelimit.c new file mode 100644 index 000000000000..485e3040dcd4 --- /dev/null +++ b/lib/ratelimit.c @@ -0,0 +1,51 @@ +/* + * ratelimit.c - Do something with rate limit. + * + * Isolated from kernel/printk.c by Dave Young + * + * This file is released under the GPLv2. 
+ * + */ + +#include <linux/kernel.h> +#include <linux/jiffies.h> +#include <linux/module.h> + +/* + * __ratelimit - rate limiting + * @ratelimit_jiffies: minimum time in jiffies between two callbacks + * @ratelimit_burst: number of callbacks we do before ratelimiting + * + * This enforces a rate limit: not more than @ratelimit_burst callbacks + * in every ratelimit_jiffies + */ +int __ratelimit(int ratelimit_jiffies, int ratelimit_burst) +{ + static DEFINE_SPINLOCK(ratelimit_lock); + static unsigned toks = 10 * 5 * HZ; + static unsigned long last_msg; + static int missed; + unsigned long flags; + unsigned long now = jiffies; + + spin_lock_irqsave(&ratelimit_lock, flags); + toks += now - last_msg; + last_msg = now; + if (toks > (ratelimit_burst * ratelimit_jiffies)) + toks = ratelimit_burst * ratelimit_jiffies; + if (toks >= ratelimit_jiffies) { + int lost = missed; + + missed = 0; + toks -= ratelimit_jiffies; + spin_unlock_irqrestore(&ratelimit_lock, flags); + if (lost) + printk(KERN_WARNING "%s: %d messages suppressed\n", + __func__, lost); + return 1; + } + missed++; + spin_unlock_irqrestore(&ratelimit_lock, flags); + return 0; +} +EXPORT_SYMBOL(__ratelimit); -- cgit v1.2.3-71-gd317 From c9e587abfdec2c2aaa55fab83bcb4972e2f84f9b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 29 Apr 2008 00:59:46 -0700 Subject: vt: fix background color on line feed A command that causes a line feed while a background color is active, such as perl -e 'print "x" x 60, "\e[44m", "x" x 40, "\e[0m\n"' and perl -e 'print "x" x 40, "\e[44m\n", "x" x 40, "\e[0m\n"' causes the line that was started as a result of the line feed to be completely filled with the currently active background color instead of the default color. When scrolling, part of the current screen is memcpy'd/memmove'd to the new region, and the new line(s) that will appear as a result are cleared using memset. However, the lines are cleared with vc->vc_video_erase_char, causing them to be colored with the currently active background color. 
This is different from X11 terminal emulators which always paint the new lines with the default background color (e.g. `xterm -bg black`). The clear operation (\e[1J and \e[2J) also uses vc_video_erase_char, so a new vc->vc_scrl_erase_char is introduced which contains the erase character used for scrolling, which is built from vc->vc_def_color instead of vc->vc_color. Signed-off-by: Jan Engelhardt Cc: "Antonino A. Daplas" Cc: "H. Peter Anvin" Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/vt.c | 7 ++++--- drivers/video/console/fbcon.c | 8 ++++---- drivers/video/console/mdacon.c | 2 +- drivers/video/console/sticon.c | 4 ++-- drivers/video/console/vgacon.c | 4 ++-- include/linux/console_struct.h | 1 + 6 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/vt.c b/drivers/char/vt.c index df4c3ead9e2b..1c2660477135 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -301,7 +301,7 @@ static void scrup(struct vc_data *vc, unsigned int t, unsigned int b, int nr) d = (unsigned short *)(vc->vc_origin + vc->vc_size_row * t); s = (unsigned short *)(vc->vc_origin + vc->vc_size_row * (t + nr)); scr_memmovew(d, s, (b - t - nr) * vc->vc_size_row); - scr_memsetw(d + (b - t - nr) * vc->vc_cols, vc->vc_video_erase_char, + scr_memsetw(d + (b - t - nr) * vc->vc_cols, vc->vc_scrl_erase_char, vc->vc_size_row * nr); } @@ -319,7 +319,7 @@ static void scrdown(struct vc_data *vc, unsigned int t, unsigned int b, int nr) s = (unsigned short *)(vc->vc_origin + vc->vc_size_row * t); step = vc->vc_cols * nr; scr_memmovew(s + step, s, (b - t - nr) * vc->vc_size_row); - scr_memsetw(s, vc->vc_video_erase_char, 2 * step); + scr_memsetw(s, vc->vc_scrl_erase_char, 2 * step); } static void do_update_region(struct vc_data *vc, unsigned long start, int count) @@ -400,7 +400,7 @@ static u8 build_attr(struct vc_data *vc, u8 _color, u8 _intensity, u8 _blink, * Bit 7 : blink */ { - u8 a = vc->vc_color; + u8 a = 
_color; if (!vc->vc_can_do_color) return _intensity | (_italic ? 2 : 0) | @@ -434,6 +434,7 @@ static void update_attr(struct vc_data *vc) vc->vc_blink, vc->vc_underline, vc->vc_reverse ^ vc->vc_decscnm, vc->vc_italic); vc->vc_video_erase_char = (build_attr(vc, vc->vc_color, 1, vc->vc_blink, 0, vc->vc_decscnm, 0) << 8) | ' '; + vc->vc_scrl_erase_char = (build_attr(vc, vc->vc_def_color, 1, false, false, false, false) << 8) | ' '; } /* Note: inverting the screen twice should revert to the original state */ diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 8eda7b60df8f..ad31983b43eb 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -1881,7 +1881,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, scr_memsetw((unsigned short *) (vc->vc_origin + vc->vc_size_row * (b - count)), - vc->vc_video_erase_char, + vc->vc_scrl_erase_char, vc->vc_size_row * count); return 1; break; @@ -1953,7 +1953,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, scr_memsetw((unsigned short *) (vc->vc_origin + vc->vc_size_row * (b - count)), - vc->vc_video_erase_char, + vc->vc_scrl_erase_char, vc->vc_size_row * count); return 1; } @@ -1972,7 +1972,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, scr_memsetw((unsigned short *) (vc->vc_origin + vc->vc_size_row * t), - vc->vc_video_erase_char, + vc->vc_scrl_erase_char, vc->vc_size_row * count); return 1; break; @@ -2042,7 +2042,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, scr_memsetw((unsigned short *) (vc->vc_origin + vc->vc_size_row * t), - vc->vc_video_erase_char, + vc->vc_scrl_erase_char, vc->vc_size_row * count); return 1; } diff --git a/drivers/video/console/mdacon.c b/drivers/video/console/mdacon.c index bd8d995fe25d..38a296bbdfc9 100644 --- a/drivers/video/console/mdacon.c +++ b/drivers/video/console/mdacon.c @@ -531,7 +531,7 @@ static void mdacon_cursor(struct vc_data *c, int mode) static 
int mdacon_scroll(struct vc_data *c, int t, int b, int dir, int lines) { - u16 eattr = mda_convert_attr(c->vc_video_erase_char); + u16 eattr = mda_convert_attr(c->vc_scrl_erase_char); if (!lines) return 0; diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c index 67a682d6cc7b..a11cc2fdd4cd 100644 --- a/drivers/video/console/sticon.c +++ b/drivers/video/console/sticon.c @@ -170,12 +170,12 @@ static int sticon_scroll(struct vc_data *conp, int t, int b, int dir, int count) switch (dir) { case SM_UP: sti_bmove(sti, t + count, 0, t, 0, b - t - count, conp->vc_cols); - sti_clear(sti, b - count, 0, count, conp->vc_cols, conp->vc_video_erase_char); + sti_clear(sti, b - count, 0, count, conp->vc_cols, conp->vc_scrl_erase_char); break; case SM_DOWN: sti_bmove(sti, t, 0, t + count, 0, b - t - count, conp->vc_cols); - sti_clear(sti, t, 0, count, conp->vc_cols, conp->vc_video_erase_char); + sti_clear(sti, t, 0, count, conp->vc_cols, conp->vc_scrl_erase_char); break; } diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 6df29a62d720..bd1f57b259d9 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1350,7 +1350,7 @@ static int vgacon_scroll(struct vc_data *c, int t, int b, int dir, } else c->vc_origin += delta; scr_memsetw((u16 *) (c->vc_origin + c->vc_screenbuf_size - - delta), c->vc_video_erase_char, + delta), c->vc_scrl_erase_char, delta); } else { if (oldo - delta < vga_vram_base) { @@ -1363,7 +1363,7 @@ static int vgacon_scroll(struct vc_data *c, int t, int b, int dir, } else c->vc_origin -= delta; c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size; - scr_memsetw((u16 *) (c->vc_origin), c->vc_video_erase_char, + scr_memsetw((u16 *) (c->vc_origin), c->vc_scrl_erase_char, delta); } c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size; diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index d71f7c0f931b..b03f80a078be 100644 --- a/include/linux/console_struct.h +++ 
b/include/linux/console_struct.h @@ -53,6 +53,7 @@ struct vc_data { unsigned short vc_hi_font_mask; /* [#] Attribute set for upper 256 chars of font or 0 if not supported */ struct console_font vc_font; /* Current VC font set */ unsigned short vc_video_erase_char; /* Background erase character */ + unsigned short vc_scrl_erase_char; /* Erase character for scroll */ /* VT terminal data */ unsigned int vc_state; /* Escape sequence parser state */ unsigned int vc_npar,vc_par[NPAR]; /* Parameters of current escape sequence */ -- cgit v1.2.3-71-gd317 From f4c753b7eacc277e506066abdda351cbc1cf8e6a Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Tue, 29 Apr 2008 00:59:56 -0700 Subject: CGroup API files: rename read/write_uint methods to read_write_u64 Several people have justifiably complained that the "_uint" suffix is inappropriate for functions that handle u64 values, so this patch just renames all these functions and their users to have the suffix _u64. [peterz@infradead.org: build fix] Signed-off-by: Paul Menage Cc: "Li Zefan" Cc: Balbir Singh Cc: Paul Jackson Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: "YAMAMOTO Takashi" Signed-off-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 8 ++++---- kernel/cgroup.c | 32 ++++++++++++++++---------------- kernel/cgroup_debug.c | 8 ++++---- kernel/sched.c | 16 ++++++++-------- 4 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a6a6035a4e1e..058371c5d360 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -190,20 +190,20 @@ struct cftype { struct file *file, char __user *buf, size_t nbytes, loff_t *ppos); /* - * read_u64() is a shortcut for the common case of returning a + * read_u64() is a shortcut for the common case of returning a * single integer. 
Use it in place of read() */ - u64 (*read_uint) (struct cgroup *cgrp, struct cftype *cft); + u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft); ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft, struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos); /* - * write_uint() is a shortcut for the common case of accepting + * write_u64() is a shortcut for the common case of accepting * a single integer (as parsed by simple_strtoull) from * userspace. Use in place of write(); return 0 or error. */ - int (*write_uint) (struct cgroup *cgrp, struct cftype *cft, u64 val); + int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val); int (*release) (struct inode *inode, struct file *file); }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 068f58da855a..0bd79a81666a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1311,10 +1311,10 @@ enum cgroup_filetype { FILE_RELEASE_AGENT, }; -static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft, - struct file *file, - const char __user *userbuf, - size_t nbytes, loff_t *unused_ppos) +static ssize_t cgroup_write_u64(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + const char __user *userbuf, + size_t nbytes, loff_t *unused_ppos) { char buffer[64]; int retval = 0; @@ -1338,7 +1338,7 @@ static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft, return -EINVAL; /* Pass to subsystem */ - retval = cft->write_uint(cgrp, cft, val); + retval = cft->write_u64(cgrp, cft, val); if (!retval) retval = nbytes; return retval; @@ -1419,18 +1419,18 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, return -ENODEV; if (cft->write) return cft->write(cgrp, cft, file, buf, nbytes, ppos); - if (cft->write_uint) - return cgroup_write_uint(cgrp, cft, file, buf, nbytes, ppos); + if (cft->write_u64) + return cgroup_write_u64(cgrp, cft, file, buf, nbytes, ppos); return -EINVAL; } -static ssize_t cgroup_read_uint(struct cgroup *cgrp, 
struct cftype *cft, - struct file *file, - char __user *buf, size_t nbytes, - loff_t *ppos) +static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, + loff_t *ppos) { char tmp[64]; - u64 val = cft->read_uint(cgrp, cft); + u64 val = cft->read_u64(cgrp, cft); int len = sprintf(tmp, "%llu\n", (unsigned long long) val); return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); @@ -1490,8 +1490,8 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, if (cft->read) return cft->read(cgrp, cft, file, buf, nbytes, ppos); - if (cft->read_uint) - return cgroup_read_uint(cgrp, cft, file, buf, nbytes, ppos); + if (cft->read_u64) + return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos); return -EINVAL; } @@ -2158,14 +2158,14 @@ static struct cftype files[] = { { .name = "notify_on_release", - .read_uint = cgroup_read_notify_on_release, + .read_u64 = cgroup_read_notify_on_release, .write = cgroup_common_file_write, .private = FILE_NOTIFY_ON_RELEASE, }, { .name = "releasable", - .read_uint = cgroup_read_releasable, + .read_u64 = cgroup_read_releasable, .private = FILE_RELEASABLE, } }; diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c index 37301e877cb0..cbb7a26f4ea3 100644 --- a/kernel/cgroup_debug.c +++ b/kernel/cgroup_debug.c @@ -65,21 +65,21 @@ static u64 current_css_set_refcount_read(struct cgroup *cont, static struct cftype files[] = { { .name = "cgroup_refcount", - .read_uint = cgroup_refcount_read, + .read_u64 = cgroup_refcount_read, }, { .name = "taskcount", - .read_uint = taskcount_read, + .read_u64 = taskcount_read, }, { .name = "current_css_set", - .read_uint = current_css_set_read, + .read_u64 = current_css_set_read, }, { .name = "current_css_set_refcount", - .read_uint = current_css_set_refcount_read, + .read_u64 = current_css_set_refcount_read, }, }; diff --git a/kernel/sched.c b/kernel/sched.c index 740fb409e5bb..2528fbd974b4 100644 --- a/kernel/sched.c +++ 
b/kernel/sched.c @@ -9057,13 +9057,13 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, } #ifdef CONFIG_FAIR_GROUP_SCHED -static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, +static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, u64 shareval) { return sched_group_set_shares(cgroup_tg(cgrp), shareval); } -static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) +static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) { struct task_group *tg = cgroup_tg(cgrp); @@ -9133,8 +9133,8 @@ static struct cftype cpu_files[] = { #ifdef CONFIG_FAIR_GROUP_SCHED { .name = "shares", - .read_uint = cpu_shares_read_uint, - .write_uint = cpu_shares_write_uint, + .read_u64 = cpu_shares_read_u64, + .write_u64 = cpu_shares_write_u64, }, #endif #ifdef CONFIG_RT_GROUP_SCHED @@ -9145,8 +9145,8 @@ static struct cftype cpu_files[] = { }, { .name = "rt_period_us", - .read_uint = cpu_rt_period_read_uint, - .write_uint = cpu_rt_period_write_uint, + .read_u64 = cpu_rt_period_read_uint, + .write_u64 = cpu_rt_period_write_uint, }, #endif }; @@ -9277,8 +9277,8 @@ out: static struct cftype files[] = { { .name = "usage", - .read_uint = cpuusage_read, - .write_uint = cpuusage_write, + .read_u64 = cpuusage_read, + .write_u64 = cpuusage_write, }, }; -- cgit v1.2.3-71-gd317 From 2c7eabf37647dd459d555e76954b4de87be2321f Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Tue, 29 Apr 2008 00:59:58 -0700 Subject: CGroup API files: add res_counter_read_u64() Adds a function for returning the value of a resource counter member, in a form suitable for use in a cgroup read_u64 control file method. 
Signed-off-by: Paul Menage Cc: "Li Zefan" Cc: Balbir Singh Cc: Paul Jackson Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: "YAMAMOTO Takashi" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 5 ++++- kernel/res_counter.c | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 61363ce896d5..8cb1ecd420a9 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -39,8 +39,9 @@ struct res_counter { spinlock_t lock; }; -/* +/** * Helpers to interact with userspace + * res_counter_read_u64() - returns the value of the specified member. * res_counter_read/_write - put/get the specified fields from the * res_counter struct to/from the user * @@ -51,6 +52,8 @@ struct res_counter { * @pos: and the offset. */ +u64 res_counter_read_u64(struct res_counter *counter, int member); + ssize_t res_counter_read(struct res_counter *counter, int member, const char __user *buf, size_t nbytes, loff_t *pos, int (*read_strategy)(unsigned long long val, char *s)); diff --git a/kernel/res_counter.c b/kernel/res_counter.c index a508c2769463..70587657dda3 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -93,6 +93,11 @@ ssize_t res_counter_read(struct res_counter *counter, int member, pos, buf, s - buf); } +u64 res_counter_read_u64(struct res_counter *counter, int member) +{ + return *res_counter_member(counter, member); +} + ssize_t res_counter_write(struct res_counter *counter, int member, const char __user *userbuf, size_t nbytes, loff_t *pos, int (*write_strategy)(char *st_buf, unsigned long long *val)) -- cgit v1.2.3-71-gd317 From 9179656961adcea3c25403365597e486d851ac5e Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Tue, 29 Apr 2008 01:00:01 -0700 Subject: CGroup API files: add cgroup map data type Adds a new type of supported control file representation, a map from strings to u64 values. 
Each map entry is printed as a line in a similar format to /proc/vmstat, i.e. "$key $value\n" Signed-off-by: Paul Menage Cc: "Li Zefan" Cc: Balbir Singh Cc: Paul Jackson Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: "YAMAMOTO Takashi" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 19 ++++++++++++++++++ kernel/cgroup.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 058371c5d360..0a3ab670dd2f 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -166,6 +166,16 @@ struct css_set { }; +/* + * cgroup_map_cb is an abstract callback API for reporting map-valued + * control files + */ + +struct cgroup_map_cb { + int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value); + void *state; +}; + /* struct cftype: * * The files in the cgroup filesystem mostly have a very simple read/write @@ -194,6 +204,15 @@ struct cftype { * single integer. Use it in place of read() */ u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft); + /* + * read_map() is used for defining a map of key/value + * pairs. It should call cb->fill(cb, key, value) for each + * entry. The key/value pairs (and their ordering) should not + * change between reboots. + */ + int (*read_map) (struct cgroup *cont, struct cftype *cft, + struct cgroup_map_cb *cb); + ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft, struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 57afdde871ac..693bcc03188b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1492,6 +1492,46 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, return -EINVAL; } +/* + * seqfile ops/methods for returning structured data. Currently just + * supports string->u64 maps, but can be extended in future. 
+ */ + +struct cgroup_seqfile_state { + struct cftype *cft; + struct cgroup *cgroup; +}; + +static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) +{ + struct seq_file *sf = cb->state; + return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value); +} + +static int cgroup_seqfile_show(struct seq_file *m, void *arg) +{ + struct cgroup_seqfile_state *state = m->private; + struct cftype *cft = state->cft; + struct cgroup_map_cb cb = { + .fill = cgroup_map_add, + .state = m, + }; + return cft->read_map(state->cgroup, cft, &cb); +} + +int cgroup_seqfile_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + kfree(seq->private); + return single_release(inode, file); +} + +static struct file_operations cgroup_seqfile_operations = { + .read = seq_read, + .llseek = seq_lseek, + .release = cgroup_seqfile_release, +}; + static int cgroup_file_open(struct inode *inode, struct file *file) { int err; @@ -1504,7 +1544,18 @@ static int cgroup_file_open(struct inode *inode, struct file *file) cft = __d_cft(file->f_dentry); if (!cft) return -ENODEV; - if (cft->open) + if (cft->read_map) { + struct cgroup_seqfile_state *state = + kzalloc(sizeof(*state), GFP_USER); + if (!state) + return -ENOMEM; + state->cft = cft; + state->cgroup = __d_cgrp(file->f_dentry->d_parent); + file->f_op = &cgroup_seqfile_operations; + err = single_open(file, cgroup_seqfile_show, state); + if (err < 0) + kfree(state); + } else if (cft->open) err = cft->open(inode, file); else err = 0; -- cgit v1.2.3-71-gd317 From 3116f0e3df0a67ad56f15dd4c5f6cefb04bb4a98 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Tue, 29 Apr 2008 01:00:04 -0700 Subject: CGroup API files: move "releasable" to cgroup_debug subsystem The "releasable" control file provided by the cgroup framework exports the state of a per-cgroup flag that's related to the notify-on-release feature. 
This isn't really generally useful, unless you're trying to debug this particular feature of cgroups. This patch moves the "releasable" file to the cgroup_debug subsystem. Signed-off-by: Paul Menage Cc: "Li Zefan" Cc: Balbir Singh Cc: Paul Jackson Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: "YAMAMOTO Takashi" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 11 +++++++++++ kernel/cgroup.c | 23 ----------------------- kernel/cgroup_debug.c | 12 +++++++++++- 3 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0a3ab670dd2f..b40fd5ee9a76 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -88,6 +88,17 @@ static inline void css_put(struct cgroup_subsys_state *css) __css_put(css); } +/* bits in struct cgroup flags field */ +enum { + /* Control Group is dead */ + CGRP_REMOVED, + /* Control Group has previously had a child cgroup or a task, + * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */ + CGRP_RELEASABLE, + /* Control Group requires release notifications to userspace */ + CGRP_NOTIFY_ON_RELEASE, +}; + struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 693bcc03188b..b5ef0c4772f7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -119,17 +119,6 @@ static int root_count; */ static int need_forkexit_callback; -/* bits in struct cgroup flags field */ -enum { - /* Control Group is dead */ - CGRP_REMOVED, - /* Control Group has previously had a child cgroup or a task, - * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */ - CGRP_RELEASABLE, - /* Control Group requires release notifications to userspace */ - CGRP_NOTIFY_ON_RELEASE, -}; - /* convenient tests for these bits */ inline int cgroup_is_removed(const struct cgroup *cgrp) { @@ -1307,7 +1296,6 @@ enum cgroup_filetype { FILE_DIR, FILE_TASKLIST, FILE_NOTIFY_ON_RELEASE, - 
FILE_RELEASABLE, FILE_RELEASE_AGENT, }; @@ -2186,11 +2174,6 @@ static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, return notify_on_release(cgrp); } -static u64 cgroup_read_releasable(struct cgroup *cgrp, struct cftype *cft) -{ - return test_bit(CGRP_RELEASABLE, &cgrp->flags); -} - /* * for the common functions, 'private' gives the type of file */ @@ -2210,12 +2193,6 @@ static struct cftype files[] = { .write = cgroup_common_file_write, .private = FILE_NOTIFY_ON_RELEASE, }, - - { - .name = "releasable", - .read_u64 = cgroup_read_releasable, - .private = FILE_RELEASABLE, - } }; static struct cftype cft_release_agent = { diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c index cbb7a26f4ea3..c3dc3aba4c02 100644 --- a/kernel/cgroup_debug.c +++ b/kernel/cgroup_debug.c @@ -1,5 +1,5 @@ /* - * kernel/ccontainer_debug.c - Example cgroup subsystem that + * kernel/cgroup_debug.c - Example cgroup subsystem that * exposes debug info * * Copyright (C) Google Inc, 2007 @@ -62,6 +62,11 @@ static u64 current_css_set_refcount_read(struct cgroup *cont, return count; } +static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) +{ + return test_bit(CGRP_RELEASABLE, &cgrp->flags); +} + static struct cftype files[] = { { .name = "cgroup_refcount", @@ -81,6 +86,11 @@ static struct cftype files[] = { .name = "current_css_set_refcount", .read_u64 = current_css_set_refcount_read, }, + + { + .name = "releasable", + .read_u64 = releasable_read, + } }; static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) -- cgit v1.2.3-71-gd317 From e73d2c61d1fcbd3621688ae457b49509c8d4c601 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Tue, 29 Apr 2008 01:00:06 -0700 Subject: CGroups _s64 files: add cgroups read_s64/write_s64 file methods These patches add cgroups read_s64 and write_s64 control file methods (the signed equivalent of read_u64/write_u64) and use them to implement the cpu.rt_runtime_us control file in the CFS cgroup subsystem. 
This patch: These are the signed equivalents of the read_u64/write_u64 methods Signed-off-by: Paul Menage Acked-by: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 8 ++++++++ kernel/cgroup.c | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b40fd5ee9a76..785a01cfb49d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -215,6 +215,10 @@ struct cftype { * single integer. Use it in place of read() */ u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft); + /* + * read_s64() is a signed version of read_u64() + */ + s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft); /* * read_map() is used for defining a map of key/value * pairs. It should call cb->fill(cb, key, value) for each @@ -234,6 +238,10 @@ struct cftype { * userspace. Use in place of write(); return 0 or error. 
*/ int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val); + /* + * write_s64() is a signed version of write_u64() + */ + int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val); int (*release) (struct inode *inode, struct file *file); }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b5ef0c4772f7..bd6122ccc0ba 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1299,14 +1299,13 @@ enum cgroup_filetype { FILE_RELEASE_AGENT, }; -static ssize_t cgroup_write_u64(struct cgroup *cgrp, struct cftype *cft, +static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, struct file *file, const char __user *userbuf, size_t nbytes, loff_t *unused_ppos) { char buffer[64]; int retval = 0; - u64 val; char *end; if (!nbytes) @@ -1318,12 +1317,17 @@ static ssize_t cgroup_write_u64(struct cgroup *cgrp, struct cftype *cft, buffer[nbytes] = 0; /* nul-terminate */ strstrip(buffer); - val = simple_strtoull(buffer, &end, 0); - if (*end) - return -EINVAL; - - /* Pass to subsystem */ - retval = cft->write_u64(cgrp, cft, val); + if (cft->write_u64) { + u64 val = simple_strtoull(buffer, &end, 0); + if (*end) + return -EINVAL; + retval = cft->write_u64(cgrp, cft, val); + } else { + s64 val = simple_strtoll(buffer, &end, 0); + if (*end) + return -EINVAL; + retval = cft->write_s64(cgrp, cft, val); + } if (!retval) retval = nbytes; return retval; @@ -1404,8 +1408,8 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, return -ENODEV; if (cft->write) return cft->write(cgrp, cft, file, buf, nbytes, ppos); - if (cft->write_u64) - return cgroup_write_u64(cgrp, cft, file, buf, nbytes, ppos); + if (cft->write_u64 || cft->write_s64) + return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos); return -EINVAL; } @@ -1421,6 +1425,18 @@ static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft, return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); } +static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct 
cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, + loff_t *ppos) +{ + char tmp[64]; + s64 val = cft->read_s64(cgrp, cft); + int len = sprintf(tmp, "%lld\n", (long long) val); + + return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); +} + static ssize_t cgroup_common_file_read(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -1477,6 +1493,8 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, return cft->read(cgrp, cft, file, buf, nbytes, ppos); if (cft->read_u64) return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos); + if (cft->read_s64) + return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos); return -EINVAL; } -- cgit v1.2.3-71-gd317 From d447ea2f30ec60370ddb99a668e5ac12995f043d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 01:00:08 -0700 Subject: cgroups: add the trigger callback to struct cftype Trigger callback can be used to receive a kick-up from the user space. The string written is ignored. The cftype->private is used for multiplexing events. Signed-off-by: Pavel Emelyanov Acked-by: Paul Menage Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 8 ++++++++ kernel/cgroup.c | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 785a01cfb49d..2d1d151258cf 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -243,6 +243,14 @@ struct cftype { */ int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val); + /* + * trigger() callback can be used to get some kick from the + * userspace, when the actual string written is not important + * at all. The private field can be used to determine the + * kick type for multiplexing. 
+ */ + int (*trigger)(struct cgroup *cgrp, unsigned int event); + int (*release) (struct inode *inode, struct file *file); }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 436e26f4d624..7c8cc5141877 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1410,6 +1410,10 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, return cft->write(cgrp, cft, file, buf, nbytes, ppos); if (cft->write_u64 || cft->write_s64) return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos); + if (cft->trigger) { + int ret = cft->trigger(cgrp, (unsigned int)cft->private); + return ret ? ret : nbytes; + } return -EINVAL; } -- cgit v1.2.3-71-gd317 From 08ce5f16ee466ffc5bf243800deeecd77d9eaf50 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Tue, 29 Apr 2008 01:00:10 -0700 Subject: cgroups: implement device whitelist Implement a cgroup to track and enforce open and mknod restrictions on device files. A device cgroup associates a device access whitelist with each cgroup. A whitelist entry has 4 fields. 'type' is a (all), c (char), or b (block). 'all' means it applies to all types and all major and minor numbers. Major and minor are either an integer or * for all. Access is a composition of r (read), w (write), and m (mknod). The root device cgroup starts with rwm to 'all'. A child devcg gets a copy of the parent. Admins can then remove devices from the whitelist or add new entries. A child cgroup can never receive a device access which is denied its parent. However when a device access is removed from a parent it will not also be removed from the child(ren). An entry is added using devices.allow, and removed using devices.deny. For instance echo 'c 1:3 mr' > /cgroups/1/devices.allow allows cgroup 1 to read and mknod the device usually known as /dev/null. Doing echo a > /cgroups/1/devices.deny will remove the default 'a *:* mrw' entry. CAP_SYS_ADMIN is needed to change permissions or move another task to a new cgroup. 
A cgroup may not be granted more permissions than the cgroup's parent has. Any task can move itself between cgroups. This won't be sufficient, but we can decide the best way to adequately restrict movement later. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix may-be-used-uninitialized warning] Signed-off-by: Serge E. Hallyn Acked-by: James Morris Looks-good-to: Pavel Emelyanov Cc: Daniel Hokka Zakrisson Cc: Li Zefan Cc: Paul Menage Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/devices.txt | 48 +++ fs/namei.c | 9 + include/linux/cgroup_subsys.h | 6 + include/linux/device_cgroup.h | 12 + init/Kconfig | 7 + security/Makefile | 1 + security/device_cgroup.c | 603 ++++++++++++++++++++++++++++++++++ 7 files changed, 686 insertions(+) create mode 100644 Documentation/controllers/devices.txt create mode 100644 include/linux/device_cgroup.h create mode 100644 security/device_cgroup.c (limited to 'include/linux') diff --git a/Documentation/controllers/devices.txt b/Documentation/controllers/devices.txt new file mode 100644 index 000000000000..4dcea42432c2 --- /dev/null +++ b/Documentation/controllers/devices.txt @@ -0,0 +1,48 @@ +Device Whitelist Controller + +1. Description: + +Implement a cgroup to track and enforce open and mknod restrictions +on device files. A device cgroup associates a device access +whitelist with each cgroup. A whitelist entry has 4 fields. +'type' is a (all), c (char), or b (block). 'all' means it applies +to all types and all major and minor numbers. Major and minor are +either an integer or * for all. Access is a composition of r +(read), w (write), and m (mknod). + +The root device cgroup starts with rwm to 'all'. A child device +cgroup gets a copy of the parent. Administrators can then remove +devices from the whitelist or add new entries. A child cgroup can +never receive a device access which is denied its parent. 
However +when a device access is removed from a parent it will not also be +removed from the child(ren). + +2. User Interface + +An entry is added using devices.allow, and removed using +devices.deny. For instance + + echo 'c 1:3 mr' > /cgroups/1/devices.allow + +allows cgroup 1 to read and mknod the device usually known as +/dev/null. Doing + + echo a > /cgroups/1/devices.deny + +will remove the default 'a *:* mrw' entry. + +3. Security + +Any task can move itself between cgroups. This clearly won't +suffice, but we can decide the best way to adequately restrict +movement as people get some experience with this. We may just want +to require CAP_SYS_ADMIN, which at least is a separate bit from +CAP_MKNOD. We may want to just refuse moving to a cgroup which +isn't a descendent of the current one. Or we may want to use +CAP_MAC_ADMIN, since we really are trying to lock down root. + +CAP_SYS_ADMIN is needed to modify the whitelist or move another +task to a new cgroup. (Again we'll probably want to change that). + +A cgroup may not be granted more permissions than the cgroup's +parent has. 
diff --git a/fs/namei.c b/fs/namei.c index e179f71bfcb0..32fd9655485b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -281,6 +282,10 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) if (retval) return retval; + retval = devcgroup_inode_permission(inode, mask); + if (retval) + return retval; + return security_inode_permission(inode, mask, nd); } @@ -2028,6 +2033,10 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) if (!dir->i_op || !dir->i_op->mknod) return -EPERM; + error = devcgroup_inode_mknod(mode, dev); + if (error) + return error; + error = security_inode_mknod(dir, dentry, mode, dev); if (error) return error; diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 1ddebfc52565..e2877454ec82 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -42,3 +42,9 @@ SUBSYS(mem_cgroup) #endif /* */ + +#ifdef CONFIG_CGROUP_DEVICE +SUBSYS(devices) +#endif + +/* */ diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h new file mode 100644 index 000000000000..0b0d9c39ed67 --- /dev/null +++ b/include/linux/device_cgroup.h @@ -0,0 +1,12 @@ +#include +#include + +#ifdef CONFIG_CGROUP_DEVICE +extern int devcgroup_inode_permission(struct inode *inode, int mask); +extern int devcgroup_inode_mknod(int mode, dev_t dev); +#else +static inline int devcgroup_inode_permission(struct inode *inode, int mask) +{ return 0; } +static inline int devcgroup_inode_mknod(int mode, dev_t dev) +{ return 0; } +#endif diff --git a/init/Kconfig b/init/Kconfig index 6ce16bdbec76..a3457926342a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -298,6 +298,13 @@ config CGROUP_NS for instance virtual servers and checkpoint/restart jobs. 
+config CGROUP_DEVICE + bool "Device controller for cgroups" + depends on CGROUPS && EXPERIMENTAL + help + Provides a cgroup implementing whitelists for devices which + a process in the cgroup can mknod or open. + config CPUSETS bool "Cpuset support" depends on SMP && CGROUPS diff --git a/security/Makefile b/security/Makefile index 9e8b02525014..7ef1107a7287 100644 --- a/security/Makefile +++ b/security/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += commoncap.o smack/built-in.o obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o +obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o diff --git a/security/device_cgroup.c b/security/device_cgroup.c new file mode 100644 index 000000000000..4237b19e8fb3 --- /dev/null +++ b/security/device_cgroup.c @@ -0,0 +1,603 @@ +/* + * dev_cgroup.c - device cgroup subsystem + * + * Copyright 2007 IBM Corp + */ + +#include +#include +#include +#include +#include + +#define ACC_MKNOD 1 +#define ACC_READ 2 +#define ACC_WRITE 4 +#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) + +#define DEV_BLOCK 1 +#define DEV_CHAR 2 +#define DEV_ALL 4 /* this represents all devices */ + +/* + * whitelist locking rules: + * cgroup_lock() cannot be taken under dev_cgroup->lock. + * dev_cgroup->lock can be taken with or without cgroup_lock(). + * + * modifications always require cgroup_lock + * modifications to a list which is visible require the + * dev_cgroup->lock *and* cgroup_lock() + * walking the list requires dev_cgroup->lock or cgroup_lock(). + * + * reasoning: dev_whitelist_copy() needs to kmalloc, so needs + * a mutex, which the cgroup_lock() is. Since modifying + * a visible list requires both locks, either lock can be + * taken for walking the list. 
+ */ + +struct dev_whitelist_item { + u32 major, minor; + short type; + short access; + struct list_head list; +}; + +struct dev_cgroup { + struct cgroup_subsys_state css; + struct list_head whitelist; + spinlock_t lock; +}; + +static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) +{ + return container_of(cgroup_subsys_state(cgroup, devices_subsys_id), + struct dev_cgroup, css); +} + +struct cgroup_subsys devices_subsys; + +static int devcgroup_can_attach(struct cgroup_subsys *ss, + struct cgroup *new_cgroup, struct task_struct *task) +{ + if (current != task && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return 0; +} + +/* + * called under cgroup_lock() + */ +static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) +{ + struct dev_whitelist_item *wh, *tmp, *new; + + list_for_each_entry(wh, orig, list) { + new = kmalloc(sizeof(*wh), GFP_KERNEL); + if (!new) + goto free_and_exit; + new->major = wh->major; + new->minor = wh->minor; + new->type = wh->type; + new->access = wh->access; + list_add_tail(&new->list, dest); + } + + return 0; + +free_and_exit: + list_for_each_entry_safe(wh, tmp, dest, list) { + list_del(&wh->list); + kfree(wh); + } + return -ENOMEM; +} + +/* Stupid prototype - don't bother combining existing entries */ +/* + * called under cgroup_lock() + * since the list is visible to other tasks, we need the spinlock also + */ +static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, + struct dev_whitelist_item *wh) +{ + struct dev_whitelist_item *whcopy; + + whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL); + if (!whcopy) + return -ENOMEM; + + memcpy(whcopy, wh, sizeof(*whcopy)); + spin_lock(&dev_cgroup->lock); + list_add_tail(&whcopy->list, &dev_cgroup->whitelist); + spin_unlock(&dev_cgroup->lock); + return 0; +} + +/* + * called under cgroup_lock() + * since the list is visible to other tasks, we need the spinlock also + */ +static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, + struct 
dev_whitelist_item *wh) +{ + struct dev_whitelist_item *walk, *tmp; + + spin_lock(&dev_cgroup->lock); + list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { + if (walk->type == DEV_ALL) + goto remove; + if (walk->type != wh->type) + continue; + if (walk->major != ~0 && walk->major != wh->major) + continue; + if (walk->minor != ~0 && walk->minor != wh->minor) + continue; + +remove: + walk->access &= ~wh->access; + if (!walk->access) { + list_del(&walk->list); + kfree(walk); + } + } + spin_unlock(&dev_cgroup->lock); +} + +/* + * called from kernel/cgroup.c with cgroup_lock() held. + */ +static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, + struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; + struct cgroup *parent_cgroup; + int ret; + + dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); + if (!dev_cgroup) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&dev_cgroup->whitelist); + parent_cgroup = cgroup->parent; + + if (parent_cgroup == NULL) { + struct dev_whitelist_item *wh; + wh = kmalloc(sizeof(*wh), GFP_KERNEL); + if (!wh) { + kfree(dev_cgroup); + return ERR_PTR(-ENOMEM); + } + wh->minor = wh->major = ~0; + wh->type = DEV_ALL; + wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE; + list_add(&wh->list, &dev_cgroup->whitelist); + } else { + parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); + ret = dev_whitelist_copy(&dev_cgroup->whitelist, + &parent_dev_cgroup->whitelist); + if (ret) { + kfree(dev_cgroup); + return ERR_PTR(ret); + } + } + + spin_lock_init(&dev_cgroup->lock); + return &dev_cgroup->css; +} + +static void devcgroup_destroy(struct cgroup_subsys *ss, + struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup; + struct dev_whitelist_item *wh, *tmp; + + dev_cgroup = cgroup_to_devcgroup(cgroup); + list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { + list_del(&wh->list); + kfree(wh); + } + kfree(dev_cgroup); +} + +#define DEVCG_ALLOW 1 +#define DEVCG_DENY 2 + +static void 
set_access(char *acc, short access) +{ + int idx = 0; + memset(acc, 0, 4); + if (access & ACC_READ) + acc[idx++] = 'r'; + if (access & ACC_WRITE) + acc[idx++] = 'w'; + if (access & ACC_MKNOD) + acc[idx++] = 'm'; +} + +static char type_to_char(short type) +{ + if (type == DEV_ALL) + return 'a'; + if (type == DEV_CHAR) + return 'c'; + if (type == DEV_BLOCK) + return 'b'; + return 'X'; +} + +static void set_majmin(char *str, int len, unsigned m) +{ + memset(str, 0, len); + if (m == ~0) + sprintf(str, "*"); + else + snprintf(str, len, "%d", m); +} + +static char *print_whitelist(struct dev_cgroup *devcgroup, int *len) +{ + char *buf, *s, acc[4]; + struct dev_whitelist_item *wh; + int ret; + int count = 0; + char maj[10], min[10]; + + buf = kmalloc(4096, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + s = buf; + *s = '\0'; + *len = 0; + + spin_lock(&devcgroup->lock); + list_for_each_entry(wh, &devcgroup->whitelist, list) { + set_access(acc, wh->access); + set_majmin(maj, 10, wh->major); + set_majmin(min, 10, wh->minor); + ret = snprintf(s, 4095-(s-buf), "%c %s:%s %s\n", + type_to_char(wh->type), maj, min, acc); + if (s+ret >= buf+4095) { + kfree(buf); + buf = ERR_PTR(-ENOMEM); + break; + } + s += ret; + *len += ret; + count++; + } + spin_unlock(&devcgroup->lock); + + return buf; +} + +static ssize_t devcgroup_access_read(struct cgroup *cgroup, + struct cftype *cft, struct file *file, + char __user *userbuf, size_t nbytes, loff_t *ppos) +{ + struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); + int filetype = cft->private; + char *buffer; + int uninitialized_var(len); + int retval; + + if (filetype != DEVCG_ALLOW) + return -EINVAL; + buffer = print_whitelist(devcgroup, &len); + if (IS_ERR(buffer)) + return PTR_ERR(buffer); + + retval = simple_read_from_buffer(userbuf, nbytes, ppos, buffer, len); + kfree(buffer); + return retval; +} + +/* + * may_access_whitelist: + * does the access granted to dev_cgroup c contain the access + * requested in whitelist item 
refwh. + * return 1 if yes, 0 if no. + * call with c->lock held + */ +static int may_access_whitelist(struct dev_cgroup *c, + struct dev_whitelist_item *refwh) +{ + struct dev_whitelist_item *whitem; + + list_for_each_entry(whitem, &c->whitelist, list) { + if (whitem->type & DEV_ALL) + return 1; + if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) + continue; + if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) + continue; + if (whitem->major != ~0 && whitem->major != refwh->major) + continue; + if (whitem->minor != ~0 && whitem->minor != refwh->minor) + continue; + if (refwh->access & (~(whitem->access | ACC_MASK))) + continue; + return 1; + } + return 0; +} + +/* + * parent_has_perm: + * when adding a new allow rule to a device whitelist, the rule + * must be allowed in the parent device + */ +static int parent_has_perm(struct cgroup *childcg, + struct dev_whitelist_item *wh) +{ + struct cgroup *pcg = childcg->parent; + struct dev_cgroup *parent; + int ret; + + if (!pcg) + return 1; + parent = cgroup_to_devcgroup(pcg); + spin_lock(&parent->lock); + ret = may_access_whitelist(parent, wh); + spin_unlock(&parent->lock); + return ret; +} + +/* + * Modify the whitelist using allow/deny rules. + * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD + * so we can give a container CAP_MKNOD to let it create devices but not + * modify the whitelist. + * It seems likely we'll want to add a CAP_CONTAINER capability to allow + * us to also grant CAP_SYS_ADMIN to containers without giving away the + * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN + * + * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting + * new access is only allowed if you're in the top-level cgroup, or your + * parent cgroup has the access you're asking for. 
+ */ +static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft, + struct file *file, const char __user *userbuf, + size_t nbytes, loff_t *ppos) +{ + struct cgroup *cur_cgroup; + struct dev_cgroup *devcgroup, *cur_devcgroup; + int filetype = cft->private; + char *buffer, *b; + int retval = 0, count; + struct dev_whitelist_item wh; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + devcgroup = cgroup_to_devcgroup(cgroup); + cur_cgroup = task_cgroup(current, devices_subsys.subsys_id); + cur_devcgroup = cgroup_to_devcgroup(cur_cgroup); + + buffer = kmalloc(nbytes+1, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + if (copy_from_user(buffer, userbuf, nbytes)) { + retval = -EFAULT; + goto out1; + } + buffer[nbytes] = 0; /* nul-terminate */ + + cgroup_lock(); + if (cgroup_is_removed(cgroup)) { + retval = -ENODEV; + goto out2; + } + + memset(&wh, 0, sizeof(wh)); + b = buffer; + + switch (*b) { + case 'a': + wh.type = DEV_ALL; + wh.access = ACC_MASK; + goto handle; + case 'b': + wh.type = DEV_BLOCK; + break; + case 'c': + wh.type = DEV_CHAR; + break; + default: + retval = -EINVAL; + goto out2; + } + b++; + if (!isspace(*b)) { + retval = -EINVAL; + goto out2; + } + b++; + if (*b == '*') { + wh.major = ~0; + b++; + } else if (isdigit(*b)) { + wh.major = 0; + while (isdigit(*b)) { + wh.major = wh.major*10+(*b-'0'); + b++; + } + } else { + retval = -EINVAL; + goto out2; + } + if (*b != ':') { + retval = -EINVAL; + goto out2; + } + b++; + + /* read minor */ + if (*b == '*') { + wh.minor = ~0; + b++; + } else if (isdigit(*b)) { + wh.minor = 0; + while (isdigit(*b)) { + wh.minor = wh.minor*10+(*b-'0'); + b++; + } + } else { + retval = -EINVAL; + goto out2; + } + if (!isspace(*b)) { + retval = -EINVAL; + goto out2; + } + for (b++, count = 0; count < 3; count++, b++) { + switch (*b) { + case 'r': + wh.access |= ACC_READ; + break; + case 'w': + wh.access |= ACC_WRITE; + break; + case 'm': + wh.access |= ACC_MKNOD; + break; + case '\n': + case '\0': + count 
= 3; + break; + default: + retval = -EINVAL; + goto out2; + } + } + +handle: + retval = 0; + switch (filetype) { + case DEVCG_ALLOW: + if (!parent_has_perm(cgroup, &wh)) + retval = -EPERM; + else + retval = dev_whitelist_add(devcgroup, &wh); + break; + case DEVCG_DENY: + dev_whitelist_rm(devcgroup, &wh); + break; + default: + retval = -EINVAL; + goto out2; + } + + if (retval == 0) + retval = nbytes; + +out2: + cgroup_unlock(); +out1: + kfree(buffer); + return retval; +} + +static struct cftype dev_cgroup_files[] = { + { + .name = "allow", + .read = devcgroup_access_read, + .write = devcgroup_access_write, + .private = DEVCG_ALLOW, + }, + { + .name = "deny", + .write = devcgroup_access_write, + .private = DEVCG_DENY, + }, +}; + +static int devcgroup_populate(struct cgroup_subsys *ss, + struct cgroup *cgroup) +{ + return cgroup_add_files(cgroup, ss, dev_cgroup_files, + ARRAY_SIZE(dev_cgroup_files)); +} + +struct cgroup_subsys devices_subsys = { + .name = "devices", + .can_attach = devcgroup_can_attach, + .create = devcgroup_create, + .destroy = devcgroup_destroy, + .populate = devcgroup_populate, + .subsys_id = devices_subsys_id, +}; + +int devcgroup_inode_permission(struct inode *inode, int mask) +{ + struct cgroup *cgroup; + struct dev_cgroup *dev_cgroup; + struct dev_whitelist_item *wh; + + dev_t device = inode->i_rdev; + if (!device) + return 0; + if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) + return 0; + cgroup = task_cgroup(current, devices_subsys.subsys_id); + dev_cgroup = cgroup_to_devcgroup(cgroup); + if (!dev_cgroup) + return 0; + + spin_lock(&dev_cgroup->lock); + list_for_each_entry(wh, &dev_cgroup->whitelist, list) { + if (wh->type & DEV_ALL) + goto acc_check; + if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) + continue; + if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) + continue; + if (wh->major != ~0 && wh->major != imajor(inode)) + continue; + if (wh->minor != ~0 && wh->minor != iminor(inode)) + continue; +acc_check: + if 
((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) + continue; + if ((mask & MAY_READ) && !(wh->access & ACC_READ)) + continue; + spin_unlock(&dev_cgroup->lock); + return 0; + } + spin_unlock(&dev_cgroup->lock); + + return -EPERM; +} + +int devcgroup_inode_mknod(int mode, dev_t dev) +{ + struct cgroup *cgroup; + struct dev_cgroup *dev_cgroup; + struct dev_whitelist_item *wh; + + cgroup = task_cgroup(current, devices_subsys.subsys_id); + dev_cgroup = cgroup_to_devcgroup(cgroup); + if (!dev_cgroup) + return 0; + + spin_lock(&dev_cgroup->lock); + list_for_each_entry(wh, &dev_cgroup->whitelist, list) { + if (wh->type & DEV_ALL) + goto acc_check; + if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) + continue; + if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) + continue; + if (wh->major != ~0 && wh->major != MAJOR(dev)) + continue; + if (wh->minor != ~0 && wh->minor != MINOR(dev)) + continue; +acc_check: + if (!(wh->access & ACC_MKNOD)) + continue; + spin_unlock(&dev_cgroup->lock); + return 0; + } + spin_unlock(&dev_cgroup->lock); + return -EPERM; +} -- cgit v1.2.3-71-gd317 From 472b1053f3c319cc60bfb2a0bb062fed77a93eb6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 29 Apr 2008 01:00:11 -0700 Subject: cgroups: use a hash table for css_set finding When we attach a process to a different cgroup, the css_set linked-list will be run through to find a suitable existing css_set to use. This patch implements a hash table for better performance. The following benchmarks have been tested: For N in 1, 5, 10, 50, 100, 500, 1000, create N cgroups with one sleeping task in each, and then move an additional task through each cgroup in turn. 
Here is a test result: N Loop orig - Time(s) hash - Time(s) ---------------------------------------------- 1 10000 1.201231728 1.196311177 5 2000 1.065743872 1.040566424 10 1000 0.991054735 0.986876440 50 200 0.976554203 0.969608733 100 100 0.998504680 0.969218270 500 20 1.157347764 0.962602963 1000 10 1.619521852 1.085140172 Signed-off-by: Li Zefan Reviewed-by: Paul Menage Cc: Balbir Singh Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 7 +++++- kernel/cgroup.c | 59 ++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 53 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2d1d151258cf..f585b7cde87b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -155,6 +155,12 @@ struct css_set { */ struct list_head list; + /* + * List running through all cgroup groups in the same hash + * slot. Protected by css_set_lock + */ + struct hlist_node hlist; + /* * List running through all tasks using this cgroup * group. Protected by css_set_lock @@ -174,7 +180,6 @@ struct css_set { * during subsystem registration (at boot time). */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; - }; /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7c8cc5141877..c447c29f8749 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -44,6 +44,7 @@ #include #include #include +#include #include @@ -193,6 +194,27 @@ static struct cg_cgroup_link init_css_set_link; static DEFINE_RWLOCK(css_set_lock); static int css_set_count; +/* hash table for cgroup groups. 
This improves the performance to + * find an existing css_set */ +#define CSS_SET_HASH_BITS 7 +#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) +static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; + +static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) +{ + int i; + int index; + unsigned long tmp = 0UL; + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) + tmp += (unsigned long)css[i]; + tmp = (tmp >> 16) ^ tmp; + + index = hash_long(tmp, CSS_SET_HASH_BITS); + + return &css_set_table[index]; +} + /* We don't maintain the lists running through each css_set to its * task until after the first call to cgroup_iter_start(). This * reduces the fork()/exit() overhead for people who have cgroups @@ -219,6 +241,7 @@ static int use_task_css_set_links; static void unlink_css_set(struct css_set *cg) { write_lock(&css_set_lock); + hlist_del(&cg->hlist); list_del(&cg->list); css_set_count--; while (!list_empty(&cg->cg_links)) { @@ -284,9 +307,7 @@ static inline void put_css_set_taskexit(struct css_set *cg) /* * find_existing_css_set() is a helper for * find_css_set(), and checks to see whether an existing - * css_set is suitable. This currently walks a linked-list for - * simplicity; a later patch will use a hash table for better - * performance + * css_set is suitable. 
* * oldcg: the cgroup group that we're using before the cgroup * transition @@ -303,7 +324,9 @@ static struct css_set *find_existing_css_set( { int i; struct cgroupfs_root *root = cgrp->root; - struct list_head *l = &init_css_set.list; + struct hlist_head *hhead; + struct hlist_node *node; + struct css_set *cg; /* Built the set of subsystem state objects that we want to * see in the new css_set */ @@ -320,18 +343,13 @@ static struct css_set *find_existing_css_set( } } - /* Look through existing cgroup groups to find one to reuse */ - do { - struct css_set *cg = - list_entry(l, struct css_set, list); - + hhead = css_set_hash(template); + hlist_for_each_entry(cg, node, hhead, hlist) { if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { /* All subsystems matched */ return cg; } - /* Try the next cgroup group */ - l = l->next; - } while (l != &init_css_set.list); + } /* No existing cgroup group matched */ return NULL; @@ -393,6 +411,8 @@ static struct css_set *find_css_set( struct list_head tmp_cg_links; struct cg_cgroup_link *link; + struct hlist_head *hhead; + /* First see if we already have a cgroup group that matches * the desired set */ write_lock(&css_set_lock); @@ -417,6 +437,7 @@ static struct css_set *find_css_set( kref_init(&res->ref); INIT_LIST_HEAD(&res->cg_links); INIT_LIST_HEAD(&res->tasks); + INIT_HLIST_NODE(&res->hlist); /* Copy the set of subsystem state objects generated in * find_existing_css_set() */ @@ -459,6 +480,11 @@ static struct css_set *find_css_set( /* Link this cgroup group into the list */ list_add(&res->list, &init_css_set.list); css_set_count++; + + /* Add this cgroup group to the hash table */ + hhead = css_set_hash(res->subsys); + hlist_add_head(&res->hlist, hhead); + write_unlock(&css_set_lock); return res; @@ -2508,6 +2534,7 @@ int __init cgroup_init_early(void) INIT_LIST_HEAD(&init_css_set.list); INIT_LIST_HEAD(&init_css_set.cg_links); INIT_LIST_HEAD(&init_css_set.tasks); + INIT_HLIST_NODE(&init_css_set.hlist); css_set_count = 
1; init_cgroup_root(&rootnode); list_add(&rootnode.root_list, &roots); @@ -2520,6 +2547,9 @@ int __init cgroup_init_early(void) list_add(&init_css_set_link.cg_link_list, &init_css_set.cg_links); + for (i = 0; i < CSS_SET_TABLE_SIZE; i++) + INIT_HLIST_HEAD(&css_set_table[i]); + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; @@ -2549,6 +2579,7 @@ int __init cgroup_init(void) { int err; int i; + struct hlist_head *hhead; err = bdi_init(&cgroup_backing_dev_info); if (err) @@ -2560,6 +2591,10 @@ int __init cgroup_init(void) cgroup_init_subsys(ss); } + /* Add init_css_set to the hash table */ + hhead = css_set_hash(init_css_set.subsys); + hlist_add_head(&init_css_set.hlist, hhead); + err = register_filesystem(&cgroup_fs_type); if (err < 0) goto out; -- cgit v1.2.3-71-gd317 From 28fd5dfc12bde391981dfdcf20755952b6e916af Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 29 Apr 2008 01:00:13 -0700 Subject: cgroups: remove the css_set linked-list Now we can run through the hash table instead of running through the linked-list. Signed-off-by: Li Zefan Reviewed-by: Paul Menage Cc: Balbir Singh Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 6 ------ kernel/cgroup.c | 40 ++++++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f585b7cde87b..d58a958444ab 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -149,12 +149,6 @@ struct css_set { /* Reference count */ struct kref ref; - /* - * List running through all cgroup groups. Protected by - * css_set_lock - */ - struct list_head list; - /* * List running through all cgroup groups in the same hash * slot. 
Protected by css_set_lock diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b893c8c94858..aeceb8868981 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -242,7 +242,6 @@ static void unlink_css_set(struct css_set *cg) { write_lock(&css_set_lock); hlist_del(&cg->hlist); - list_del(&cg->list); css_set_count--; while (!list_empty(&cg->cg_links)) { struct cg_cgroup_link *link; @@ -477,8 +476,6 @@ static struct css_set *find_css_set( BUG_ON(!list_empty(&tmp_cg_links)); - /* Link this cgroup group into the list */ - list_add(&res->list, &init_css_set.list); css_set_count++; /* Add this cgroup group to the hash table */ @@ -963,7 +960,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, int ret = 0; struct super_block *sb; struct cgroupfs_root *root; - struct list_head tmp_cg_links, *l; + struct list_head tmp_cg_links; INIT_LIST_HEAD(&tmp_cg_links); /* First find the desired set of subsystems */ @@ -1005,6 +1002,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, /* New superblock */ struct cgroup *cgrp = &root->top_cgroup; struct inode *inode; + int i; BUG_ON(sb->s_root != NULL); @@ -1049,22 +1047,25 @@ static int cgroup_get_sb(struct file_system_type *fs_type, /* Link the top cgroup in this hierarchy into all * the css_set objects */ write_lock(&css_set_lock); - l = &init_css_set.list; - do { + for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { + struct hlist_head *hhead = &css_set_table[i]; + struct hlist_node *node; struct css_set *cg; - struct cg_cgroup_link *link; - cg = list_entry(l, struct css_set, list); - BUG_ON(list_empty(&tmp_cg_links)); - link = list_entry(tmp_cg_links.next, - struct cg_cgroup_link, - cgrp_link_list); - list_del(&link->cgrp_link_list); - link->cg = cg; - list_add(&link->cgrp_link_list, - &root->top_cgroup.css_sets); - list_add(&link->cg_link_list, &cg->cg_links); - l = l->next; - } while (l != &init_css_set.list); + + hlist_for_each_entry(cg, node, hhead, hlist) { + struct cg_cgroup_link *link; + + 
BUG_ON(list_empty(&tmp_cg_links)); + link = list_entry(tmp_cg_links.next, + struct cg_cgroup_link, + cgrp_link_list); + list_del(&link->cgrp_link_list); + link->cg = cg; + list_add(&link->cgrp_link_list, + &root->top_cgroup.css_sets); + list_add(&link->cg_link_list, &cg->cg_links); + } + } write_unlock(&css_set_lock); free_cg_links(&tmp_cg_links); @@ -2514,7 +2515,6 @@ int __init cgroup_init_early(void) int i; kref_init(&init_css_set.ref); kref_get(&init_css_set.ref); - INIT_LIST_HEAD(&init_css_set.list); INIT_LIST_HEAD(&init_css_set.cg_links); INIT_LIST_HEAD(&init_css_set.tasks); INIT_HLIST_NODE(&init_css_set.hlist); -- cgit v1.2.3-71-gd317 From 29486df325e1fe6e1764afcb19e3370804c2b002 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Tue, 29 Apr 2008 01:00:14 -0700 Subject: cgroups: introduce cft->read_seq() Introduce a read_seq() helper in cftype, which uses seq_file to print out lists. Use it in the devices cgroup. Also split devices.allow into two files, so now devices.deny and devices.allow are the ones to use to manipulate the whitelist, while devices.list outputs the cgroup's current whitelist. Signed-off-by: Serge E. Hallyn Acked-by: Paul Menage Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 6 ++++ kernel/cgroup.c | 15 ++++++---- security/device_cgroup.c | 74 +++++++++++++++--------------------------------- 3 files changed, 38 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d58a958444ab..095248082b7e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -226,6 +226,12 @@ struct cftype { */ int (*read_map) (struct cgroup *cont, struct cftype *cft, struct cgroup_map_cb *cb); + /* + * read_seq_string() is used for outputting a simple sequence + * using seqfile. 
+ */ + int (*read_seq_string) (struct cgroup *cont, struct cftype *cft, + struct seq_file *m); ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft, struct file *file, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index aeceb8868981..abc433772e5a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1549,11 +1549,14 @@ static int cgroup_seqfile_show(struct seq_file *m, void *arg) { struct cgroup_seqfile_state *state = m->private; struct cftype *cft = state->cft; - struct cgroup_map_cb cb = { - .fill = cgroup_map_add, - .state = m, - }; - return cft->read_map(state->cgroup, cft, &cb); + if (cft->read_map) { + struct cgroup_map_cb cb = { + .fill = cgroup_map_add, + .state = m, + }; + return cft->read_map(state->cgroup, cft, &cb); + } + return cft->read_seq_string(state->cgroup, cft, m); } int cgroup_seqfile_release(struct inode *inode, struct file *file) @@ -1581,7 +1584,7 @@ static int cgroup_file_open(struct inode *inode, struct file *file) cft = __d_cft(file->f_dentry); if (!cft) return -ENODEV; - if (cft->read_map) { + if (cft->read_map || cft->read_seq_string) { struct cgroup_seqfile_state *state = kzalloc(sizeof(*state), GFP_USER); if (!state) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 4237b19e8fb3..4ea583689eec 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -9,6 +9,7 @@ #include #include #include +#include #define ACC_MKNOD 1 #define ACC_READ 2 @@ -201,11 +202,15 @@ static void devcgroup_destroy(struct cgroup_subsys *ss, #define DEVCG_ALLOW 1 #define DEVCG_DENY 2 +#define DEVCG_LIST 3 + +#define MAJMINLEN 10 +#define ACCLEN 4 static void set_access(char *acc, short access) { int idx = 0; - memset(acc, 0, 4); + memset(acc, 0, ACCLEN); if (access & ACC_READ) acc[idx++] = 'r'; if (access & ACC_WRITE) @@ -225,70 +230,33 @@ static char type_to_char(short type) return 'X'; } -static void set_majmin(char *str, int len, unsigned m) +static void set_majmin(char *str, unsigned m) { - memset(str, 0, len); + 
memset(str, 0, MAJMINLEN); if (m == ~0) sprintf(str, "*"); else - snprintf(str, len, "%d", m); + snprintf(str, MAJMINLEN, "%d", m); } -static char *print_whitelist(struct dev_cgroup *devcgroup, int *len) +static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, + struct seq_file *m) { - char *buf, *s, acc[4]; + struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); struct dev_whitelist_item *wh; - int ret; - int count = 0; - char maj[10], min[10]; - - buf = kmalloc(4096, GFP_KERNEL); - if (!buf) - return ERR_PTR(-ENOMEM); - s = buf; - *s = '\0'; - *len = 0; + char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; spin_lock(&devcgroup->lock); list_for_each_entry(wh, &devcgroup->whitelist, list) { set_access(acc, wh->access); - set_majmin(maj, 10, wh->major); - set_majmin(min, 10, wh->minor); - ret = snprintf(s, 4095-(s-buf), "%c %s:%s %s\n", - type_to_char(wh->type), maj, min, acc); - if (s+ret >= buf+4095) { - kfree(buf); - buf = ERR_PTR(-ENOMEM); - break; - } - s += ret; - *len += ret; - count++; + set_majmin(maj, wh->major); + set_majmin(min, wh->minor); + seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), + maj, min, acc); } spin_unlock(&devcgroup->lock); - return buf; -} - -static ssize_t devcgroup_access_read(struct cgroup *cgroup, - struct cftype *cft, struct file *file, - char __user *userbuf, size_t nbytes, loff_t *ppos) -{ - struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); - int filetype = cft->private; - char *buffer; - int uninitialized_var(len); - int retval; - - if (filetype != DEVCG_ALLOW) - return -EINVAL; - buffer = print_whitelist(devcgroup, &len); - if (IS_ERR(buffer)) - return PTR_ERR(buffer); - - retval = simple_read_from_buffer(userbuf, nbytes, ppos, buffer, len); - kfree(buffer); - return retval; + return 0; } /* @@ -501,7 +469,6 @@ out1: static struct cftype dev_cgroup_files[] = { { .name = "allow", - .read = devcgroup_access_read, .write = devcgroup_access_write, .private = DEVCG_ALLOW, }, @@ -510,6 +477,11 @@ 
static struct cftype dev_cgroup_files[] = { .write = devcgroup_access_write, .private = DEVCG_DENY, }, + { + .name = "list", + .read_seq_string = devcgroup_seq_read, + .private = DEVCG_LIST, + }, }; static int devcgroup_populate(struct cgroup_subsys *ss, -- cgit v1.2.3-71-gd317 From cf475ad28ac35cc9ba612d67158f29b73b38b05d Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 29 Apr 2008 01:00:16 -0700 Subject: cgroups: add an owner to the mm_struct Remove the mem_cgroup member from mm_struct and instead add an owner. This approach was suggested by Paul Menage. The advantage of this approach is that, once the mm->owner is known, using the subsystem id, the cgroup can be determined. It also allows several control groups that are virtually grouped by mm_struct, to exist independent of the memory controller i.e., without adding mem_cgroup's for each controller, to mm_struct. A new config option CONFIG_MM_OWNER is added and the memory resource controller selects this config option. This patch also adds cgroup callbacks to notify subsystems when mm->owner changes. The mm_owner_changed callback is called with the task_lock() of the new task held and is called just prior to changing the mm->owner. I am indebted to Paul Menage for the several reviews of this patchset and helping me make it lighter and simpler. This patch was tested on a powerpc box, it was compiled with both the MM_OWNER config turned on and off. After the thread group leader exits, it's moved to init_css_set by cgroup_exit(), thus all future charges from running threads would be redirected to the init_css_set's subsystem.
Signed-off-by: Balbir Singh Cc: Pavel Emelianov Cc: Hugh Dickins Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: Hirokazu Takahashi Cc: David Rientjes , Cc: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Acked-by: Pekka Enberg Reviewed-by: Paul Menage Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 + include/linux/cgroup.h | 15 +++++++++ include/linux/memcontrol.h | 16 ++------- include/linux/mm_types.h | 5 +-- include/linux/sched.h | 13 ++++++++ init/Kconfig | 7 ++++ init/main.c | 1 + kernel/cgroup.c | 30 +++++++++++++++++ kernel/exit.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 11 ++++-- mm/memcontrol.c | 28 +++------------- 11 files changed, 169 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 7768453dc986..711bc45d789c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -735,6 +735,7 @@ static int exec_mmap(struct mm_struct *mm) tsk->active_mm = mm; activate_mm(active_mm, mm); task_unlock(tsk); + mm_update_next_owner(mm); arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 095248082b7e..e155aa78d859 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -305,6 +305,12 @@ struct cgroup_subsys { struct cgroup *cgrp); void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); + /* + * This routine is called with the task_lock of mm->owner held + */ + void (*mm_owner_changed)(struct cgroup_subsys *ss, + struct cgroup *old, + struct cgroup *new); int subsys_id; int active; int disabled; @@ -390,4 +396,13 @@ static inline int cgroupstats_build(struct cgroupstats *stats, #endif /* !CONFIG_CGROUPS */ +#ifdef CONFIG_MM_OWNER +extern void +cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new); +#else /* !CONFIG_MM_OWNER */ +static inline void +cgroup_mm_owner_callbacks(struct 
task_struct *old, struct task_struct *new) +{ +} +#endif /* CONFIG_MM_OWNER */ #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8b1c4295848b..e6608776bc96 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -27,9 +27,6 @@ struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); -extern void mm_free_cgroup(struct mm_struct *mm); - #define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) extern struct page_cgroup *page_get_page_cgroup(struct page *page); @@ -48,8 +45,10 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); +extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); + #define mm_match_cgroup(mm, cgroup) \ - ((cgroup) == rcu_dereference((mm)->mem_cgroup)) + ((cgroup) == mem_cgroup_from_task((mm)->owner)) extern int mem_cgroup_prepare_migration(struct page *page); extern void mem_cgroup_end_migration(struct page *page); @@ -73,15 +72,6 @@ extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, struct zone *zone, int priority); #else /* CONFIG_CGROUP_MEM_RES_CTLR */ -static inline void mm_init_cgroup(struct mm_struct *mm, - struct task_struct *p) -{ -} - -static inline void mm_free_cgroup(struct mm_struct *mm) -{ -} - static inline void page_reset_bad_cgroup(struct page *page) { } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e2bae8dde35a..bc97bd54f606 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -225,8 +225,9 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; /* aio lock */ struct kioctx *ioctx_list; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR - struct mem_cgroup *mem_cgroup; +#ifdef CONFIG_MM_OWNER + struct task_struct *owner; /* The thread group 
leader that */ + /* owns the mm_struct. */ #endif }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 024d72b47a0c..1d02babdb2c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2148,6 +2148,19 @@ static inline void migration_init(void) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif +#ifdef CONFIG_MM_OWNER +extern void mm_update_next_owner(struct mm_struct *mm); +extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); +#else +static inline void mm_update_next_owner(struct mm_struct *mm) +{ +} + +static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) +{ +} +#endif /* CONFIG_MM_OWNER */ + #endif /* __KERNEL__ */ #endif diff --git a/init/Kconfig b/init/Kconfig index a3457926342a..98fa96eac415 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -378,9 +378,13 @@ config RESOURCE_COUNTERS infrastructure that works with cgroups depends on CGROUPS +config MM_OWNER + bool + config CGROUP_MEM_RES_CTLR bool "Memory Resource Controller for Control Groups" depends on CGROUPS && RESOURCE_COUNTERS + select MM_OWNER help Provides a memory resource controller that manages both page cache and RSS memory. @@ -393,6 +397,9 @@ config CGROUP_MEM_RES_CTLR Only enable when you're ok with these trade offs and really sure you need the memory resource controller. + This config option also selects MM_OWNER config option, which + could in turn add some fork/exit overhead. 
+ config SYSFS_DEPRECATED bool diff --git a/init/main.c b/init/main.c index 1116d2f40cc1..c62c98f381f2 100644 --- a/init/main.c +++ b/init/main.c @@ -559,6 +559,7 @@ asmlinkage void __init start_kernel(void) printk(KERN_NOTICE); printk(linux_banner); setup_arch(&command_line); + mm_init_owner(&init_mm, &init_task); setup_command_line(command_line); unwind_setup(); setup_per_cpu_areas(); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index abc433772e5a..b9d467d83fc1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -119,6 +119,7 @@ static int root_count; * be called. */ static int need_forkexit_callback; +static int need_mm_owner_callback __read_mostly; /* convenient tests for these bits */ inline int cgroup_is_removed(const struct cgroup *cgrp) @@ -2498,6 +2499,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; need_forkexit_callback |= ss->fork || ss->exit; + need_mm_owner_callback |= !!ss->mm_owner_changed; /* At system boot, before all subsystems have been * registered, no tasks have been forked, so we don't @@ -2748,6 +2750,34 @@ void cgroup_fork_callbacks(struct task_struct *child) } } +#ifdef CONFIG_MM_OWNER +/** + * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes + * @p: the new owner + * + * Called on every change to mm->owner. mm_init_owner() does not + * invoke this routine, since it assigns the mm->owner the first time + * and does not change it. 
+ */ +void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) +{ + struct cgroup *oldcgrp, *newcgrp; + + if (need_mm_owner_callback) { + int i; + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + oldcgrp = task_cgroup(old, ss->subsys_id); + newcgrp = task_cgroup(new, ss->subsys_id); + if (oldcgrp == newcgrp) + continue; + if (ss->mm_owner_changed) + ss->mm_owner_changed(ss, oldcgrp, newcgrp); + } + } +} +#endif /* CONFIG_MM_OWNER */ + /** * cgroup_post_fork - called on a new task after adding it to the task list * @child: the task in question diff --git a/kernel/exit.c b/kernel/exit.c index 2a9d98c641ac..ae0f2c4e452b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk) EXPORT_SYMBOL_GPL(exit_fs); +#ifdef CONFIG_MM_OWNER +/* + * Task p is exiting and it owned mm, lets find a new owner for it + */ +static inline int +mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) +{ + /* + * If there are other users of the mm and the owner (us) is exiting + * we need to find a new owner to take on the responsibility. + */ + if (!mm) + return 0; + if (atomic_read(&mm->mm_users) <= 1) + return 0; + if (mm->owner != p) + return 0; + return 1; +} + +void mm_update_next_owner(struct mm_struct *mm) +{ + struct task_struct *c, *g, *p = current; + +retry: + if (!mm_need_new_owner(mm, p)) + return; + + read_lock(&tasklist_lock); + /* + * Search in the children + */ + list_for_each_entry(c, &p->children, sibling) { + if (c->mm == mm) + goto assign_new_owner; + } + + /* + * Search in the siblings + */ + list_for_each_entry(c, &p->parent->children, sibling) { + if (c->mm == mm) + goto assign_new_owner; + } + + /* + * Search through everything else. 
We should not get + * here often + */ + do_each_thread(g, c) { + if (c->mm == mm) + goto assign_new_owner; + } while_each_thread(g, c); + + read_unlock(&tasklist_lock); + return; + +assign_new_owner: + BUG_ON(c == p); + get_task_struct(c); + /* + * The task_lock protects c->mm from changing. + * We always want mm->owner->mm == mm + */ + task_lock(c); + /* + * Delay read_unlock() till we have the task_lock() + * to ensure that c does not slip away underneath us + */ + read_unlock(&tasklist_lock); + if (c->mm != mm) { + task_unlock(c); + put_task_struct(c); + goto retry; + } + cgroup_mm_owner_callbacks(mm->owner, c); + mm->owner = c; + task_unlock(c); + put_task_struct(c); +} +#endif /* CONFIG_MM_OWNER */ + /* * Turn us into a lazy TLB process if we * aren't already.. @@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk) /* We don't want this task to be frozen prematurely */ clear_freeze_flag(tsk); task_unlock(tsk); + mm_update_next_owner(mm); mmput(mm); } diff --git a/kernel/fork.c b/kernel/fork.c index 6067e429f281..156db96ff754 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -381,14 +381,13 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) mm->ioctx_list = NULL; mm->free_area_cache = TASK_UNMAPPED_BASE; mm->cached_hole_size = ~0UL; - mm_init_cgroup(mm, p); + mm_init_owner(mm, p); if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; return mm; } - mm_free_cgroup(mm); free_mm(mm); return NULL; } @@ -438,7 +437,6 @@ void mmput(struct mm_struct *mm) spin_unlock(&mmlist_lock); } put_swap_token(mm); - mm_free_cgroup(mm); mmdrop(mm); } } @@ -982,6 +980,13 @@ static void rt_mutex_init_task(struct task_struct *p) #endif } +#ifdef CONFIG_MM_OWNER +void mm_init_owner(struct mm_struct *mm, struct task_struct *p) +{ + mm->owner = p; +} +#endif /* CONFIG_MM_OWNER */ + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d12795cc7622..49d80814798b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -236,26 +236,12 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) css); } -static struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) +struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) { return container_of(task_subsys_state(p, mem_cgroup_subsys_id), struct mem_cgroup, css); } -void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) -{ - struct mem_cgroup *mem; - - mem = mem_cgroup_from_task(p); - css_get(&mem->css); - mm->mem_cgroup = mem; -} - -void mm_free_cgroup(struct mm_struct *mm) -{ - css_put(&mm->mem_cgroup->css); -} - static inline int page_cgroup_locked(struct page *page) { return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); @@ -476,6 +462,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, int zid = zone_idx(z); struct mem_cgroup_per_zone *mz; + BUG_ON(!mem_cont); mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); if (active) src = &mz->active_list; @@ -574,7 +561,7 @@ retry: mm = &init_mm; rcu_read_lock(); - mem = rcu_dereference(mm->mem_cgroup); + mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); /* * For every charge from the cgroup, increment reference count */ @@ -985,10 +972,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) struct mem_cgroup *mem; int node; - if (unlikely((cont->parent) == NULL)) { + if (unlikely((cont->parent) == NULL)) mem = &init_mem_cgroup; - init_mm.mem_cgroup = mem; - } else + else mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL); if (mem == NULL) @@ -1067,10 +1053,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, if (!thread_group_leader(p)) goto out; - css_get(&mem->css); - rcu_assign_pointer(mm->mem_cgroup, mem); - css_put(&old_mem->css); - out: mmput(mm); } -- cgit v1.2.3-71-gd317 From c84872e168d10926acd2dee975d19172eef79252 Mon Sep 17 00:00:00 2001 From: Pavel 
Emelyanov Date: Tue, 29 Apr 2008 01:00:17 -0700 Subject: memcgroup: add the max_usage member on the res_counter This field is the maximal value of the usage one since the counter creation (or since the latest reset). To reset this to the usage value simply write anything to the appropriate cgroup file. Signed-off-by: Pavel Emelyanov Acked-by: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 14 ++++++++++++++ kernel/res_counter.c | 4 ++++ mm/memcontrol.c | 17 +++++++++++++++++ 3 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 8cb1ecd420a9..df8085acba16 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -24,6 +24,10 @@ struct res_counter { * the current resource consumption level */ unsigned long long usage; + /* + * the maximal value of the usage from the counter creation + */ + unsigned long long max_usage; /* * the limit that usage cannot exceed */ @@ -67,6 +71,7 @@ ssize_t res_counter_write(struct res_counter *counter, int member, enum { RES_USAGE, + RES_MAX_USAGE, RES_LIMIT, RES_FAILCNT, }; @@ -127,4 +132,13 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt) return ret; } +static inline void res_counter_reset_max(struct res_counter *cnt) +{ + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + cnt->max_usage = cnt->usage; + spin_unlock_irqrestore(&cnt->lock, flags); +} + #endif diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 70587657dda3..d3c61b4ebef2 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -28,6 +28,8 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val) } counter->usage += val; + if (counter->usage > counter->max_usage) + counter->max_usage = counter->usage; return 0; } @@ -66,6 +68,8 @@ res_counter_member(struct res_counter *counter, int member) switch (member) { 
case RES_USAGE: return &counter->usage; + case RES_MAX_USAGE: + return &counter->max_usage; case RES_LIMIT: return &counter->limit; case RES_FAILCNT: diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 49d80814798b..350a14da6525 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -855,6 +855,17 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, mem_cgroup_write_strategy); } +static ssize_t mem_cgroup_max_reset(struct cgroup *cont, struct cftype *cft, + struct file *file, const char __user *userbuf, + size_t nbytes, loff_t *ppos) +{ + struct mem_cgroup *mem; + + mem = mem_cgroup_from_cont(cont); + res_counter_reset_max(&mem->res); + return nbytes; +} + static ssize_t mem_force_empty_write(struct cgroup *cont, struct cftype *cft, struct file *file, const char __user *userbuf, @@ -909,6 +920,12 @@ static struct cftype mem_cgroup_files[] = { .private = RES_USAGE, .read_u64 = mem_cgroup_read, }, + { + .name = "max_usage_in_bytes", + .private = RES_MAX_USAGE, + .write = mem_cgroup_max_reset, + .read_u64 = mem_cgroup_read, + }, { .name = "limit_in_bytes", .private = RES_LIMIT, -- cgit v1.2.3-71-gd317 From faebe9fdf35058bb8421e4c09f6f70994eaf8db2 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 01:00:18 -0700 Subject: memcgroups: add a document describing the resource counter abstraction The resource counter is supposed to facilitate the resource accounting of arbitrary resource (and it already does this for memory controller). However, it is about to be used in other resources controllers (swap, kernel memory, networking, etc), so provide a doc describing how to work with it. This will eliminate all the possible future duplications in the appropriate controllers' docs. Fixed errors pointed out by Randy. 
[akpm@linux-foundation.org: fix documentation tpyo] Signed-off-by: Pavel Emelyanov Cc: Randy Dunlap Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/resource_counter.txt | 181 +++++++++++++++++++++++++ include/linux/res_counter.h | 2 + 2 files changed, 183 insertions(+) create mode 100644 Documentation/controllers/resource_counter.txt (limited to 'include/linux') diff --git a/Documentation/controllers/resource_counter.txt b/Documentation/controllers/resource_counter.txt new file mode 100644 index 000000000000..f196ac1d7d25 --- /dev/null +++ b/Documentation/controllers/resource_counter.txt @@ -0,0 +1,181 @@ + + The Resource Counter + +The resource counter, declared at include/linux/res_counter.h, +is supposed to facilitate the resource management by controllers +by providing common stuff for accounting. + +This "stuff" includes the res_counter structure and routines +to work with it. + + + +1. Crucial parts of the res_counter structure + + a. unsigned long long usage + + The usage value shows the amount of a resource that is consumed + by a group at a given time. The units of measurement should be + determined by the controller that uses this counter. E.g. it can + be bytes, items or any other unit the controller operates on. + + b. unsigned long long max_usage + + The maximal value of the usage over time. + + This value is useful when gathering statistical information about + the particular group, as it shows the actual resource requirements + for a particular group, not just some usage snapshot. + + c. unsigned long long limit + + The maximal allowed amount of resource to consume by the group. In + case the group requests for more resources, so that the usage value + would exceed the limit, the resource allocation is rejected (see + the next section). + + d. unsigned long long failcnt + + The failcnt stands for "failures counter". 
This is the number of + resource allocation attempts that failed. + + e. spinlock_t lock + + Protects changes of the above values. + + + +2. Basic accounting routines + + a. void res_counter_init(struct res_counter *rc) + + Initializes the resource counter. As usual, should be the first + routine called for a new counter. + + b. int res_counter_charge[_locked] + (struct res_counter *rc, unsigned long val) + + When a resource is about to be allocated it has to be accounted + with the appropriate resource counter (controller should determine + which one to use on its own). This operation is called "charging". + + It is not very important which operation - resource allocation + or charging - is performed first, but + * if the allocation is performed first, this may create a + temporary resource over-usage by the time resource counter is + charged; + * if the charging is performed first, then it should be uncharged + on error path (if the one is called). + + c. void res_counter_uncharge[_locked] + (struct res_counter *rc, unsigned long val) + + When a resource is released (freed) it should be de-accounted + from the resource counter it was accounted to. This is called + "uncharging". + + The _locked routines imply that the res_counter->lock is taken. + + + 2.1 Other accounting routines + + There are more routines that may help you with common needs, like + checking whether the limit is reached or resetting the max_usage + value. They are all declared in include/linux/res_counter.h. + + + +3. Analyzing the resource counter registrations + + a. If the failcnt value constantly grows, this means that the counter's + limit is too tight. Either the group is misbehaving and consumes too + many resources, or the configuration is not suitable for the group + and the limit should be increased. + + b. The max_usage value can be used to quickly tune the group.
One may + set the limits to maximal values and either load the container with + a common pattern or leave one for a while. After this the max_usage + value shows the amount of memory the container would require during + its common activity. + + Setting the limit a bit above this value gives a pretty good + configuration that works in most of the cases. + + c. If the max_usage is much less than the limit, but the failcnt value + is growing, then the group tries to allocate a big chunk of resource + at once. + + d. If the max_usage is much less than the limit, but the failcnt value + is 0, then this group is given too high limit, that it does not + require. It is better to lower the limit a bit leaving more resource + for other groups. + + + +4. Communication with the control groups subsystem (cgroups) + +All the resource controllers that are using cgroups and resource counters +should provide files (in the cgroup filesystem) to work with the resource +counter fields. They are recommended to adhere to the following rules: + + a. File names + + Field name File name + --------------------------------------------------- + usage usage_in_ + max_usage max_usage_in_ + limit limit_in_ + failcnt failcnt + lock no file :) + + b. Reading from file should show the corresponding field value in the + appropriate format. + + c. Writing to file + + Field Expected behavior + ---------------------------------- + usage prohibited + max_usage reset to usage + limit set the limit + failcnt reset to zero + + + +5. Usage example + + a. Declare a task group (take a look at cgroups subsystem for this) and + fold a res_counter into it + + struct my_group { + struct res_counter res; + + + } + + b. Put hooks in resource allocation/release paths + + int alloc_something(...) + { + if (res_counter_charge(res_counter_ptr, amount) < 0) + return -ENOMEM; + + + } + + void release_something(...) 
+ { + res_counter_uncharge(res_counter_ptr, amount); + + + } + + In order to keep the usage value self-consistent, both the + "res_counter_ptr" and the "amount" in release_something() should be + the same as they were in the alloc_something() when the releasing + resource was allocated. + + c. Provide the way to read res_counter values and set them (the cgroups + still can help with it). + + d. Compile and run :) diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index df8085acba16..629d0ea2d073 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -9,6 +9,8 @@ * * Author: Pavel Emelianov * + * See Documentation/controllers/resource_counter.txt for more + * info about what this counter is. */ #include -- cgit v1.2.3-71-gd317 From 29f2a4dac856e9433a502b05b40e8e90385d8e27 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 01:00:21 -0700 Subject: memcgroup: implement failcounter reset This is a very common requirement from people using the resource accounting facilities (not only memcgroup but also OpenVZ beancounters). They want to put the cgroup in an initial state without re-creating it. For example after re-configuring a group people want to observe how this new configuration fits the group needs without saving the previous failcnt value. Merge two resets into one mem_cgroup_reset() function to demonstrate how multiplexing works. Besides, I have plans to move the files, that correspond to res_counter to the res_counter.c file and somehow "import" them into controller. I don't know how to make it gracefully yet, but merging resets of max_usage and failcnt in one function will be there for sure.
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Pavel Emelyanov Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 8 ++++++++ mm/memcontrol.c | 14 +++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 629d0ea2d073..6d9e1fca098c 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -143,4 +143,12 @@ static inline void res_counter_reset_max(struct res_counter *cnt) spin_unlock_irqrestore(&cnt->lock, flags); } +static inline void res_counter_reset_failcnt(struct res_counter *cnt) +{ + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + cnt->failcnt = 0; + spin_unlock_irqrestore(&cnt->lock, flags); +} #endif diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dc3472f9f68c..f891876efee1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -857,12 +857,19 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, mem_cgroup_write_strategy); } -static int mem_cgroup_max_reset(struct cgroup *cont, unsigned int event) +static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) { struct mem_cgroup *mem; mem = mem_cgroup_from_cont(cont); - res_counter_reset_max(&mem->res); + switch (event) { + case RES_MAX_USAGE: + res_counter_reset_max(&mem->res); + break; + case RES_FAILCNT: + res_counter_reset_failcnt(&mem->res); + break; + } return 0; } @@ -916,7 +923,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "max_usage_in_bytes", .private = RES_MAX_USAGE, - .trigger = mem_cgroup_max_reset, + .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, }, { @@ -928,6 +935,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "failcnt", .private = RES_FAILCNT, + .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, }, { -- cgit v1.2.3-71-gd317 From 
74bc7ceebfa1c84ddd3a843ebfb56df013bf7ef5 Mon Sep 17 00:00:00 2001 From: Arthur Kepner Date: Tue, 29 Apr 2008 01:00:30 -0700 Subject: dma: add dma_*map*_attrs() interfaces Introduce new interfaces, dma_*map*_attrs(), for passing architecture-specific attributes when memory is mapped and unmapped for DMA. Give the interfaces default implementations which ignore attributes. Also introduce the dma_{set|get}_attr() interfaces for setting and retrieving individual attributes. Define one attribute, DMA_ATTR_WRITE_BARRIER, in anticipation of its use by ia64/sn. Select whether architectures implement arch-specific versions of the dma_*map*_attrs() interfaces via HAVE_DMA_ATTRS in Kconfig. [markn@au1.ibm.com: dma_{set,get}_attr() have to be static inline] Signed-off-by: Arthur Kepner Cc: Tony Luck Cc: Jesse Barnes Cc: Jes Sorensen Cc: Randy Dunlap Cc: Roland Dreier Cc: James Bottomley Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Grant Grundler Cc: Michael Ellerman Signed-off-by: Mark Nelson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 3 ++ arch/ia64/Kconfig | 1 + include/linux/dma-attrs.h | 74 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/dma-mapping.h | 17 +++++++++++ 4 files changed, 95 insertions(+) create mode 100644 include/linux/dma-attrs.h (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 694c9af520bb..3ea332b009e5 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -36,3 +36,6 @@ config HAVE_KPROBES config HAVE_KRETPROBES def_bool n + +config HAVE_DMA_ATTRS + def_bool n diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 07f5d353b54a..0df5f6f75edf 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -19,6 +19,7 @@ config IA64 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_DMA_ATTRS select HAVE_KVM default y help diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h new file mode 100644 index 000000000000..1677e2bfa00c --- /dev/null +++ 
b/include/linux/dma-attrs.h @@ -0,0 +1,74 @@ +#ifndef _DMA_ATTR_H +#define _DMA_ATTR_H + +#include +#include +#include + +/** + * an enum dma_attr represents an attribute associated with a DMA + * mapping. The semantics of each attribute should be defined in + * Documentation/DMA-attributes.txt. + */ +enum dma_attr { + DMA_ATTR_WRITE_BARRIER, + DMA_ATTR_MAX, +}; + +#define __DMA_ATTRS_LONGS BITS_TO_LONGS(DMA_ATTR_MAX) + +/** + * struct dma_attrs - an opaque container for DMA attributes + * @flags - bitmask representing a collection of enum dma_attr + */ +struct dma_attrs { + unsigned long flags[__DMA_ATTRS_LONGS]; +}; + +#define DEFINE_DMA_ATTRS(x) \ + struct dma_attrs x = { \ + .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \ + } + +static inline void init_dma_attrs(struct dma_attrs *attrs) +{ + bitmap_zero(attrs->flags, __DMA_ATTRS_LONGS); +} + +#ifdef CONFIG_HAVE_DMA_ATTRS +/** + * dma_set_attr - set a specific attribute + * @attr: attribute to set + * @attrs: struct dma_attrs (may be NULL) + */ +static inline void dma_set_attr(enum dma_attr attr, struct dma_attrs *attrs) +{ + if (attrs == NULL) + return; + BUG_ON(attr >= DMA_ATTR_MAX); + __set_bit(attr, attrs->flags); +} + +/** + * dma_get_attr - check for a specific attribute + * @attr: attribute to set + * @attrs: struct dma_attrs (may be NULL) + */ +static inline int dma_get_attr(enum dma_attr attr, struct dma_attrs *attrs) +{ + if (attrs == NULL) + return 0; + BUG_ON(attr >= DMA_ATTR_MAX); + return test_bit(attr, attrs->flags); +} +#else /* !CONFIG_HAVE_DMA_ATTRS */ +static inline void dma_set_attr(enum dma_attr attr, struct dma_attrs *attrs) +{ +} + +static inline int dma_get_attr(enum dma_attr attr, struct dma_attrs *attrs) +{ + return 0; +} +#endif /* CONFIG_HAVE_DMA_ATTRS */ +#endif /* _DMA_ATTR_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 332030709623..952e0f857ac9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -146,4 +146,21 @@ static 
inline void dmam_release_declared_memory(struct device *dev) } #endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */ +#ifndef CONFIG_HAVE_DMA_ATTRS +struct dma_attrs; + +#define dma_map_single_attrs(dev, cpu_addr, size, dir, attrs) \ + dma_map_single(dev, cpu_addr, size, dir) + +#define dma_unmap_single_attrs(dev, dma_addr, size, dir, attrs) \ + dma_unmap_single(dev, dma_addr, size, dir) + +#define dma_map_sg_attrs(dev, sgl, nents, dir, attrs) \ + dma_map_sg(dev, sgl, nents, dir) + +#define dma_unmap_sg_attrs(dev, sgl, nents, dir, attrs) \ + dma_unmap_sg(dev, sgl, nents, dir) + +#endif /* CONFIG_HAVE_DMA_ATTRS */ + #endif -- cgit v1.2.3-71-gd317 From f7bf3df8be72d98afa84f5ff183e14c1ba1e560d Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Tue, 29 Apr 2008 01:00:39 -0700 Subject: ipc: scale msgmni to the amount of lowmem On large systems we'd like to allow a larger number of message queues. In some cases up to 32K. However simply setting MSGMNI to a larger value may cause problems for smaller systems. The first patch of this series introduces a default maximum number of message queue ids that scales with the amount of lowmem. Since msgmni is per namespace and there is no amount of memory dedicated to each namespace so far, the second patch of this series scales msgmni to the number of ipc namespaces too. Since msgmni depends on the amount of memory, it becomes necessary to recompute it upon memory add/remove. In the 4th patch, memory hotplug management is added: a notifier block is registered into the memory hotplug notifier chain for the ipc subsystem. Since the ipc namespaces are not linked together, they have their own notification chain: one notifier_block is defined per ipc namespace. Each time an ipc namespace is created (removed) it registers (unregisters) its notifier block in (from) the ipcns chain. The callback routine registered in the memory chain invokes the ipcns notifier chain with the IPCNS_MEMCHANGE event. 
Each callback routine registered in the ipcns namespace, in turn, recomputes msgmni for the owning namespace. The 5th patch makes it possible to keep the memory hotplug notifier chain's lock for a lesser amount of time: instead of directly notifying the ipcns notifier chain upon memory add/remove, a work item is added to the global workqueue. When activated, this work item is the one who notifies the ipcns notifier chain. Since msgmni depends on the number of ipc namespaces, it becomes necessary to recompute it upon ipc namespace creation / removal. The 6th patch uses the ipc namespace notifier chain for that purpose: that chain is notified each time an ipc namespace is created or removed. This makes it possible to recompute msgmni for all the namespaces each time one of them is created or removed. When msgmni is explicitly set from userspace, we should avoid recomputing it upon memory add/remove or ipcns creation/removal. This is what the 7th patch does: it simply unregisters the ipcns callback routine as soon as msgmni has been changed from procfs or sysctl(). Even if msgmni is set by hand, it should be possible to have it automatically recomputed again upon memory add/remove or ipcns creation/removal. This is what is achieved in patch 8: if set to a negative value, msgmni is added back to the ipcns notifier chain, making it automatically recomputed again. This patch: Compute msg_ctlmni to make it scale with the amount of lowmem. msg_ctlmni is now set to make the message queues occupy 1/32 of the available lowmem. Some cleaning has also been done for the MSGPOOL constant: the msgctl man page says it's not used, but it also defines it as a size in bytes (the code expresses it in Kbytes).
Signed-off-by: Nadia Derbey Cc: Yasunori Goto Cc: Matt Helsley Cc: Mingming Cao Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 14 ++++++++++++-- ipc/msg.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msg.h b/include/linux/msg.h index 10a3d5a1abff..6f3b8e79a991 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -49,16 +49,26 @@ struct msginfo { unsigned short msgseg; }; +/* + * Scaling factor to compute msgmni: + * the memory dedicated to msg queues (msgmni * msgmnb) should occupy + * at most 1/MSG_MEM_SCALE of the lowmem (see the formula in ipc/msg.c): + * up to 8MB : msgmni = 16 (MSGMNI) + * 4 GB : msgmni = 8K + * more than 16 GB : msgmni = 32K (IPCMNI) + */ +#define MSG_MEM_SCALE 32 + #define MSGMNI 16 /* <= IPCMNI */ /* max # of msg queue identifiers */ #define MSGMAX 8192 /* <= INT_MAX */ /* max size of message (bytes) */ #define MSGMNB 16384 /* <= INT_MAX */ /* default max size of a message queue */ /* unused */ -#define MSGPOOL (MSGMNI*MSGMNB/1024) /* size in kilobytes of message pool */ +#define MSGPOOL (MSGMNI * MSGMNB) /* size in bytes of message pool */ #define MSGTQL MSGMNB /* number of system message headers */ #define MSGMAP MSGMNB /* number of entries in message map */ #define MSGSSZ 16 /* message segment size */ -#define __MSGSEG ((MSGPOOL*1024)/ MSGSSZ) /* max no. of segments */ +#define __MSGSEG (MSGPOOL / MSGSSZ) /* max no. of segments */ #define MSGSEG (__MSGSEG <= 0xffff ? 
__MSGSEG : 0xffff) #ifdef __KERNEL__ diff --git a/ipc/msg.c b/ipc/msg.c index 805ee08ec8bb..9e7211122e27 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -77,11 +78,45 @@ static int newque(struct ipc_namespace *, struct ipc_params *); static int sysvipc_msg_proc_show(struct seq_file *s, void *it); #endif +/* + * Scale msgmni with the available lowmem size: the memory dedicated to msg + * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. + * This should be done staying within the (MSGMNI , IPCMNI) range. + */ +static void recompute_msgmni(struct ipc_namespace *ns) +{ + struct sysinfo i; + unsigned long allowed; + + si_meminfo(&i); + allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) + / MSGMNB; + + if (allowed < MSGMNI) { + ns->msg_ctlmni = MSGMNI; + goto out_callback; + } + + if (allowed > IPCMNI) { + ns->msg_ctlmni = IPCMNI; + goto out_callback; + } + + ns->msg_ctlmni = allowed; + +out_callback: + + printk(KERN_INFO "msgmni has been set to %d for ipc namespace %p\n", + ns->msg_ctlmni, ns); +} + void msg_init_ns(struct ipc_namespace *ns) { ns->msg_ctlmax = MSGMAX; ns->msg_ctlmnb = MSGMNB; - ns->msg_ctlmni = MSGMNI; + + recompute_msgmni(ns); + atomic_set(&ns->msg_bytes, 0); atomic_set(&ns->msg_hdrs, 0); ipc_init_ids(&ns->ids[IPC_MSG_IDS]); -- cgit v1.2.3-71-gd317 From 4d89dc6ab2711258bfd12c72d753f3ad56b244e2 Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Tue, 29 Apr 2008 01:00:40 -0700 Subject: ipc: scale msgmni to the number of ipc namespaces Since all the namespaces see the same amount of memory (the total one) this patch introduces a new variable that counts the ipc namespaces and divides msg_ctlmni by this counter. 
Signed-off-by: Nadia Derbey Cc: Yasunori Goto Cc: Matt Helsley Cc: Mingming Cao Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 1 + ipc/msg.c | 10 +++++++--- ipc/namespace.c | 3 +++ ipc/util.c | 3 +++ 4 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e4451d1da753..878d7ac286fa 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -33,6 +33,7 @@ struct ipc_namespace { }; extern struct ipc_namespace init_ipc_ns; +extern atomic_t nr_ipc_ns; #ifdef CONFIG_SYSVIPC #define INIT_IPC_NS(ns) .ns = &init_ipc_ns, diff --git a/ipc/msg.c b/ipc/msg.c index 9e7211122e27..be8449d48a8e 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -81,24 +81,28 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it); /* * Scale msgmni with the available lowmem size: the memory dedicated to msg * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. - * This should be done staying within the (MSGMNI , IPCMNI) range. + * Also take into account the number of nsproxies created so far. + * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. 
*/ static void recompute_msgmni(struct ipc_namespace *ns) { struct sysinfo i; unsigned long allowed; + int nb_ns; si_meminfo(&i); allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) / MSGMNB; + nb_ns = atomic_read(&nr_ipc_ns); + allowed /= nb_ns; if (allowed < MSGMNI) { ns->msg_ctlmni = MSGMNI; goto out_callback; } - if (allowed > IPCMNI) { - ns->msg_ctlmni = IPCMNI; + if (allowed > IPCMNI / nb_ns) { + ns->msg_ctlmni = IPCMNI / nb_ns; goto out_callback; } diff --git a/ipc/namespace.c b/ipc/namespace.c index 1b967655eb35..fe3c97aa99dc 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -20,6 +20,8 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) if (ns == NULL) return ERR_PTR(-ENOMEM); + atomic_inc(&nr_ipc_ns); + sem_init_ns(ns); msg_init_ns(ns); shm_init_ns(ns); @@ -83,4 +85,5 @@ void free_ipc_ns(struct kref *kref) msg_exit_ns(ns); shm_exit_ns(ns); kfree(ns); + atomic_dec(&nr_ipc_ns); } diff --git a/ipc/util.c b/ipc/util.c index cb017c7b9370..c27f0e92f489 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -52,6 +52,9 @@ struct ipc_namespace init_ipc_ns = { }, }; +atomic_t nr_ipc_ns = ATOMIC_INIT(1); + + /** * ipc_init - initialise IPC subsystem * -- cgit v1.2.3-71-gd317 From 0c40ba4fd64f98e7a5cba8ffaedbd68642a85700 Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Tue, 29 Apr 2008 01:00:41 -0700 Subject: ipc: define the slab_memory_callback priority as a constant This is a trivial patch that defines the priority of slab_memory_callback in the callback chain as a constant. This is to prepare for next patch in the series. 
Signed-off-by: Nadia Derbey Cc: Yasunori Goto Cc: Matt Helsley Cc: Mingming Cao Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 6 ++++++ mm/slub.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index f80e0e331cb7..39628dfe4a4c 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -53,6 +53,12 @@ struct memory_notify { struct notifier_block; struct mem_section; +/* + * Priorities for the hotplug memory callback routines (stored in decreasing + * order in the callback chain) + */ +#define SLAB_CALLBACK_PRI 1 + #ifndef CONFIG_MEMORY_HOTPLUG_SPARSE static inline int memory_dev_init(void) { diff --git a/mm/slub.c b/mm/slub.c index 992ecd4f0d39..b145e798bf3d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2978,7 +2978,7 @@ void __init kmem_cache_init(void) kmalloc_caches[0].refcount = -1; caches++; - hotplug_memory_notifier(slab_memory_callback, 1); + hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); #endif /* Able to allocate the per node structures */ -- cgit v1.2.3-71-gd317 From b6b337ad1c1d6fe11b09b35d75464b84b3e11f07 Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Tue, 29 Apr 2008 01:00:42 -0700 Subject: ipc: recompute msgmni on memory add / remove Introduce the registration of a callback routine that recomputes msg_ctlmni upon memory add / remove. A single notifier block is registered in the hotplug memory chain for all the ipc namespaces. Since the ipc namespaces are not linked together, they have their own notification chain: one notifier_block is defined per ipc namespace. Each time an ipc namespace is created (removed) it registers (unregisters) its notifier block in (from) the ipcns chain. The callback routine registered in the memory chain invokes the ipcns notifier chain with the IPCNS_MEMCHANGED event.
Each callback routine registered in the ipcns namespace, in turn, recomputes msgmni for the owning namespace. Signed-off-by: Nadia Derbey Cc: Yasunori Goto Cc: Matt Helsley Cc: Mingming Cao Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 43 ++++++++++++++++++++++++-- include/linux/memory.h | 1 + ipc/Makefile | 3 +- ipc/ipcns_notifier.c | 71 +++++++++++++++++++++++++++++++++++++++++++ ipc/msg.c | 2 +- ipc/namespace.c | 11 +++++++ ipc/util.c | 33 ++++++++++++++++++++ ipc/util.h | 2 ++ 8 files changed, 162 insertions(+), 4 deletions(-) create mode 100644 ipc/ipcns_notifier.c (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 878d7ac286fa..cfb2a08b28f5 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -4,6 +4,17 @@ #include #include #include +#ifdef CONFIG_MEMORY_HOTPLUG +#include +#endif /* CONFIG_MEMORY_HOTPLUG */ + +/* + * ipc namespace events + */ +#define IPCNS_MEMCHANGED 0x00000001 /* Notify lowmem size changed */ + +#define IPCNS_CALLBACK_PRI 0 + struct ipc_ids { int in_use; @@ -30,6 +41,10 @@ struct ipc_namespace { size_t shm_ctlall; int shm_ctlmni; int shm_tot; + +#ifdef CONFIG_MEMORY_HOTPLUG + struct notifier_block ipcns_nb; +#endif }; extern struct ipc_namespace init_ipc_ns; @@ -37,9 +52,33 @@ extern atomic_t nr_ipc_ns; #ifdef CONFIG_SYSVIPC #define INIT_IPC_NS(ns) .ns = &init_ipc_ns, -#else + +#ifdef CONFIG_MEMORY_HOTPLUG + +extern int register_ipcns_notifier(struct ipc_namespace *); +extern int unregister_ipcns_notifier(struct ipc_namespace *); +extern int ipcns_notify(unsigned long); + +#else /* CONFIG_MEMORY_HOTPLUG */ + +static inline int register_ipcns_notifier(struct ipc_namespace *ipcns) +{ + return 0; +} +static inline int unregister_ipcns_notifier(struct ipc_namespace *ipcns) +{ + return 0; +} +static inline int ipcns_notify(unsigned long ev) +{ + return 0; +} + +#endif /* CONFIG_MEMORY_HOTPLUG 
*/ + +#else /* CONFIG_SYSVIPC */ #define INIT_IPC_NS(ns) -#endif +#endif /* CONFIG_SYSVIPC */ #if defined(CONFIG_SYSVIPC) && defined(CONFIG_IPC_NS) extern void free_ipc_ns(struct kref *kref); diff --git a/include/linux/memory.h b/include/linux/memory.h index 39628dfe4a4c..2f5f8a5ef2a0 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -58,6 +58,7 @@ struct mem_section; * order in the callback chain) */ #define SLAB_CALLBACK_PRI 1 +#define IPC_CALLBACK_PRI 10 #ifndef CONFIG_MEMORY_HOTPLUG_SPARSE static inline int memory_dev_init(void) diff --git a/ipc/Makefile b/ipc/Makefile index 5fc5e33ea047..388e4d259f02 100644 --- a/ipc/Makefile +++ b/ipc/Makefile @@ -3,7 +3,8 @@ # obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o -obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o +obj_mem-$(CONFIG_MEMORY_HOTPLUG) += ipcns_notifier.o +obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o $(obj_mem-y) obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o obj_mq-$(CONFIG_COMPAT) += compat_mq.o obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y) diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c new file mode 100644 index 000000000000..0786af6ce3ec --- /dev/null +++ b/ipc/ipcns_notifier.c @@ -0,0 +1,71 @@ +/* + * linux/ipc/ipcns_notifier.c + * Copyright (C) 2007 BULL SA. Nadia Derbey + * + * Notification mechanism for ipc namespaces: + * The callback routine registered in the memory chain invokes the ipcns + * notifier chain with the IPCNS_MEMCHANGED event. + * Each callback routine registered in the ipcns namespace recomputes msgmni + * for the owning namespace. 
+ */ + +#include +#include +#include +#include +#include + +#include "util.h" + + + +static BLOCKING_NOTIFIER_HEAD(ipcns_chain); + + +static int ipcns_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + struct ipc_namespace *ns; + + switch (action) { + case IPCNS_MEMCHANGED: /* amount of lowmem has changed */ + /* + * It's time to recompute msgmni + */ + ns = container_of(self, struct ipc_namespace, ipcns_nb); + /* + * No need to get a reference on the ns: the 1st job of + * free_ipc_ns() is to unregister the callback routine. + * blocking_notifier_chain_unregister takes the wr lock to do + * it. + * When this callback routine is called the rd lock is held by + * blocking_notifier_call_chain. + * So the ipc ns cannot be freed while we are here. + */ + recompute_msgmni(ns); + break; + default: + break; + } + + return NOTIFY_OK; +} + +int register_ipcns_notifier(struct ipc_namespace *ns) +{ + memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); + ns->ipcns_nb.notifier_call = ipcns_callback; + ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; + return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); +} + +int unregister_ipcns_notifier(struct ipc_namespace *ns) +{ + return blocking_notifier_chain_unregister(&ipcns_chain, + &ns->ipcns_nb); +} + +int ipcns_notify(unsigned long val) +{ + return blocking_notifier_call_chain(&ipcns_chain, val, NULL); +} diff --git a/ipc/msg.c b/ipc/msg.c index be8449d48a8e..7d9b0694c743 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -84,7 +84,7 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it); * Also take into account the number of nsproxies created so far. * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. 
*/ -static void recompute_msgmni(struct ipc_namespace *ns) +void recompute_msgmni(struct ipc_namespace *ns) { struct sysinfo i; unsigned long allowed; diff --git a/ipc/namespace.c b/ipc/namespace.c index fe3c97aa99dc..f7a35be2e771 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -26,6 +26,8 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) msg_init_ns(ns); shm_init_ns(ns); + register_ipcns_notifier(ns); + kref_init(&ns->kref); return ns; } @@ -81,6 +83,15 @@ void free_ipc_ns(struct kref *kref) struct ipc_namespace *ns; ns = container_of(kref, struct ipc_namespace, kref); + /* + * Unregistering the hotplug notifier at the beginning guarantees + * that the ipc namespace won't be freed while we are inside the + * callback routine. Since the blocking_notifier_chain_XXX routines + * hold a rw lock on the notifier list, unregister_ipcns_notifier() + * won't take the rw lock before blocking_notifier_call_chain() has + * released the rd lock. + */ + unregister_ipcns_notifier(ns); sem_exit_ns(ns); msg_exit_ns(ns); shm_exit_ns(ns); diff --git a/ipc/util.c b/ipc/util.c index c27f0e92f489..2d545d7144a7 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -55,11 +56,41 @@ struct ipc_namespace init_ipc_ns = { atomic_t nr_ipc_ns = ATOMIC_INIT(1); +#ifdef CONFIG_MEMORY_HOTPLUG + +static int ipc_memory_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + switch (action) { + case MEM_ONLINE: /* memory successfully brought online */ + case MEM_OFFLINE: /* or offline: it's time to recompute msgmni */ + /* + * This is done by invoking the ipcns notifier chain with the + * IPC_MEMCHANGED event. 
+ */ + ipcns_notify(IPCNS_MEMCHANGED); + break; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_CANCEL_OFFLINE: + default: + break; + } + + return NOTIFY_OK; +} + +#endif /* CONFIG_MEMORY_HOTPLUG */ + /** * ipc_init - initialise IPC subsystem * * The various system5 IPC resources (semaphores, messages and shared * memory) are initialised + * A callback routine is registered into the memory hotplug notifier + * chain: since msgmni scales to lowmem this callback routine will be + * called upon successful memory add / remove to recompute msmgni. */ static int __init ipc_init(void) @@ -67,6 +98,8 @@ static int __init ipc_init(void) sem_init(); msg_init(); shm_init(); + hotplug_memory_notifier(ipc_memory_callback, IPC_CALLBACK_PRI); + register_ipcns_notifier(&init_ipc_ns); return 0; } __initcall(ipc_init); diff --git a/ipc/util.h b/ipc/util.h index f37d160c98fe..0e3d79037a2a 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -124,6 +124,8 @@ extern void free_msg(struct msg_msg *msg); extern struct msg_msg *load_msg(const void __user *src, int len); extern int store_msg(void __user *dest, struct msg_msg *msg, int len); +extern void recompute_msgmni(struct ipc_namespace *); + static inline int ipc_buildid(int id, int seq) { return SEQ_MULTIPLIER * seq + id; -- cgit v1.2.3-71-gd317 From e2c284d8a87f95df9b47c6a13168a844ca7c03e9 Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Tue, 29 Apr 2008 01:00:44 -0700 Subject: ipc: recompute msgmni on ipc namespace creation/removal Introduce a notification mechanism that aims at recomputing msgmni each time an ipc namespace is created or removed. The ipc namespace notifier chain already defined for memory hotplug management is used for that purpose too. Each time a new ipc namespace is allocated or an existing ipc namespace is removed, the ipcns notifier chain is notified. The callback routine for each registered ipc namespace is then activated in order to recompute msgmni for that namespace. 
Signed-off-by: Nadia Derbey Cc: Yasunori Goto Cc: Matt Helsley Cc: Mingming Cao Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 25 ++----------------------- ipc/Makefile | 3 +-- ipc/ipcns_notifier.c | 2 ++ ipc/namespace.c | 12 ++++++++++++ 4 files changed, 17 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index cfb2a08b28f5..c3b1da9e5feb 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -4,14 +4,14 @@ #include #include #include -#ifdef CONFIG_MEMORY_HOTPLUG #include -#endif /* CONFIG_MEMORY_HOTPLUG */ /* * ipc namespace events */ #define IPCNS_MEMCHANGED 0x00000001 /* Notify lowmem size changed */ +#define IPCNS_CREATED 0x00000002 /* Notify new ipc namespace created */ +#define IPCNS_REMOVED 0x00000003 /* Notify ipc namespace removed */ #define IPCNS_CALLBACK_PRI 0 @@ -42,9 +42,7 @@ struct ipc_namespace { int shm_ctlmni; int shm_tot; -#ifdef CONFIG_MEMORY_HOTPLUG struct notifier_block ipcns_nb; -#endif }; extern struct ipc_namespace init_ipc_ns; @@ -53,29 +51,10 @@ extern atomic_t nr_ipc_ns; #ifdef CONFIG_SYSVIPC #define INIT_IPC_NS(ns) .ns = &init_ipc_ns, -#ifdef CONFIG_MEMORY_HOTPLUG - extern int register_ipcns_notifier(struct ipc_namespace *); extern int unregister_ipcns_notifier(struct ipc_namespace *); extern int ipcns_notify(unsigned long); -#else /* CONFIG_MEMORY_HOTPLUG */ - -static inline int register_ipcns_notifier(struct ipc_namespace *ipcns) -{ - return 0; -} -static inline int unregister_ipcns_notifier(struct ipc_namespace *ipcns) -{ - return 0; -} -static inline int ipcns_notify(unsigned long ev) -{ - return 0; -} - -#endif /* CONFIG_MEMORY_HOTPLUG */ - #else /* CONFIG_SYSVIPC */ #define INIT_IPC_NS(ns) #endif /* CONFIG_SYSVIPC */ diff --git a/ipc/Makefile b/ipc/Makefile index 388e4d259f02..65c384395801 100644 --- a/ipc/Makefile +++ b/ipc/Makefile @@ -3,8 +3,7 @@ # 
obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o -obj_mem-$(CONFIG_MEMORY_HOTPLUG) += ipcns_notifier.o -obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o $(obj_mem-y) +obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o obj_mq-$(CONFIG_COMPAT) += compat_mq.o obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y) diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c index 0786af6ce3ec..c7974609defc 100644 --- a/ipc/ipcns_notifier.c +++ b/ipc/ipcns_notifier.c @@ -29,6 +29,8 @@ static int ipcns_callback(struct notifier_block *self, switch (action) { case IPCNS_MEMCHANGED: /* amount of lowmem has changed */ + case IPCNS_CREATED: + case IPCNS_REMOVED: /* * It's time to recompute msgmni */ diff --git a/ipc/namespace.c b/ipc/namespace.c index f7a35be2e771..9171d948751e 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -26,6 +26,12 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) msg_init_ns(ns); shm_init_ns(ns); + /* + * msgmni has already been computed for the new ipc ns. + * Thus, do the ipcns creation notification before registering that + * new ipcns in the chain. + */ + ipcns_notify(IPCNS_CREATED); register_ipcns_notifier(ns); kref_init(&ns->kref); @@ -97,4 +103,10 @@ void free_ipc_ns(struct kref *kref) shm_exit_ns(ns); kfree(ns); atomic_dec(&nr_ipc_ns); + + /* + * Do the ipcns removal notification after decrementing nr_ipc_ns in + * order to have a correct value when recomputing msgmni. + */ + ipcns_notify(IPCNS_REMOVED); } -- cgit v1.2.3-71-gd317 From 6546bc4279241e8fa432de1bb63a4f6f791fd669 Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Tue, 29 Apr 2008 01:00:45 -0700 Subject: ipc: re-enable msgmni automatic recomputing msgmni if set to negative The enhancement as asked for by Yasunori: if msgmni is set to a negative value, register it back into the ipcns notifier chain. 
A new interface has been added to the notification mechanism: notifier_chain_cond_register() registers a notifier block only if not already registered. With that new interface we avoid taking care of the states changes in procfs. Signed-off-by: Nadia Derbey Cc: Yasunori Goto Cc: Matt Helsley Cc: Mingming Cao Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 1 + include/linux/notifier.h | 4 ++++ ipc/ipc_sysctl.c | 45 +++++++++++++++++++++++++++++++++---------- ipc/ipcns_notifier.c | 9 +++++++++ kernel/notifier.c | 38 ++++++++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index c3b1da9e5feb..ea6c18a8b0d4 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -52,6 +52,7 @@ extern atomic_t nr_ipc_ns; #define INIT_IPC_NS(ns) .ns = &init_ipc_ns, extern int register_ipcns_notifier(struct ipc_namespace *); +extern int cond_register_ipcns_notifier(struct ipc_namespace *); extern int unregister_ipcns_notifier(struct ipc_namespace *); extern int ipcns_notify(unsigned long); diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 20dfed590183..0ff6224d172a 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -121,6 +121,10 @@ extern int raw_notifier_chain_register(struct raw_notifier_head *nh, extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *nb); +extern int blocking_notifier_chain_cond_register( + struct blocking_notifier_head *nh, + struct notifier_block *nb); + extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index d12ff5cd2a0b..d3497465cc0a 100644 --- a/ipc/ipc_sysctl.c +++ 
b/ipc/ipc_sysctl.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include "util.h" static void *get_ipc(ctl_table *table) { @@ -24,6 +26,27 @@ static void *get_ipc(ctl_table *table) return which; } +/* + * Routine that is called when a tunable has successfully been changed by + * hand and it has a callback routine registered on the ipc namespace notifier + * chain: we don't want such tunables to be recomputed anymore upon memory + * add/remove or ipc namespace creation/removal. + * They can come back to a recomputable state by being set to a <0 value. + */ +static void tunable_set_callback(int val) +{ + if (val >= 0) + unregister_ipcns_notifier(current->nsproxy->ipc_ns); + else { + /* + * Re-enable automatic recomputing only if not already + * enabled. + */ + recompute_msgmni(current->nsproxy->ipc_ns); + cond_register_ipcns_notifier(current->nsproxy->ipc_ns); + } +} + #ifdef CONFIG_PROC_FS static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -38,17 +61,17 @@ static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, static int proc_ipc_callback_dointvec(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct ctl_table ipc_table; size_t lenp_bef = *lenp; int rc; - rc = proc_ipc_dointvec(table, write, filp, buffer, lenp, ppos); + memcpy(&ipc_table, table, sizeof(ipc_table)); + ipc_table.data = get_ipc(table); + + rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) - /* - * Tunable has successfully been changed from userland: - * disable its automatic recomputing. 
- */ - unregister_ipcns_notifier(current->nsproxy->ipc_ns); + tunable_set_callback(*((int *)(ipc_table.data))); return rc; } @@ -119,12 +142,14 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, newlen); - if (newval && newlen && rc > 0) + if (newval && newlen && rc > 0) { /* - * Tunable has successfully been changed from userland: - * disable its automatic recomputing. + * Tunable has successfully been changed from userland */ - unregister_ipcns_notifier(current->nsproxy->ipc_ns); + int *data = get_ipc(table); + + tunable_set_callback(*data); + } return rc; } diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c index c7974609defc..70ff09183f7b 100644 --- a/ipc/ipcns_notifier.c +++ b/ipc/ipcns_notifier.c @@ -61,6 +61,15 @@ int register_ipcns_notifier(struct ipc_namespace *ns) return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); } +int cond_register_ipcns_notifier(struct ipc_namespace *ns) +{ + memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); + ns->ipcns_nb.notifier_call = ipcns_callback; + ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; + return blocking_notifier_chain_cond_register(&ipcns_chain, + &ns->ipcns_nb); +} + int unregister_ipcns_notifier(struct ipc_namespace *ns) { return blocking_notifier_chain_unregister(&ipcns_chain, diff --git a/kernel/notifier.c b/kernel/notifier.c index 643360d1bb14..823be11584ef 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -31,6 +31,21 @@ static int notifier_chain_register(struct notifier_block **nl, return 0; } +static int notifier_chain_cond_register(struct notifier_block **nl, + struct notifier_block *n) +{ + while ((*nl) != NULL) { + if ((*nl) == n) + return 0; + if (n->priority > (*nl)->priority) + break; + nl = &((*nl)->next); + } + n->next = *nl; + rcu_assign_pointer(*nl, n); + return 0; +} + static int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) { @@ -204,6 +219,29 @@ 
int blocking_notifier_chain_register(struct blocking_notifier_head *nh, } EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); +/** + * blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain + * @nh: Pointer to head of the blocking notifier chain + * @n: New entry in notifier chain + * + * Adds a notifier to a blocking notifier chain, only if not already + * present in the chain. + * Must be called in process context. + * + * Currently always returns zero. + */ +int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, + struct notifier_block *n) +{ + int ret; + + down_write(&nh->rwsem); + ret = notifier_chain_cond_register(&nh->head, n); + up_write(&nh->rwsem); + return ret; +} +EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register); + /** * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain -- cgit v1.2.3-71-gd317 From 44f564a4bf6ac70f2a84806203045cf515bc9367 Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Tue, 29 Apr 2008 01:00:55 -0700 Subject: ipc: add definitions of USHORT_MAX and others Add definitions of USHORT_MAX and others into kernel. ipc uses it and slub implementation might also use it. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Zhang Yanmin Reviewed-by: Christoph Lameter Cc: Nadia Derbey Cc: "Pierre Peiffer" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 3 +++ ipc/msg.c | 12 ++++++------ ipc/util.c | 4 ++-- ipc/util.h | 1 - 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ad5d05efcd1a..53839ba265ec 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -20,6 +20,9 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; +#define USHORT_MAX ((u16)(~0U)) +#define SHORT_MAX ((s16)(USHORT_MAX>>1)) +#define SHORT_MIN (-SHORT_MAX - 1) #define INT_MAX ((int)(~0U>>1)) #define INT_MIN (-INT_MAX - 1) #define UINT_MAX (~0U) diff --git a/ipc/msg.c b/ipc/msg.c index 4a858f98a760..32494e8cc7a5 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -346,19 +346,19 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) out.msg_rtime = in->msg_rtime; out.msg_ctime = in->msg_ctime; - if (in->msg_cbytes > USHRT_MAX) - out.msg_cbytes = USHRT_MAX; + if (in->msg_cbytes > USHORT_MAX) + out.msg_cbytes = USHORT_MAX; else out.msg_cbytes = in->msg_cbytes; out.msg_lcbytes = in->msg_cbytes; - if (in->msg_qnum > USHRT_MAX) - out.msg_qnum = USHRT_MAX; + if (in->msg_qnum > USHORT_MAX) + out.msg_qnum = USHORT_MAX; else out.msg_qnum = in->msg_qnum; - if (in->msg_qbytes > USHRT_MAX) - out.msg_qbytes = USHRT_MAX; + if (in->msg_qbytes > USHORT_MAX) + out.msg_qbytes = USHORT_MAX; else out.msg_qbytes = in->msg_qbytes; out.msg_lqbytes = in->msg_qbytes; diff --git a/ipc/util.c b/ipc/util.c index c4f1d33b89e4..4c465cb22360 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -133,8 +133,8 @@ void ipc_init_ids(struct ipc_ids *ids) ids->seq = 0; { int seq_limit = INT_MAX/SEQ_MULTIPLIER; - if(seq_limit > USHRT_MAX) - ids->seq_max = USHRT_MAX; + if (seq_limit > USHORT_MAX) + ids->seq_max = USHORT_MAX; else 
ids->seq_max = seq_limit; } diff --git a/ipc/util.h b/ipc/util.h index 791c5c012718..cdb966aebe07 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -12,7 +12,6 @@ #include -#define USHRT_MAX 0xffff #define SEQ_MULTIPLIER (IPCMNI) void sem_init (void); -- cgit v1.2.3-71-gd317 From bda4c30aa6f7dc1483f39ea1dfe37bcab8a96207 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 29 Apr 2008 01:01:02 -0700 Subject: ipmi: run to completion fixes The "run_to_completion" mode was somewhat broken. Locks need to be avoided in run_to_completion mode, and it shouldn't be used by normal users, just internally for panic situations. This patch removes locks in run_to_completion mode and removes the user call for setting the mode. The only user was the poweroff code, but it was easily converted to use the polling interface. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 8 ------- drivers/char/ipmi/ipmi_poweroff.c | 20 +++++++++++++---- drivers/char/ipmi/ipmi_si_intf.c | 43 ++++++++++++++++--------------------- include/linux/ipmi.h | 11 ++-------- 4 files changed, 37 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 32b2b22996dc..9f0075ca34ba 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -1197,13 +1197,6 @@ int ipmi_unregister_for_cmd(ipmi_user_t user, return rv; } -void ipmi_user_set_run_to_completion(ipmi_user_t user, int val) -{ - ipmi_smi_t intf = user->intf; - if (intf->handlers) - intf->handlers->set_run_to_completion(intf->send_info, val); -} - static unsigned char ipmb_checksum(unsigned char *data, int size) { @@ -4190,5 +4183,4 @@ EXPORT_SYMBOL(ipmi_get_my_address); EXPORT_SYMBOL(ipmi_set_my_LUN); EXPORT_SYMBOL(ipmi_get_my_LUN); EXPORT_SYMBOL(ipmi_smi_add_proc_entry); 
-EXPORT_SYMBOL(ipmi_user_set_run_to_completion); EXPORT_SYMBOL(ipmi_free_recv_msg); diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c index b86186de7f07..b065a53d1ca8 100644 --- a/drivers/char/ipmi/ipmi_poweroff.c +++ b/drivers/char/ipmi/ipmi_poweroff.c @@ -99,11 +99,14 @@ static unsigned char ipmi_version; allocate them, since we may be in a panic situation. The whole thing is single-threaded, anyway, so multiple messages are not required. */ +static atomic_t dummy_count = ATOMIC_INIT(0); static void dummy_smi_free(struct ipmi_smi_msg *msg) { + atomic_dec(&dummy_count); } static void dummy_recv_free(struct ipmi_recv_msg *msg) { + atomic_dec(&dummy_count); } static struct ipmi_smi_msg halt_smi_msg = { @@ -152,17 +155,28 @@ static int ipmi_request_wait_for_response(ipmi_user_t user, return halt_recv_msg.msg.data[0]; } -/* We are in run-to-completion mode, no completion is desired. */ +/* Wait for message to complete, spinning. */ static int ipmi_request_in_rc_mode(ipmi_user_t user, struct ipmi_addr *addr, struct kernel_ipmi_msg *send_msg) { int rv; + atomic_set(&dummy_count, 2); rv = ipmi_request_supply_msgs(user, addr, 0, send_msg, NULL, &halt_smi_msg, &halt_recv_msg, 0); - if (rv) + if (rv) { + atomic_set(&dummy_count, 0); return rv; + } + + /* + * Spin until our message is done. + */ + while (atomic_read(&dummy_count) > 0) { + ipmi_poll_interface(user); + cpu_relax(); + } return halt_recv_msg.msg.data[0]; } @@ -531,9 +545,7 @@ static void ipmi_poweroff_function (void) return; /* Use run-to-completion mode, since interrupts may be off. 
*/ - ipmi_user_set_run_to_completion(ipmi_user, 1); specific_poweroff_func(ipmi_user); - ipmi_user_set_run_to_completion(ipmi_user, 0); } /* Wait for an IPMI interface to be installed, the first one installed diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 1a8c1ca90557..30f535657342 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -806,56 +806,53 @@ static void sender(void *send_info, return; } - spin_lock_irqsave(&(smi_info->msg_lock), flags); #ifdef DEBUG_TIMING do_gettimeofday(&t); printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec); #endif if (smi_info->run_to_completion) { - /* If we are running to completion, then throw it in - the list and run transactions until everything is - clear. Priority doesn't matter here. */ + /* + * If we are running to completion, then throw it in + * the list and run transactions until everything is + * clear. Priority doesn't matter here. + */ + + /* + * Run to completion means we are single-threaded, no + * need for locks. + */ list_add_tail(&(msg->link), &(smi_info->xmit_msgs)); - /* We have to release the msg lock and claim the smi - lock in this case, because of race conditions. 
*/ - spin_unlock_irqrestore(&(smi_info->msg_lock), flags); - - spin_lock_irqsave(&(smi_info->si_lock), flags); result = smi_event_handler(smi_info, 0); while (result != SI_SM_IDLE) { udelay(SI_SHORT_TIMEOUT_USEC); result = smi_event_handler(smi_info, SI_SHORT_TIMEOUT_USEC); } - spin_unlock_irqrestore(&(smi_info->si_lock), flags); return; - } else { - if (priority > 0) { - list_add_tail(&(msg->link), &(smi_info->hp_xmit_msgs)); - } else { - list_add_tail(&(msg->link), &(smi_info->xmit_msgs)); - } } - spin_unlock_irqrestore(&(smi_info->msg_lock), flags); - spin_lock_irqsave(&(smi_info->si_lock), flags); + spin_lock_irqsave(&smi_info->msg_lock, flags); + if (priority > 0) + list_add_tail(&msg->link, &smi_info->hp_xmit_msgs); + else + list_add_tail(&msg->link, &smi_info->xmit_msgs); + spin_unlock_irqrestore(&smi_info->msg_lock, flags); + + spin_lock_irqsave(&smi_info->si_lock, flags); if ((smi_info->si_state == SI_NORMAL) && (smi_info->curr_msg == NULL)) { start_next_msg(smi_info); } - spin_unlock_irqrestore(&(smi_info->si_lock), flags); + spin_unlock_irqrestore(&smi_info->si_lock, flags); } static void set_run_to_completion(void *send_info, int i_run_to_completion) { struct smi_info *smi_info = send_info; enum si_sm_result result; - unsigned long flags; - - spin_lock_irqsave(&(smi_info->si_lock), flags); smi_info->run_to_completion = i_run_to_completion; if (i_run_to_completion) { @@ -866,8 +863,6 @@ static void set_run_to_completion(void *send_info, int i_run_to_completion) SI_SHORT_TIMEOUT_USEC); } } - - spin_unlock_irqrestore(&(smi_info->si_lock), flags); } static int ipmi_thread(void *data) diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index c5bd28b69aec..1144b32f5310 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -368,9 +368,8 @@ int ipmi_request_supply_msgs(ipmi_user_t user, * Poll the IPMI interface for the user. 
This causes the IPMI code to * do an immediate check for information from the driver and handle * anything that is immediately pending. This will not block in any - * way. This is useful if you need to implement polling from the user - * for things like modifying the watchdog timeout when a panic occurs - * or disabling the watchdog timer on a reboot. + * way. This is useful if you need to spin waiting for something to + * happen in the IPMI driver. */ void ipmi_poll_interface(ipmi_user_t user); @@ -421,12 +420,6 @@ int ipmi_unregister_for_cmd(ipmi_user_t user, int ipmi_get_maintenance_mode(ipmi_user_t user); int ipmi_set_maintenance_mode(ipmi_user_t user, int mode); -/* - * Allow run-to-completion mode to be set for the interface of - * a specific user. - */ -void ipmi_user_set_run_to_completion(ipmi_user_t user, int val); - /* * When the user is created, it will not receive IPMI events by * default. The user must set this to TRUE to get incoming events. -- cgit v1.2.3-71-gd317 From c70d749986f6f1d4e2bb008bfc0c5fc22ec3fc64 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 29 Apr 2008 01:01:09 -0700 Subject: ipmi: style fixes in the base code Lots of style fixes for the base IPMI driver. No functional changes. Basically fixes everything reported by checkpatch and fixes the comment style. 
Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 995 +++++++++++++++++++++--------------- include/linux/ipmi.h | 72 ++- include/linux/ipmi_smi.h | 8 +- 3 files changed, 614 insertions(+), 461 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index ea6ba35b3d7e..5b13579ca21d 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -63,15 +63,16 @@ static struct proc_dir_entry *proc_ipmi_root; #define MAX_EVENTS_IN_QUEUE 25 -/* Don't let a message sit in a queue forever, always time it with at lest - the max message timer. This is in milliseconds. */ +/* + * Don't let a message sit in a queue forever, always time it with at lest + * the max message timer. This is in milliseconds. + */ #define MAX_MSG_TIMEOUT 60000 /* * The main "user" data structure. */ -struct ipmi_user -{ +struct ipmi_user { struct list_head link; /* Set to "0" when the user is destroyed. */ @@ -90,8 +91,7 @@ struct ipmi_user int gets_events; }; -struct cmd_rcvr -{ +struct cmd_rcvr { struct list_head link; ipmi_user_t user; @@ -105,12 +105,12 @@ struct cmd_rcvr * or change any data until the RCU period completes. So we * use this next variable during mass deletion so we can have * a list and don't have to wait and restart the search on - * every individual deletion of a command. */ + * every individual deletion of a command. + */ struct cmd_rcvr *next; }; -struct seq_table -{ +struct seq_table { unsigned int inuse : 1; unsigned int broadcast : 1; @@ -118,53 +118,60 @@ struct seq_table unsigned long orig_timeout; unsigned int retries_left; - /* To verify on an incoming send message response that this is - the message that the response is for, we keep a sequence id - and increment it every time we send a message. 
*/ + /* + * To verify on an incoming send message response that this is + * the message that the response is for, we keep a sequence id + * and increment it every time we send a message. + */ long seqid; - /* This is held so we can properly respond to the message on a - timeout, and it is used to hold the temporary data for - retransmission, too. */ + /* + * This is held so we can properly respond to the message on a + * timeout, and it is used to hold the temporary data for + * retransmission, too. + */ struct ipmi_recv_msg *recv_msg; }; -/* Store the information in a msgid (long) to allow us to find a - sequence table entry from the msgid. */ +/* + * Store the information in a msgid (long) to allow us to find a + * sequence table entry from the msgid. + */ #define STORE_SEQ_IN_MSGID(seq, seqid) (((seq&0xff)<<26) | (seqid&0x3ffffff)) #define GET_SEQ_FROM_MSGID(msgid, seq, seqid) \ do { \ seq = ((msgid >> 26) & 0x3f); \ seqid = (msgid & 0x3fffff); \ - } while (0) + } while (0) #define NEXT_SEQID(seqid) (((seqid) + 1) & 0x3fffff) -struct ipmi_channel -{ +struct ipmi_channel { unsigned char medium; unsigned char protocol; - /* My slave address. This is initialized to IPMI_BMC_SLAVE_ADDR, - but may be changed by the user. */ + /* + * My slave address. This is initialized to IPMI_BMC_SLAVE_ADDR, + * but may be changed by the user. + */ unsigned char address; - /* My LUN. This should generally stay the SMS LUN, but just in - case... */ + /* + * My LUN. This should generally stay the SMS LUN, but just in + * case... + */ unsigned char lun; }; #ifdef CONFIG_PROC_FS -struct ipmi_proc_entry -{ +struct ipmi_proc_entry { char *name; struct ipmi_proc_entry *next; }; #endif -struct bmc_device -{ +struct bmc_device { struct platform_device *dev; struct ipmi_device_id id; unsigned char guid[16]; @@ -286,8 +293,7 @@ enum ipmi_stat_indexes { #define IPMI_IPMB_NUM_SEQ 64 #define IPMI_MAX_CHANNELS 16 -struct ipmi_smi -{ +struct ipmi_smi { /* What interface number are we? 
*/ int intf_num; @@ -296,8 +302,10 @@ struct ipmi_smi /* Used for a list of interfaces. */ struct list_head link; - /* The list of upper layers that are using me. seq_lock - * protects this. */ + /* + * The list of upper layers that are using me. seq_lock + * protects this. + */ struct list_head users; /* Information to supply to users. */ @@ -311,10 +319,12 @@ struct ipmi_smi char *my_dev_name; char *sysfs_name; - /* This is the lower-layer's sender routine. Note that you + /* + * This is the lower-layer's sender routine. Note that you * must either be holding the ipmi_interfaces_mutex or be in * an umpreemptible region to use this. You must fetch the - * value into a local variable and make sure it is not NULL. */ + * value into a local variable and make sure it is not NULL. + */ struct ipmi_smi_handlers *handlers; void *send_info; @@ -327,35 +337,45 @@ struct ipmi_smi /* Driver-model device for the system interface. */ struct device *si_dev; - /* A table of sequence numbers for this interface. We use the - sequence numbers for IPMB messages that go out of the - interface to match them up with their responses. A routine - is called periodically to time the items in this list. */ + /* + * A table of sequence numbers for this interface. We use the + * sequence numbers for IPMB messages that go out of the + * interface to match them up with their responses. A routine + * is called periodically to time the items in this list. + */ spinlock_t seq_lock; struct seq_table seq_table[IPMI_IPMB_NUM_SEQ]; int curr_seq; - /* Messages that were delayed for some reason (out of memory, - for instance), will go in here to be processed later in a - periodic timer interrupt. */ + /* + * Messages that were delayed for some reason (out of memory, + * for instance), will go in here to be processed later in a + * periodic timer interrupt. 
+ */ spinlock_t waiting_msgs_lock; struct list_head waiting_msgs; - /* The list of command receivers that are registered for commands - on this interface. */ + /* + * The list of command receivers that are registered for commands + * on this interface. + */ struct mutex cmd_rcvrs_mutex; struct list_head cmd_rcvrs; - /* Events that were queues because no one was there to receive - them. */ + /* + * Events that were queues because no one was there to receive + * them. + */ spinlock_t events_lock; /* For dealing with event stuff. */ struct list_head waiting_events; unsigned int waiting_events_count; /* How many events in queue? */ char delivering_events; char event_msg_printed; - /* The event receiver for my BMC, only really used at panic - shutdown as a place to store this. */ + /* + * The event receiver for my BMC, only really used at panic + * shutdown as a place to store this. + */ unsigned char event_receiver; unsigned char event_receiver_lun; unsigned char local_sel_device; @@ -367,14 +387,18 @@ struct ipmi_smi int auto_maintenance_timeout; spinlock_t maintenance_mode_lock; /* Used in a timer... */ - /* A cheap hack, if this is non-null and a message to an - interface comes in with a NULL user, call this routine with - it. Note that the message will still be freed by the - caller. This only works on the system interface. */ + /* + * A cheap hack, if this is non-null and a message to an + * interface comes in with a NULL user, call this routine with + * it. Note that the message will still be freed by the + * caller. This only works on the system interface. + */ void (*null_user_handler)(ipmi_smi_t intf, struct ipmi_recv_msg *msg); - /* When we are scanning the channels for an SMI, this will - tell which channel we are scanning. */ + /* + * When we are scanning the channels for an SMI, this will + * tell which channel we are scanning. 
+ */ int curr_channel; /* Channel information */ @@ -407,8 +431,9 @@ static DEFINE_MUTEX(ipmidriver_mutex); static LIST_HEAD(ipmi_interfaces); static DEFINE_MUTEX(ipmi_interfaces_mutex); -/* List of watchers that want to know when smi's are added and - deleted. */ +/* + * List of watchers that want to know when smi's are added and deleted. + */ static LIST_HEAD(smi_watchers); static DEFINE_MUTEX(smi_watchers_mutex); @@ -462,10 +487,8 @@ static void clean_up_interface_data(ipmi_smi_t intf) for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) { if ((intf->seq_table[i].inuse) - && (intf->seq_table[i].recv_msg)) - { + && (intf->seq_table[i].recv_msg)) ipmi_free_recv_msg(intf->seq_table[i].recv_msg); - } } } @@ -532,6 +555,7 @@ int ipmi_smi_watcher_register(struct ipmi_smi_watcher *watcher) } return -ENOMEM; } +EXPORT_SYMBOL(ipmi_smi_watcher_register); int ipmi_smi_watcher_unregister(struct ipmi_smi_watcher *watcher) { @@ -540,6 +564,7 @@ int ipmi_smi_watcher_unregister(struct ipmi_smi_watcher *watcher) mutex_unlock(&smi_watchers_mutex); return 0; } +EXPORT_SYMBOL(ipmi_smi_watcher_unregister); /* * Must be called with smi_watchers_mutex held. 
@@ -575,8 +600,7 @@ ipmi_addr_equal(struct ipmi_addr *addr1, struct ipmi_addr *addr2) } if ((addr1->addr_type == IPMI_IPMB_ADDR_TYPE) - || (addr1->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) - { + || (addr1->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) { struct ipmi_ipmb_addr *ipmb_addr1 = (struct ipmi_ipmb_addr *) addr1; struct ipmi_ipmb_addr *ipmb_addr2 @@ -604,9 +628,8 @@ ipmi_addr_equal(struct ipmi_addr *addr1, struct ipmi_addr *addr2) int ipmi_validate_addr(struct ipmi_addr *addr, int len) { - if (len < sizeof(struct ipmi_system_interface_addr)) { + if (len < sizeof(struct ipmi_system_interface_addr)) return -EINVAL; - } if (addr->addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) { if (addr->channel != IPMI_BMC_CHANNEL) @@ -620,23 +643,21 @@ int ipmi_validate_addr(struct ipmi_addr *addr, int len) return -EINVAL; if ((addr->addr_type == IPMI_IPMB_ADDR_TYPE) - || (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) - { - if (len < sizeof(struct ipmi_ipmb_addr)) { + || (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) { + if (len < sizeof(struct ipmi_ipmb_addr)) return -EINVAL; - } return 0; } if (addr->addr_type == IPMI_LAN_ADDR_TYPE) { - if (len < sizeof(struct ipmi_lan_addr)) { + if (len < sizeof(struct ipmi_lan_addr)) return -EINVAL; - } return 0; } return -EINVAL; } +EXPORT_SYMBOL(ipmi_validate_addr); unsigned int ipmi_addr_length(int addr_type) { @@ -644,16 +665,15 @@ unsigned int ipmi_addr_length(int addr_type) return sizeof(struct ipmi_system_interface_addr); if ((addr_type == IPMI_IPMB_ADDR_TYPE) - || (addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) - { + || (addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) return sizeof(struct ipmi_ipmb_addr); - } if (addr_type == IPMI_LAN_ADDR_TYPE) return sizeof(struct ipmi_lan_addr); return 0; } +EXPORT_SYMBOL(ipmi_addr_length); static void deliver_response(struct ipmi_recv_msg *msg) { @@ -686,9 +706,11 @@ deliver_err_response(struct ipmi_recv_msg *msg, int err) deliver_response(msg); } -/* Find the next sequence number not 
being used and add the given - message with the given timeout to the sequence table. This must be - called with the interface's seq_lock held. */ +/* + * Find the next sequence number not being used and add the given + * message with the given timeout to the sequence table. This must be + * called with the interface's seq_lock held. + */ static int intf_next_seq(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg, unsigned long timeout, @@ -700,10 +722,8 @@ static int intf_next_seq(ipmi_smi_t intf, int rv = 0; unsigned int i; - for (i = intf->curr_seq; - (i+1)%IPMI_IPMB_NUM_SEQ != intf->curr_seq; - i = (i+1)%IPMI_IPMB_NUM_SEQ) - { + for (i = intf->curr_seq; (i+1)%IPMI_IPMB_NUM_SEQ != intf->curr_seq; + i = (i+1)%IPMI_IPMB_NUM_SEQ) { if (!intf->seq_table[i].inuse) break; } @@ -711,8 +731,10 @@ static int intf_next_seq(ipmi_smi_t intf, if (!intf->seq_table[i].inuse) { intf->seq_table[i].recv_msg = recv_msg; - /* Start with the maximum timeout, when the send response - comes in we will start the real timer. */ + /* + * Start with the maximum timeout, when the send response + * comes in we will start the real timer. + */ intf->seq_table[i].timeout = MAX_MSG_TIMEOUT; intf->seq_table[i].orig_timeout = timeout; intf->seq_table[i].retries_left = retries; @@ -725,15 +747,17 @@ static int intf_next_seq(ipmi_smi_t intf, } else { rv = -EAGAIN; } - + return rv; } -/* Return the receive message for the given sequence number and - release the sequence number so it can be reused. Some other data - is passed in to be sure the message matches up correctly (to help - guard against message coming in after their timeout and the - sequence number being reused). */ +/* + * Return the receive message for the given sequence number and + * release the sequence number so it can be reused. Some other data + * is passed in to be sure the message matches up correctly (to help + * guard against message coming in after their timeout and the + * sequence number being reused). 
+ */ static int intf_find_seq(ipmi_smi_t intf, unsigned char seq, short channel, @@ -752,11 +776,9 @@ static int intf_find_seq(ipmi_smi_t intf, if (intf->seq_table[seq].inuse) { struct ipmi_recv_msg *msg = intf->seq_table[seq].recv_msg; - if ((msg->addr.channel == channel) - && (msg->msg.cmd == cmd) - && (msg->msg.netfn == netfn) - && (ipmi_addr_equal(addr, &(msg->addr)))) - { + if ((msg->addr.channel == channel) && (msg->msg.cmd == cmd) + && (msg->msg.netfn == netfn) + && (ipmi_addr_equal(addr, &(msg->addr)))) { *recv_msg = msg; intf->seq_table[seq].inuse = 0; rv = 0; @@ -781,11 +803,12 @@ static int intf_start_seq_timer(ipmi_smi_t intf, GET_SEQ_FROM_MSGID(msgid, seq, seqid); spin_lock_irqsave(&(intf->seq_lock), flags); - /* We do this verification because the user can be deleted - while a message is outstanding. */ + /* + * We do this verification because the user can be deleted + * while a message is outstanding. + */ if ((intf->seq_table[seq].inuse) - && (intf->seq_table[seq].seqid == seqid)) - { + && (intf->seq_table[seq].seqid == seqid)) { struct seq_table *ent = &(intf->seq_table[seq]); ent->timeout = ent->orig_timeout; rv = 0; @@ -810,11 +833,12 @@ static int intf_err_seq(ipmi_smi_t intf, GET_SEQ_FROM_MSGID(msgid, seq, seqid); spin_lock_irqsave(&(intf->seq_lock), flags); - /* We do this verification because the user can be deleted - while a message is outstanding. */ + /* + * We do this verification because the user can be deleted + * while a message is outstanding. + */ if ((intf->seq_table[seq].inuse) - && (intf->seq_table[seq].seqid == seqid)) - { + && (intf->seq_table[seq].seqid == seqid)) { struct seq_table *ent = &(intf->seq_table[seq]); ent->inuse = 0; @@ -840,24 +864,30 @@ int ipmi_create_user(unsigned int if_num, int rv = 0; ipmi_smi_t intf; - /* There is no module usecount here, because it's not - required. 
Since this can only be used by and called from - other modules, they will implicitly use this module, and - thus this can't be removed unless the other modules are - removed. */ + /* + * There is no module usecount here, because it's not + * required. Since this can only be used by and called from + * other modules, they will implicitly use this module, and + * thus this can't be removed unless the other modules are + * removed. + */ if (handler == NULL) return -EINVAL; - /* Make sure the driver is actually initialized, this handles - problems with initialization order. */ + /* + * Make sure the driver is actually initialized, this handles + * problems with initialization order. + */ if (!initialized) { rv = ipmi_init_msghandler(); if (rv) return rv; - /* The init code doesn't return an error if it was turned - off, but it won't initialize. Check that. */ + /* + * The init code doesn't return an error if it was turned + * off, but it won't initialize. Check that. + */ if (!initialized) return -ENODEV; } @@ -898,8 +928,10 @@ int ipmi_create_user(unsigned int if_num, } } - /* Hold the lock so intf->handlers is guaranteed to be good - * until now */ + /* + * Hold the lock so intf->handlers is guaranteed to be good + * until now + */ mutex_unlock(&ipmi_interfaces_mutex); new_user->valid = 1; @@ -916,6 +948,7 @@ out_kfree: kfree(new_user); return rv; } +EXPORT_SYMBOL(ipmi_create_user); static void free_user(struct kref *ref) { @@ -939,8 +972,7 @@ int ipmi_destroy_user(ipmi_user_t user) for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) { if (intf->seq_table[i].inuse - && (intf->seq_table[i].recv_msg->user == user)) - { + && (intf->seq_table[i].recv_msg->user == user)) { intf->seq_table[i].inuse = 0; ipmi_free_recv_msg(intf->seq_table[i].recv_msg); } @@ -983,6 +1015,7 @@ int ipmi_destroy_user(ipmi_user_t user) return 0; } +EXPORT_SYMBOL(ipmi_destroy_user); void ipmi_get_version(ipmi_user_t user, unsigned char *major, @@ -991,6 +1024,7 @@ void ipmi_get_version(ipmi_user_t user, 
*major = user->intf->ipmi_version_major; *minor = user->intf->ipmi_version_minor; } +EXPORT_SYMBOL(ipmi_get_version); int ipmi_set_my_address(ipmi_user_t user, unsigned int channel, @@ -1001,6 +1035,7 @@ int ipmi_set_my_address(ipmi_user_t user, user->intf->channels[channel].address = address; return 0; } +EXPORT_SYMBOL(ipmi_set_my_address); int ipmi_get_my_address(ipmi_user_t user, unsigned int channel, @@ -1011,6 +1046,7 @@ int ipmi_get_my_address(ipmi_user_t user, *address = user->intf->channels[channel].address; return 0; } +EXPORT_SYMBOL(ipmi_get_my_address); int ipmi_set_my_LUN(ipmi_user_t user, unsigned int channel, @@ -1021,6 +1057,7 @@ int ipmi_set_my_LUN(ipmi_user_t user, user->intf->channels[channel].lun = LUN & 0x3; return 0; } +EXPORT_SYMBOL(ipmi_set_my_LUN); int ipmi_get_my_LUN(ipmi_user_t user, unsigned int channel, @@ -1031,6 +1068,7 @@ int ipmi_get_my_LUN(ipmi_user_t user, *address = user->intf->channels[channel].lun; return 0; } +EXPORT_SYMBOL(ipmi_get_my_LUN); int ipmi_get_maintenance_mode(ipmi_user_t user) { @@ -1139,6 +1177,7 @@ int ipmi_set_gets_events(ipmi_user_t user, int val) return 0; } +EXPORT_SYMBOL(ipmi_set_gets_events); static struct cmd_rcvr *find_cmd_rcvr(ipmi_smi_t intf, unsigned char netfn, @@ -1204,6 +1243,7 @@ int ipmi_register_for_cmd(ipmi_user_t user, return rv; } +EXPORT_SYMBOL(ipmi_register_for_cmd); int ipmi_unregister_for_cmd(ipmi_user_t user, unsigned char netfn, @@ -1241,12 +1281,13 @@ int ipmi_unregister_for_cmd(ipmi_user_t user, } return rv; } +EXPORT_SYMBOL(ipmi_unregister_for_cmd); static unsigned char ipmb_checksum(unsigned char *data, int size) { unsigned char csum = 0; - + for (; size > 0; size--, data++) csum += *data; @@ -1288,8 +1329,10 @@ static inline void format_ipmb_msg(struct ipmi_smi_msg *smi_msg, = ipmb_checksum(&(smi_msg->data[i+6]), smi_msg->data_size-6); - /* Add on the checksum size and the offset from the - broadcast. */ + /* + * Add on the checksum size and the offset from the + * broadcast. 
+ */ smi_msg->data_size += 1 + i; smi_msg->msgid = msgid; @@ -1325,17 +1368,21 @@ static inline void format_lan_msg(struct ipmi_smi_msg *smi_msg, = ipmb_checksum(&(smi_msg->data[7]), smi_msg->data_size-7); - /* Add on the checksum size and the offset from the - broadcast. */ + /* + * Add on the checksum size and the offset from the + * broadcast. + */ smi_msg->data_size += 1; smi_msg->msgid = msgid; } -/* Separate from ipmi_request so that the user does not have to be - supplied in certain circumstances (mainly at panic time). If - messages are supplied, they will be freed, even if an error - occurs. */ +/* + * Separate from ipmi_request so that the user does not have to be + * supplied in certain circumstances (mainly at panic time). If + * messages are supplied, they will be freed, even if an error + * occurs. + */ static int i_ipmi_request(ipmi_user_t user, ipmi_smi_t intf, struct ipmi_addr *addr, @@ -1357,19 +1404,18 @@ static int i_ipmi_request(ipmi_user_t user, struct ipmi_smi_handlers *handlers; - if (supplied_recv) { + if (supplied_recv) recv_msg = supplied_recv; - } else { + else { recv_msg = ipmi_alloc_recv_msg(); - if (recv_msg == NULL) { + if (recv_msg == NULL) return -ENOMEM; - } } recv_msg->user_msg_data = user_msg_data; - if (supplied_smi) { + if (supplied_smi) smi_msg = (struct ipmi_smi_msg *) supplied_smi; - } else { + else { smi_msg = ipmi_alloc_smi_msg(); if (smi_msg == NULL) { ipmi_free_recv_msg(recv_msg); @@ -1388,8 +1434,10 @@ static int i_ipmi_request(ipmi_user_t user, if (user) kref_get(&user->refcount); recv_msg->msgid = msgid; - /* Store the message to send in the receive message so timeout - responses can get the proper response data. */ + /* + * Store the message to send in the receive message so timeout + * responses can get the proper response data. 
+ */ recv_msg->msg = *msg; if (addr->addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) { @@ -1413,10 +1461,11 @@ static int i_ipmi_request(ipmi_user_t user, if ((msg->netfn == IPMI_NETFN_APP_REQUEST) && ((msg->cmd == IPMI_SEND_MSG_CMD) || (msg->cmd == IPMI_GET_MSG_CMD) - || (msg->cmd == IPMI_READ_EVENT_MSG_BUFFER_CMD))) - { - /* We don't let the user do these, since we manage - the sequence numbers. */ + || (msg->cmd == IPMI_READ_EVENT_MSG_BUFFER_CMD))) { + /* + * We don't let the user do these, since we manage + * the sequence numbers. + */ ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; @@ -1425,14 +1474,12 @@ static int i_ipmi_request(ipmi_user_t user, if (((msg->netfn == IPMI_NETFN_APP_REQUEST) && ((msg->cmd == IPMI_COLD_RESET_CMD) || (msg->cmd == IPMI_WARM_RESET_CMD))) - || (msg->netfn == IPMI_NETFN_FIRMWARE_REQUEST)) - { + || (msg->netfn == IPMI_NETFN_FIRMWARE_REQUEST)) { spin_lock_irqsave(&intf->maintenance_mode_lock, flags); intf->auto_maintenance_timeout = IPMI_MAINTENANCE_MODE_TIMEOUT; if (!intf->maintenance_mode - && !intf->maintenance_mode_enable) - { + && !intf->maintenance_mode_enable) { intf->maintenance_mode_enable = 1; maintenance_mode_update(intf); } @@ -1455,8 +1502,7 @@ static int i_ipmi_request(ipmi_user_t user, smi_msg->data_size = msg->data_len + 2; ipmi_inc_stat(intf, sent_local_commands); } else if ((addr->addr_type == IPMI_IPMB_ADDR_TYPE) - || (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) - { + || (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) { struct ipmi_ipmb_addr *ipmb_addr; unsigned char ipmb_seq; long seqid; @@ -1469,8 +1515,7 @@ static int i_ipmi_request(ipmi_user_t user, } if (intf->channels[addr->channel].medium - != IPMI_CHANNEL_MEDIUM_IPMB) - { + != IPMI_CHANNEL_MEDIUM_IPMB) { ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; @@ -1483,9 +1528,11 @@ static int i_ipmi_request(ipmi_user_t user, retries = 4; } if (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE) { - /* Broadcasts 
add a zero at the beginning of the - message, but otherwise is the same as an IPMB - address. */ + /* + * Broadcasts add a zero at the beginning of the + * message, but otherwise is the same as an IPMB + * address. + */ addr->addr_type = IPMI_IPMB_ADDR_TYPE; broadcast = 1; } @@ -1495,8 +1542,10 @@ static int i_ipmi_request(ipmi_user_t user, if (retry_time_ms == 0) retry_time_ms = 1000; - /* 9 for the header and 1 for the checksum, plus - possibly one for the broadcast. */ + /* + * 9 for the header and 1 for the checksum, plus + * possibly one for the broadcast. + */ if ((msg->data_len + 10 + broadcast) > IPMI_MAX_MSG_LENGTH) { ipmi_inc_stat(intf, sent_invalid_commands); rv = -EMSGSIZE; @@ -1513,15 +1562,19 @@ static int i_ipmi_request(ipmi_user_t user, memcpy(&recv_msg->addr, ipmb_addr, sizeof(*ipmb_addr)); if (recv_msg->msg.netfn & 0x1) { - /* It's a response, so use the user's sequence - from msgid. */ + /* + * It's a response, so use the user's sequence + * from msgid. + */ ipmi_inc_stat(intf, sent_ipmb_responses); format_ipmb_msg(smi_msg, msg, ipmb_addr, msgid, msgid, broadcast, source_address, source_lun); - /* Save the receive message so we can use it - to deliver the response. */ + /* + * Save the receive message so we can use it + * to deliver the response. + */ smi_msg->user_data = recv_msg; } else { /* It's a command, so get a sequence for it. */ @@ -1530,8 +1583,10 @@ static int i_ipmi_request(ipmi_user_t user, ipmi_inc_stat(intf, sent_ipmb_commands); - /* Create a sequence number with a 1 second - timeout and 4 retries. */ + /* + * Create a sequence number with a 1 second + * timeout and 4 retries. + */ rv = intf_next_seq(intf, recv_msg, retry_time_ms, @@ -1540,34 +1595,42 @@ static int i_ipmi_request(ipmi_user_t user, &ipmb_seq, &seqid); if (rv) { - /* We have used up all the sequence numbers, - probably, so abort. */ + /* + * We have used up all the sequence numbers, + * probably, so abort. 
+ */ spin_unlock_irqrestore(&(intf->seq_lock), flags); goto out_err; } - /* Store the sequence number in the message, - so that when the send message response - comes back we can start the timer. */ + /* + * Store the sequence number in the message, + * so that when the send message response + * comes back we can start the timer. + */ format_ipmb_msg(smi_msg, msg, ipmb_addr, STORE_SEQ_IN_MSGID(ipmb_seq, seqid), ipmb_seq, broadcast, source_address, source_lun); - /* Copy the message into the recv message data, so we - can retransmit it later if necessary. */ + /* + * Copy the message into the recv message data, so we + * can retransmit it later if necessary. + */ memcpy(recv_msg->msg_data, smi_msg->data, smi_msg->data_size); recv_msg->msg.data = recv_msg->msg_data; recv_msg->msg.data_len = smi_msg->data_size; - /* We don't unlock until here, because we need - to copy the completed message into the - recv_msg before we release the lock. - Otherwise, race conditions may bite us. I - know that's pretty paranoid, but I prefer - to be correct. */ + /* + * We don't unlock until here, because we need + * to copy the completed message into the + * recv_msg before we release the lock. + * Otherwise, race conditions may bite us. I + * know that's pretty paranoid, but I prefer + * to be correct. + */ spin_unlock_irqrestore(&(intf->seq_lock), flags); } } else if (addr->addr_type == IPMI_LAN_ADDR_TYPE) { @@ -1582,10 +1645,9 @@ static int i_ipmi_request(ipmi_user_t user, } if ((intf->channels[addr->channel].medium - != IPMI_CHANNEL_MEDIUM_8023LAN) + != IPMI_CHANNEL_MEDIUM_8023LAN) && (intf->channels[addr->channel].medium - != IPMI_CHANNEL_MEDIUM_ASYNC)) - { + != IPMI_CHANNEL_MEDIUM_ASYNC)) { ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; @@ -1614,14 +1676,18 @@ static int i_ipmi_request(ipmi_user_t user, memcpy(&recv_msg->addr, lan_addr, sizeof(*lan_addr)); if (recv_msg->msg.netfn & 0x1) { - /* It's a response, so use the user's sequence - from msgid. 
*/ + /* + * It's a response, so use the user's sequence + * from msgid. + */ ipmi_inc_stat(intf, sent_lan_responses); format_lan_msg(smi_msg, msg, lan_addr, msgid, msgid, source_lun); - /* Save the receive message so we can use it - to deliver the response. */ + /* + * Save the receive message so we can use it + * to deliver the response. + */ smi_msg->user_data = recv_msg; } else { /* It's a command, so get a sequence for it. */ @@ -1630,8 +1696,10 @@ static int i_ipmi_request(ipmi_user_t user, ipmi_inc_stat(intf, sent_lan_commands); - /* Create a sequence number with a 1 second - timeout and 4 retries. */ + /* + * Create a sequence number with a 1 second + * timeout and 4 retries. + */ rv = intf_next_seq(intf, recv_msg, retry_time_ms, @@ -1640,33 +1708,41 @@ static int i_ipmi_request(ipmi_user_t user, &ipmb_seq, &seqid); if (rv) { - /* We have used up all the sequence numbers, - probably, so abort. */ + /* + * We have used up all the sequence numbers, + * probably, so abort. + */ spin_unlock_irqrestore(&(intf->seq_lock), flags); goto out_err; } - /* Store the sequence number in the message, - so that when the send message response - comes back we can start the timer. */ + /* + * Store the sequence number in the message, + * so that when the send message response + * comes back we can start the timer. + */ format_lan_msg(smi_msg, msg, lan_addr, STORE_SEQ_IN_MSGID(ipmb_seq, seqid), ipmb_seq, source_lun); - /* Copy the message into the recv message data, so we - can retransmit it later if necessary. */ + /* + * Copy the message into the recv message data, so we + * can retransmit it later if necessary. + */ memcpy(recv_msg->msg_data, smi_msg->data, smi_msg->data_size); recv_msg->msg.data = recv_msg->msg_data; recv_msg->msg.data_len = smi_msg->data_size; - /* We don't unlock until here, because we need - to copy the completed message into the - recv_msg before we release the lock. - Otherwise, race conditions may bite us. 
I - know that's pretty paranoid, but I prefer - to be correct. */ + /* + * We don't unlock until here, because we need + * to copy the completed message into the + * recv_msg before we release the lock. + * Otherwise, race conditions may bite us. I + * know that's pretty paranoid, but I prefer + * to be correct. + */ spin_unlock_irqrestore(&(intf->seq_lock), flags); } } else { @@ -1739,6 +1815,7 @@ int ipmi_request_settime(ipmi_user_t user, retries, retry_time_ms); } +EXPORT_SYMBOL(ipmi_request_settime); int ipmi_request_supply_msgs(ipmi_user_t user, struct ipmi_addr *addr, @@ -1770,6 +1847,7 @@ int ipmi_request_supply_msgs(ipmi_user_t user, lun, -1, 0); } +EXPORT_SYMBOL(ipmi_request_supply_msgs); #ifdef CONFIG_PROC_FS static int ipmb_file_read_proc(char *page, char **start, off_t off, @@ -1903,6 +1981,7 @@ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, return rv; } +EXPORT_SYMBOL(ipmi_smi_add_proc_entry); static int add_proc_entries(ipmi_smi_t smi, int num) { @@ -1913,9 +1992,8 @@ static int add_proc_entries(ipmi_smi_t smi, int num) smi->proc_dir = proc_mkdir(smi->proc_dir_name, proc_ipmi_root); if (!smi->proc_dir) rv = -ENOMEM; - else { + else smi->proc_dir->owner = THIS_MODULE; - } if (rv == 0) rv = ipmi_smi_add_proc_entry(smi, "stats", @@ -2214,37 +2292,47 @@ static int create_files(struct bmc_device *bmc) err = device_create_file(&bmc->dev->dev, &bmc->device_id_attr); - if (err) goto out; + if (err) + goto out; err = device_create_file(&bmc->dev->dev, &bmc->provides_dev_sdrs_attr); - if (err) goto out_devid; + if (err) + goto out_devid; err = device_create_file(&bmc->dev->dev, &bmc->revision_attr); - if (err) goto out_sdrs; + if (err) + goto out_sdrs; err = device_create_file(&bmc->dev->dev, &bmc->firmware_rev_attr); - if (err) goto out_rev; + if (err) + goto out_rev; err = device_create_file(&bmc->dev->dev, &bmc->version_attr); - if (err) goto out_firm; + if (err) + goto out_firm; err = device_create_file(&bmc->dev->dev, &bmc->add_dev_support_attr); 
- if (err) goto out_version; + if (err) + goto out_version; err = device_create_file(&bmc->dev->dev, &bmc->manufacturer_id_attr); - if (err) goto out_add_dev; + if (err) + goto out_add_dev; err = device_create_file(&bmc->dev->dev, &bmc->product_id_attr); - if (err) goto out_manu; + if (err) + goto out_manu; if (bmc->id.aux_firmware_revision_set) { err = device_create_file(&bmc->dev->dev, &bmc->aux_firmware_rev_attr); - if (err) goto out_prod_id; + if (err) + goto out_prod_id; } if (bmc->guid_set) { err = device_create_file(&bmc->dev->dev, &bmc->guid_attr); - if (err) goto out_aux_firm; + if (err) + goto out_aux_firm; } return 0; @@ -2372,8 +2460,10 @@ static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum, "ipmi_msghandler:" " Unable to register bmc device: %d\n", rv); - /* Don't go to out_err, you can only do that if - the device is registered already. */ + /* + * Don't go to out_err, you can only do that if + * the device is registered already. + */ return rv; } @@ -2564,17 +2654,18 @@ channel_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg) if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) && (msg->msg.netfn == IPMI_NETFN_APP_RESPONSE) - && (msg->msg.cmd == IPMI_GET_CHANNEL_INFO_CMD)) - { + && (msg->msg.cmd == IPMI_GET_CHANNEL_INFO_CMD)) { /* It's the one we want */ if (msg->msg.data[0] != 0) { /* Got an error from the channel, just go on. */ if (msg->msg.data[0] == IPMI_INVALID_COMMAND_ERR) { - /* If the MC does not support this - command, that is legal. We just - assume it has one IPMB at channel - zero. */ + /* + * If the MC does not support this + * command, that is legal. We just + * assume it has one IPMB at channel + * zero. 
+ */ intf->channels[0].medium = IPMI_CHANNEL_MEDIUM_IPMB; intf->channels[0].protocol @@ -2595,7 +2686,7 @@ channel_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg) intf->channels[chan].medium = msg->msg.data[2] & 0x7f; intf->channels[chan].protocol = msg->msg.data[3] & 0x1f; - next_channel: + next_channel: intf->curr_channel++; if (intf->curr_channel >= IPMI_MAX_CHANNELS) wake_up(&intf->waitq); @@ -2623,6 +2714,7 @@ void ipmi_poll_interface(ipmi_user_t user) if (intf->handlers->poll) intf->handlers->poll(intf->send_info); } +EXPORT_SYMBOL(ipmi_poll_interface); int ipmi_register_smi(struct ipmi_smi_handlers *handlers, void *send_info, @@ -2637,14 +2729,18 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, ipmi_smi_t tintf; struct list_head *link; - /* Make sure the driver is actually initialized, this handles - problems with initialization order. */ + /* + * Make sure the driver is actually initialized, this handles + * problems with initialization order. + */ if (!initialized) { rv = ipmi_init_msghandler(); if (rv) return rv; - /* The init code doesn't return an error if it was turned - off, but it won't initialize. Check that. */ + /* + * The init code doesn't return an error if it was turned + * off, but it won't initialize. Check that. + */ if (!initialized) return -ENODEV; } @@ -2722,11 +2818,12 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, get_guid(intf); if ((intf->ipmi_version_major > 1) - || ((intf->ipmi_version_major == 1) - && (intf->ipmi_version_minor >= 5))) - { - /* Start scanning the channels to see what is - available. */ + || ((intf->ipmi_version_major == 1) + && (intf->ipmi_version_minor >= 5))) { + /* + * Start scanning the channels to see what is + * available. 
+ */ intf->null_user_handler = channel_handler; intf->curr_channel = 0; rv = send_channel_info_cmd(intf, 0); @@ -2774,6 +2871,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, return rv; } +EXPORT_SYMBOL(ipmi_register_smi); static void cleanup_smi_msgs(ipmi_smi_t intf) { @@ -2808,8 +2906,10 @@ int ipmi_unregister_smi(ipmi_smi_t intf) remove_proc_entries(intf); - /* Call all the watcher interfaces to tell them that - an interface is gone. */ + /* + * Call all the watcher interfaces to tell them that + * an interface is gone. + */ list_for_each_entry(w, &smi_watchers, link) w->smi_gone(intf_num); mutex_unlock(&smi_watchers_mutex); @@ -2817,6 +2917,7 @@ int ipmi_unregister_smi(ipmi_smi_t intf) kref_put(&intf->refcount, intf_free); return 0; } +EXPORT_SYMBOL(ipmi_unregister_smi); static int handle_ipmb_get_msg_rsp(ipmi_smi_t intf, struct ipmi_smi_msg *msg) @@ -2824,9 +2925,10 @@ static int handle_ipmb_get_msg_rsp(ipmi_smi_t intf, struct ipmi_ipmb_addr ipmb_addr; struct ipmi_recv_msg *recv_msg; - - /* This is 11, not 10, because the response must contain a - * completion code. */ + /* + * This is 11, not 10, because the response must contain a + * completion code. + */ if (msg->rsp_size < 11) { /* Message not big enough, just ignore it. */ ipmi_inc_stat(intf, invalid_ipmb_responses); @@ -2843,18 +2945,21 @@ static int handle_ipmb_get_msg_rsp(ipmi_smi_t intf, ipmb_addr.channel = msg->rsp[3] & 0x0f; ipmb_addr.lun = msg->rsp[7] & 3; - /* It's a response from a remote entity. Look up the sequence - number and handle the response. */ + /* + * It's a response from a remote entity. Look up the sequence + * number and handle the response. + */ if (intf_find_seq(intf, msg->rsp[7] >> 2, msg->rsp[3] & 0x0f, msg->rsp[8], (msg->rsp[4] >> 2) & (~1), (struct ipmi_addr *) &(ipmb_addr), - &recv_msg)) - { - /* We were unable to find the sequence number, - so just nuke the message. 
*/ + &recv_msg)) { + /* + * We were unable to find the sequence number, + * so just nuke the message. + */ ipmi_inc_stat(intf, unhandled_ipmb_responses); return 0; } @@ -2862,9 +2967,11 @@ static int handle_ipmb_get_msg_rsp(ipmi_smi_t intf, memcpy(recv_msg->msg_data, &(msg->rsp[9]), msg->rsp_size - 9); - /* THe other fields matched, so no need to set them, except - for netfn, which needs to be the response that was - returned, not the request value. */ + /* + * The other fields matched, so no need to set them, except + * for netfn, which needs to be the response that was + * returned, not the request value. + */ recv_msg->msg.netfn = msg->rsp[4] >> 2; recv_msg->msg.data = recv_msg->msg_data; recv_msg->msg.data_len = msg->rsp_size - 10; @@ -2920,11 +3027,11 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, msg->data[1] = IPMI_SEND_MSG_CMD; msg->data[2] = msg->rsp[3]; msg->data[3] = msg->rsp[6]; - msg->data[4] = ((netfn + 1) << 2) | (msg->rsp[7] & 0x3); + msg->data[4] = ((netfn + 1) << 2) | (msg->rsp[7] & 0x3); msg->data[5] = ipmb_checksum(&(msg->data[3]), 2); msg->data[6] = intf->channels[msg->rsp[3] & 0xf].address; - /* rqseq/lun */ - msg->data[7] = (msg->rsp[7] & 0xfc) | (msg->rsp[4] & 0x3); + /* rqseq/lun */ + msg->data[7] = (msg->rsp[7] & 0xfc) | (msg->rsp[4] & 0x3); msg->data[8] = msg->rsp[8]; /* cmd */ msg->data[9] = IPMI_INVALID_CMD_COMPLETION_CODE; msg->data[10] = ipmb_checksum(&(msg->data[6]), 4); @@ -2943,9 +3050,11 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, handlers = intf->handlers; if (handlers) { handlers->sender(intf->send_info, msg, 0); - /* We used the message, so return the value - that causes it to not be freed or - queued. */ + /* + * We used the message, so return the value + * that causes it to not be freed or + * queued. 
+ */ rv = -1; } rcu_read_unlock(); @@ -2955,9 +3064,11 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, recv_msg = ipmi_alloc_recv_msg(); if (!recv_msg) { - /* We couldn't allocate memory for the - message, so requeue it for handling - later. */ + /* + * We couldn't allocate memory for the + * message, so requeue it for handling + * later. + */ rv = 1; kref_put(&user->refcount, free_user); } else { @@ -2968,8 +3079,10 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, ipmb_addr->lun = msg->rsp[7] & 3; ipmb_addr->channel = msg->rsp[3] & 0xf; - /* Extract the rest of the message information - from the IPMB header.*/ + /* + * Extract the rest of the message information + * from the IPMB header. + */ recv_msg->user = user; recv_msg->recv_type = IPMI_CMD_RECV_TYPE; recv_msg->msgid = msg->rsp[7] >> 2; @@ -2977,8 +3090,10 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, recv_msg->msg.cmd = msg->rsp[8]; recv_msg->msg.data = recv_msg->msg_data; - /* We chop off 10, not 9 bytes because the checksum - at the end also needs to be removed. */ + /* + * We chop off 10, not 9 bytes because the checksum + * at the end also needs to be removed. + */ recv_msg->msg.data_len = msg->rsp_size - 10; memcpy(recv_msg->msg_data, &(msg->rsp[9]), @@ -2997,8 +3112,10 @@ static int handle_lan_get_msg_rsp(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg; - /* This is 13, not 12, because the response must contain a - * completion code. */ + /* + * This is 13, not 12, because the response must contain a + * completion code. + */ if (msg->rsp_size < 13) { /* Message not big enough, just ignore it. */ ipmi_inc_stat(intf, invalid_lan_responses); @@ -3018,18 +3135,21 @@ static int handle_lan_get_msg_rsp(ipmi_smi_t intf, lan_addr.privilege = msg->rsp[3] >> 4; lan_addr.lun = msg->rsp[9] & 3; - /* It's a response from a remote entity. Look up the sequence - number and handle the response. */ + /* + * It's a response from a remote entity. 
Look up the sequence + * number and handle the response. + */ if (intf_find_seq(intf, msg->rsp[9] >> 2, msg->rsp[3] & 0x0f, msg->rsp[10], (msg->rsp[6] >> 2) & (~1), (struct ipmi_addr *) &(lan_addr), - &recv_msg)) - { - /* We were unable to find the sequence number, - so just nuke the message. */ + &recv_msg)) { + /* + * We were unable to find the sequence number, + * so just nuke the message. + */ ipmi_inc_stat(intf, unhandled_lan_responses); return 0; } @@ -3037,9 +3157,11 @@ static int handle_lan_get_msg_rsp(ipmi_smi_t intf, memcpy(recv_msg->msg_data, &(msg->rsp[11]), msg->rsp_size - 11); - /* The other fields matched, so no need to set them, except - for netfn, which needs to be the response that was - returned, not the request value. */ + /* + * The other fields matched, so no need to set them, except + * for netfn, which needs to be the response that was + * returned, not the request value. + */ recv_msg->msg.netfn = msg->rsp[6] >> 2; recv_msg->msg.data = recv_msg->msg_data; recv_msg->msg.data_len = msg->rsp_size - 12; @@ -3090,17 +3212,21 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, /* We didn't find a user, just give up. */ ipmi_inc_stat(intf, unhandled_commands); - rv = 0; /* Don't do anything with these messages, just - allow them to be freed. */ + /* + * Don't do anything with these messages, just allow + * them to be freed. + */ + rv = 0; } else { /* Deliver the message to the user. */ ipmi_inc_stat(intf, handled_commands); recv_msg = ipmi_alloc_recv_msg(); if (!recv_msg) { - /* We couldn't allocate memory for the - message, so requeue it for handling - later. */ + /* + * We couldn't allocate memory for the + * message, so requeue it for handling later. 
+ */ rv = 1; kref_put(&user->refcount, free_user); } else { @@ -3114,8 +3240,10 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, lan_addr->channel = msg->rsp[3] & 0xf; lan_addr->privilege = msg->rsp[3] >> 4; - /* Extract the rest of the message information - from the IPMB header.*/ + /* + * Extract the rest of the message information + * from the IPMB header. + */ recv_msg->user = user; recv_msg->recv_type = IPMI_CMD_RECV_TYPE; recv_msg->msgid = msg->rsp[9] >> 2; @@ -3123,8 +3251,10 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, recv_msg->msg.cmd = msg->rsp[10]; recv_msg->msg.data = recv_msg->msg_data; - /* We chop off 12, not 11 bytes because the checksum - at the end also needs to be removed. */ + /* + * We chop off 12, not 11 bytes because the checksum + * at the end also needs to be removed. + */ recv_msg->msg.data_len = msg->rsp_size - 12; memcpy(recv_msg->msg_data, &(msg->rsp[11]), @@ -3140,7 +3270,7 @@ static void copy_event_into_recv_msg(struct ipmi_recv_msg *recv_msg, struct ipmi_smi_msg *msg) { struct ipmi_system_interface_addr *smi_addr; - + recv_msg->msgid = 0; smi_addr = (struct ipmi_system_interface_addr *) &(recv_msg->addr); smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; @@ -3181,8 +3311,10 @@ static int handle_read_event_rsp(ipmi_smi_t intf, ipmi_inc_stat(intf, events); - /* Allocate and fill in one message for every user that is getting - events. */ + /* + * Allocate and fill in one message for every user that is + * getting events. + */ rcu_read_lock(); list_for_each_entry_rcu(user, &intf->users, link) { if (!user->gets_events) @@ -3196,9 +3328,11 @@ static int handle_read_event_rsp(ipmi_smi_t intf, list_del(&recv_msg->link); ipmi_free_recv_msg(recv_msg); } - /* We couldn't allocate memory for the - message, so requeue it for handling - later. */ + /* + * We couldn't allocate memory for the + * message, so requeue it for handling + * later. 
+ */ rv = 1; goto out; } @@ -3219,13 +3353,17 @@ static int handle_read_event_rsp(ipmi_smi_t intf, deliver_response(recv_msg); } } else if (intf->waiting_events_count < MAX_EVENTS_IN_QUEUE) { - /* No one to receive the message, put it in queue if there's - not already too many things in the queue. */ + /* + * No one to receive the message, put it in queue if there's + * not already too many things in the queue. + */ recv_msg = ipmi_alloc_recv_msg(); if (!recv_msg) { - /* We couldn't allocate memory for the - message, so requeue it for handling - later. */ + /* + * We couldn't allocate memory for the + * message, so requeue it for handling + * later. + */ rv = 1; goto out; } @@ -3234,8 +3372,10 @@ static int handle_read_event_rsp(ipmi_smi_t intf, list_add_tail(&(recv_msg->link), &(intf->waiting_events)); intf->waiting_events_count++; } else if (!intf->event_msg_printed) { - /* There's too many things in the queue, discard this - message. */ + /* + * There's too many things in the queue, discard this + * message. + */ printk(KERN_WARNING PFX "Event queue full, discarding" " incoming events\n"); intf->event_msg_printed = 1; @@ -3254,12 +3394,12 @@ static int handle_bmc_rsp(ipmi_smi_t intf, struct ipmi_user *user; recv_msg = (struct ipmi_recv_msg *) msg->user_data; - if (recv_msg == NULL) - { - printk(KERN_WARNING"IPMI message received with no owner. This\n" - "could be because of a malformed message, or\n" - "because of a hardware error. Contact your\n" - "hardware vender for assistance\n"); + if (recv_msg == NULL) { + printk(KERN_WARNING + "IPMI message received with no owner. This\n" + "could be because of a malformed message, or\n" + "because of a hardware error. Contact your\n" + "hardware vender for assistance\n"); return 0; } @@ -3293,9 +3433,11 @@ static int handle_bmc_rsp(ipmi_smi_t intf, return 0; } -/* Handle a new message. Return 1 if the message should be requeued, - 0 if the message should be freed, or -1 if the message should not - be freed or requeued. 
*/ +/* + * Handle a new message. Return 1 if the message should be requeued, + * 0 if the message should be freed, or -1 if the message should not + * be freed or requeued. + */ static int handle_new_recv_msg(ipmi_smi_t intf, struct ipmi_smi_msg *msg) { @@ -3320,10 +3462,12 @@ static int handle_new_recv_msg(ipmi_smi_t intf, msg->rsp[1] = msg->data[1]; msg->rsp[2] = IPMI_ERR_UNSPECIFIED; msg->rsp_size = 3; - } else if (((msg->rsp[0] >> 2) != ((msg->data[0] >> 2) | 1))/* Netfn */ - || (msg->rsp[1] != msg->data[1])) /* Command */ - { - /* The response is not even marginally correct. */ + } else if (((msg->rsp[0] >> 2) != ((msg->data[0] >> 2) | 1)) + || (msg->rsp[1] != msg->data[1])) { + /* + * The NetFN and Command in the response is not even + * marginally correct. + */ printk(KERN_WARNING PFX "BMC returned incorrect response," " expected netfn %x cmd %x, got netfn %x cmd %x\n", (msg->data[0] >> 2) | 1, msg->data[1], @@ -3338,10 +3482,11 @@ static int handle_new_recv_msg(ipmi_smi_t intf, if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) && (msg->rsp[1] == IPMI_SEND_MSG_CMD) - && (msg->user_data != NULL)) - { - /* It's a response to a response we sent. For this we - deliver a send message response to the user. */ + && (msg->user_data != NULL)) { + /* + * It's a response to a response we sent. For this we + * deliver a send message response to the user. + */ struct ipmi_recv_msg *recv_msg = msg->user_data; requeue = 0; @@ -3367,8 +3512,7 @@ static int handle_new_recv_msg(ipmi_smi_t intf, recv_msg->msg_data[0] = msg->rsp[2]; deliver_response(recv_msg); } else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) - && (msg->rsp[1] == IPMI_GET_MSG_CMD)) - { + && (msg->rsp[1] == IPMI_GET_MSG_CMD)) { /* It's from the receive queue. 
*/ chan = msg->rsp[3] & 0xf; if (chan >= IPMI_MAX_CHANNELS) { @@ -3380,12 +3524,16 @@ static int handle_new_recv_msg(ipmi_smi_t intf, switch (intf->channels[chan].medium) { case IPMI_CHANNEL_MEDIUM_IPMB: if (msg->rsp[4] & 0x04) { - /* It's a response, so find the - requesting message and send it up. */ + /* + * It's a response, so find the + * requesting message and send it up. + */ requeue = handle_ipmb_get_msg_rsp(intf, msg); } else { - /* It's a command to the SMS from some other - entity. Handle that. */ + /* + * It's a command to the SMS from some other + * entity. Handle that. + */ requeue = handle_ipmb_get_msg_cmd(intf, msg); } break; @@ -3393,25 +3541,30 @@ static int handle_new_recv_msg(ipmi_smi_t intf, case IPMI_CHANNEL_MEDIUM_8023LAN: case IPMI_CHANNEL_MEDIUM_ASYNC: if (msg->rsp[6] & 0x04) { - /* It's a response, so find the - requesting message and send it up. */ + /* + * It's a response, so find the + * requesting message and send it up. + */ requeue = handle_lan_get_msg_rsp(intf, msg); } else { - /* It's a command to the SMS from some other - entity. Handle that. */ + /* + * It's a command to the SMS from some other + * entity. Handle that. + */ requeue = handle_lan_get_msg_cmd(intf, msg); } break; default: - /* We don't handle the channel type, so just - * free the message. */ + /* + * We don't handle the channel type, so just + * free the message. + */ requeue = 0; } } else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) - && (msg->rsp[1] == IPMI_READ_EVENT_MSG_BUFFER_CMD)) - { + && (msg->rsp[1] == IPMI_READ_EVENT_MSG_BUFFER_CMD)) { /* It's an asyncronous event. */ requeue = handle_read_event_rsp(intf, msg); } else { @@ -3435,23 +3588,25 @@ void ipmi_smi_msg_received(ipmi_smi_t intf, if ((msg->data_size >= 2) && (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2)) && (msg->data[1] == IPMI_SEND_MSG_CMD) - && (msg->user_data == NULL)) - { - /* This is the local response to a command send, start - the timer for these. 
The user_data will not be - NULL if this is a response send, and we will let - response sends just go through. */ - - /* Check for errors, if we get certain errors (ones - that mean basically we can try again later), we - ignore them and start the timer. Otherwise we - report the error immediately. */ + && (msg->user_data == NULL)) { + /* + * This is the local response to a command send, start + * the timer for these. The user_data will not be + * NULL if this is a response send, and we will let + * response sends just go through. + */ + + /* + * Check for errors, if we get certain errors (ones + * that mean basically we can try again later), we + * ignore them and start the timer. Otherwise we + * report the error immediately. + */ if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0) && (msg->rsp[2] != IPMI_NODE_BUSY_ERR) && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR) && (msg->rsp[2] != IPMI_BUS_ERR) - && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) - { + && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) { int chan = msg->rsp[3] & 0xf; /* Got an error sending the message, handle it. */ @@ -3465,17 +3620,18 @@ void ipmi_smi_msg_received(ipmi_smi_t intf, else ipmi_inc_stat(intf, sent_ipmb_command_errs); intf_err_seq(intf, msg->msgid, msg->rsp[2]); - } else { + } else /* The message was sent, start the timer. */ intf_start_seq_timer(intf, msg->msgid); - } ipmi_free_smi_msg(msg); goto out; } - /* To preserve message order, if the list is not empty, we - tack this message onto the end of the list. */ + /* + * To preserve message order, if the list is not empty, we + * tack this message onto the end of the list. 
+ */ run_to_completion = intf->run_to_completion; if (!run_to_completion) spin_lock_irqsave(&intf->waiting_msgs_lock, flags); @@ -3487,11 +3643,13 @@ void ipmi_smi_msg_received(ipmi_smi_t intf, } if (!run_to_completion) spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); - + rv = handle_new_recv_msg(intf, msg); if (rv > 0) { - /* Could not handle the message now, just add it to a - list to handle later. */ + /* + * Could not handle the message now, just add it to a + * list to handle later. + */ run_to_completion = intf->run_to_completion; if (!run_to_completion) spin_lock_irqsave(&intf->waiting_msgs_lock, flags); @@ -3505,6 +3663,7 @@ void ipmi_smi_msg_received(ipmi_smi_t intf, out: return; } +EXPORT_SYMBOL(ipmi_smi_msg_received); void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf) { @@ -3519,7 +3678,7 @@ void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf) } rcu_read_unlock(); } - +EXPORT_SYMBOL(ipmi_smi_watchdog_pretimeout); static struct ipmi_smi_msg * smi_from_recv_msg(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg, @@ -3527,14 +3686,16 @@ smi_from_recv_msg(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg, { struct ipmi_smi_msg *smi_msg = ipmi_alloc_smi_msg(); if (!smi_msg) - /* If we can't allocate the message, then just return, we - get 4 retries, so this should be ok. */ + /* + * If we can't allocate the message, then just return, we + * get 4 retries, so this should be ok. + */ return NULL; memcpy(smi_msg->data, recv_msg->msg.data, recv_msg->msg.data_len); smi_msg->data_size = recv_msg->msg.data_len; smi_msg->msgid = STORE_SEQ_IN_MSGID(seq, seqid); - + #ifdef DEBUG_MSGING { int m; @@ -3579,8 +3740,10 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, struct ipmi_smi_msg *smi_msg; /* More retries, send again. */ - /* Start with the max timer, set to normal - timer after the message is sent. */ + /* + * Start with the max timer, set to normal timer after + * the message is sent. 
+ */ ent->timeout = MAX_MSG_TIMEOUT; ent->retries_left--; if (ent->recv_msg->addr.addr_type == IPMI_LAN_ADDR_TYPE) @@ -3595,11 +3758,13 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, spin_unlock_irqrestore(&intf->seq_lock, *flags); - /* Send the new message. We send with a zero - * priority. It timed out, I doubt time is - * that critical now, and high priority - * messages are really only for messages to the - * local MC, which don't get resent. */ + /* + * Send the new message. We send with a zero + * priority. It timed out, I doubt time is that + * critical now, and high priority messages are really + * only for messages to the local MC, which don't get + * resent. + */ handlers = intf->handlers; if (handlers) intf->handlers->sender(intf->send_info, @@ -3630,16 +3795,20 @@ static void ipmi_timeout_handler(long timeout_period) list_del(&smi_msg->link); ipmi_free_smi_msg(smi_msg); } else { - /* To preserve message order, quit if we - can't handle a message. */ + /* + * To preserve message order, quit if we + * can't handle a message. + */ break; } } spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); - /* Go through the seq table and find any messages that - have timed out, putting them in the timeouts - list. */ + /* + * Go through the seq table and find any messages that + * have timed out, putting them in the timeouts + * list. + */ INIT_LIST_HEAD(&timeouts); spin_lock_irqsave(&intf->seq_lock, flags); for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) @@ -3665,8 +3834,7 @@ static void ipmi_timeout_handler(long timeout_period) intf->auto_maintenance_timeout -= timeout_period; if (!intf->maintenance_mode - && (intf->auto_maintenance_timeout <= 0)) - { + && (intf->auto_maintenance_timeout <= 0)) { intf->maintenance_mode_enable = 0; maintenance_mode_update(intf); } @@ -3684,8 +3852,10 @@ static void ipmi_request_event(void) struct ipmi_smi_handlers *handlers; rcu_read_lock(); - /* Called from the timer, no need to check if handlers is - * valid. 
*/ + /* + * Called from the timer, no need to check if handlers is + * valid. + */ list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { /* No event requests when in maintenance mode. */ if (intf->maintenance_mode_enable) @@ -3706,10 +3876,12 @@ static struct timer_list ipmi_timer; /* How many jiffies does it take to get to the timeout time. */ #define IPMI_TIMEOUT_JIFFIES ((IPMI_TIMEOUT_TIME * HZ) / 1000) -/* Request events from the queue every second (this is the number of - IPMI_TIMEOUT_TIMES between event requests). Hopefully, in the - future, IPMI will add a way to know immediately if an event is in - the queue and this silliness can go away. */ +/* + * Request events from the queue every second (this is the number of + * IPMI_TIMEOUT_TIMES between event requests). Hopefully, in the + * future, IPMI will add a way to know immediately if an event is in + * the queue and this silliness can go away. + */ #define IPMI_REQUEST_EV_TIME (1000 / (IPMI_TIMEOUT_TIME)) static atomic_t stop_operation; @@ -3753,6 +3925,7 @@ struct ipmi_smi_msg *ipmi_alloc_smi_msg(void) } return rv; } +EXPORT_SYMBOL(ipmi_alloc_smi_msg); static void free_recv_msg(struct ipmi_recv_msg *msg) { @@ -3779,6 +3952,7 @@ void ipmi_free_recv_msg(struct ipmi_recv_msg *msg) kref_put(&msg->user->refcount, free_user); msg->done(msg); } +EXPORT_SYMBOL(ipmi_free_recv_msg); #ifdef CONFIG_IPMI_PANIC_EVENT @@ -3796,8 +3970,7 @@ static void event_receiver_fetcher(ipmi_smi_t intf, struct ipmi_recv_msg *msg) if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) && (msg->msg.netfn == IPMI_NETFN_SENSOR_EVENT_RESPONSE) && (msg->msg.cmd == IPMI_GET_EVENT_RECEIVER_CMD) - && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) - { + && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) { /* A get event receiver command, save it. 
*/ intf->event_receiver = msg->msg.data[1]; intf->event_receiver_lun = msg->msg.data[2] & 0x3; @@ -3809,10 +3982,11 @@ static void device_id_fetcher(ipmi_smi_t intf, struct ipmi_recv_msg *msg) if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) && (msg->msg.netfn == IPMI_NETFN_APP_RESPONSE) && (msg->msg.cmd == IPMI_GET_DEVICE_ID_CMD) - && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) - { - /* A get device id command, save if we are an event - receiver or generator. */ + && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) { + /* + * A get device id command, save if we are an event + * receiver or generator. + */ intf->local_sel_device = (msg->msg.data[6] >> 2) & 1; intf->local_event_generator = (msg->msg.data[6] >> 5) & 1; } @@ -3845,8 +4019,10 @@ static void send_panic_events(char *str) data[4] = 0x6f; /* Sensor specific, IPMI table 36-1 */ data[5] = 0xa1; /* Runtime stop OEM bytes 2 & 3. */ - /* Put a few breadcrumbs in. Hopefully later we can add more things - to make the panic events more useful. */ + /* + * Put a few breadcrumbs in. Hopefully later we can add more things + * to make the panic events more useful. + */ if (str) { data[3] = str[0]; data[6] = str[1]; @@ -3880,9 +4056,11 @@ static void send_panic_events(char *str) } #ifdef CONFIG_IPMI_PANIC_STRING - /* On every interface, dump a bunch of OEM event holding the - string. */ - if (!str) + /* + * On every interface, dump a bunch of OEM event holding the + * string. + */ + if (!str) return; /* For every registered interface, send the event. */ @@ -3903,11 +4081,13 @@ static void send_panic_events(char *str) */ smp_rmb(); - /* First job here is to figure out where to send the - OEM events. There's no way in IPMI to send OEM - events using an event send command, so we have to - find the SEL to put them in and stick them in - there. */ + /* + * First job here is to figure out where to send the + * OEM events. 
There's no way in IPMI to send OEM + * events using an event send command, so we have to + * find the SEL to put them in and stick them in + * there. + */ /* Get capabilities from the get device id. */ intf->local_sel_device = 0; @@ -3955,24 +4135,29 @@ static void send_panic_events(char *str) } intf->null_user_handler = NULL; - /* Validate the event receiver. The low bit must not - be 1 (it must be a valid IPMB address), it cannot - be zero, and it must not be my address. */ - if (((intf->event_receiver & 1) == 0) + /* + * Validate the event receiver. The low bit must not + * be 1 (it must be a valid IPMB address), it cannot + * be zero, and it must not be my address. + */ + if (((intf->event_receiver & 1) == 0) && (intf->event_receiver != 0) - && (intf->event_receiver != intf->channels[0].address)) - { - /* The event receiver is valid, send an IPMB - message. */ + && (intf->event_receiver != intf->channels[0].address)) { + /* + * The event receiver is valid, send an IPMB + * message. + */ ipmb = (struct ipmi_ipmb_addr *) &addr; ipmb->addr_type = IPMI_IPMB_ADDR_TYPE; ipmb->channel = 0; /* FIXME - is this right? */ ipmb->lun = intf->event_receiver_lun; ipmb->slave_addr = intf->event_receiver; } else if (intf->local_sel_device) { - /* The event receiver was not valid (or was - me), but I am an SEL device, just dump it - in my SEL. */ + /* + * The event receiver was not valid (or was + * me), but I am an SEL device, just dump it + * in my SEL. + */ si = (struct ipmi_system_interface_addr *) &addr; si->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; si->channel = IPMI_BMC_CHANNEL; @@ -3980,7 +4165,6 @@ static void send_panic_events(char *str) } else continue; /* No where to send the event. */ - msg.netfn = IPMI_NETFN_STORAGE_REQUEST; /* Storage. */ msg.cmd = IPMI_ADD_SEL_ENTRY_CMD; msg.data = data; @@ -3997,8 +4181,10 @@ static void send_panic_events(char *str) data[2] = 0xf0; /* OEM event without timestamp. 
*/ data[3] = intf->channels[0].address; data[4] = j++; /* sequence # */ - /* Always give 11 bytes, so strncpy will fill - it with zeroes for me. */ + /* + * Always give 11 bytes, so strncpy will fill + * it with zeroes for me. + */ strncpy(data+5, p, 11); p += size; @@ -4015,7 +4201,7 @@ static void send_panic_events(char *str) intf->channels[0].lun, 0, 1); /* no retry, and no wait. */ } - } + } #endif /* CONFIG_IPMI_PANIC_STRING */ } #endif /* CONFIG_IPMI_PANIC_EVENT */ @@ -4024,7 +4210,7 @@ static int has_panicked; static int panic_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { ipmi_smi_t intf; @@ -4106,11 +4292,16 @@ static __exit void cleanup_ipmi(void) atomic_notifier_chain_unregister(&panic_notifier_list, &panic_block); - /* This can't be called if any interfaces exist, so no worry about - shutting down the interfaces. */ + /* + * This can't be called if any interfaces exist, so no worry + * about shutting down the interfaces. + */ - /* Tell the timer to stop, then wait for it to stop. This avoids - problems with race conditions removing the timer here. */ + /* + * Tell the timer to stop, then wait for it to stop. This + * avoids problems with race conditions removing the timer + * here. 
+ */ atomic_inc(&stop_operation); del_timer_sync(&ipmi_timer); @@ -4137,30 +4328,6 @@ module_exit(cleanup_ipmi); module_init(ipmi_init_msghandler_mod); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Corey Minyard "); -MODULE_DESCRIPTION("Incoming and outgoing message routing for an IPMI interface."); +MODULE_DESCRIPTION("Incoming and outgoing message routing for an IPMI" + " interface."); MODULE_VERSION(IPMI_DRIVER_VERSION); - -EXPORT_SYMBOL(ipmi_create_user); -EXPORT_SYMBOL(ipmi_destroy_user); -EXPORT_SYMBOL(ipmi_get_version); -EXPORT_SYMBOL(ipmi_request_settime); -EXPORT_SYMBOL(ipmi_request_supply_msgs); -EXPORT_SYMBOL(ipmi_poll_interface); -EXPORT_SYMBOL(ipmi_register_smi); -EXPORT_SYMBOL(ipmi_unregister_smi); -EXPORT_SYMBOL(ipmi_register_for_cmd); -EXPORT_SYMBOL(ipmi_unregister_for_cmd); -EXPORT_SYMBOL(ipmi_smi_msg_received); -EXPORT_SYMBOL(ipmi_smi_watchdog_pretimeout); -EXPORT_SYMBOL(ipmi_alloc_smi_msg); -EXPORT_SYMBOL(ipmi_addr_length); -EXPORT_SYMBOL(ipmi_validate_addr); -EXPORT_SYMBOL(ipmi_set_gets_events); -EXPORT_SYMBOL(ipmi_smi_watcher_register); -EXPORT_SYMBOL(ipmi_smi_watcher_unregister); -EXPORT_SYMBOL(ipmi_set_my_address); -EXPORT_SYMBOL(ipmi_get_my_address); -EXPORT_SYMBOL(ipmi_set_my_LUN); -EXPORT_SYMBOL(ipmi_get_my_LUN); -EXPORT_SYMBOL(ipmi_smi_add_proc_entry); -EXPORT_SYMBOL(ipmi_free_recv_msg); diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 1144b32f5310..2f75c4640b45 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -75,8 +75,7 @@ * work for sockets. */ #define IPMI_MAX_ADDR_SIZE 32 -struct ipmi_addr -{ +struct ipmi_addr { /* Try to take these from the "Channel Medium Type" table in section 6.5 of the IPMI 1.5 manual. */ int addr_type; @@ -90,8 +89,7 @@ struct ipmi_addr * 0), or IPMC_BMC_CHANNEL if communicating directly with the BMC. 
*/ #define IPMI_SYSTEM_INTERFACE_ADDR_TYPE 0x0c -struct ipmi_system_interface_addr -{ +struct ipmi_system_interface_addr { int addr_type; short channel; unsigned char lun; @@ -100,10 +98,9 @@ struct ipmi_system_interface_addr /* An IPMB Address. */ #define IPMI_IPMB_ADDR_TYPE 0x01 /* Used for broadcast get device id as described in section 17.9 of the - IPMI 1.5 manual. */ + IPMI 1.5 manual. */ #define IPMI_IPMB_BROADCAST_ADDR_TYPE 0x41 -struct ipmi_ipmb_addr -{ +struct ipmi_ipmb_addr { int addr_type; short channel; unsigned char slave_addr; @@ -128,8 +125,7 @@ struct ipmi_ipmb_addr * message is a little weird, but this is required. */ #define IPMI_LAN_ADDR_TYPE 0x04 -struct ipmi_lan_addr -{ +struct ipmi_lan_addr { int addr_type; short channel; unsigned char privilege; @@ -162,16 +158,14 @@ struct ipmi_lan_addr * byte of data in the response (as the spec shows the messages laid * out). */ -struct ipmi_msg -{ +struct ipmi_msg { unsigned char netfn; unsigned char cmd; unsigned short data_len; unsigned char __user *data; }; -struct kernel_ipmi_msg -{ +struct kernel_ipmi_msg { unsigned char netfn; unsigned char cmd; unsigned short data_len; @@ -239,12 +233,11 @@ typedef struct ipmi_user *ipmi_user_t; * used after the message is delivered, so the upper layer may use the * link to build a linked list, if it likes. */ -struct ipmi_recv_msg -{ +struct ipmi_recv_msg { struct list_head link; /* The type of message as defined in the "Receive Types" - defines above. */ + defines above. */ int recv_type; ipmi_user_t user; @@ -271,9 +264,8 @@ struct ipmi_recv_msg /* Allocate and free the receive message. */ void ipmi_free_recv_msg(struct ipmi_recv_msg *msg); -struct ipmi_user_hndl -{ - /* Routine type to call when a message needs to be routed to +struct ipmi_user_hndl { + /* Routine type to call when a message needs to be routed to the upper layer. This will be called with some locks held, the only IPMI routines that can be called are ipmi_request and the alloc/free operations. 
The handler_data is the @@ -433,8 +425,7 @@ int ipmi_set_gets_events(ipmi_user_t user, int val); * every existing interface when a new watcher is registered with * ipmi_smi_watcher_register(). */ -struct ipmi_smi_watcher -{ +struct ipmi_smi_watcher { struct list_head link; /* You must set the owner to the current module, if you are in @@ -505,8 +496,7 @@ int ipmi_validate_addr(struct ipmi_addr *addr, int len); /* Messages sent to the interface are this format. */ -struct ipmi_req -{ +struct ipmi_req { unsigned char __user *addr; /* Address to send the message to. */ unsigned int addr_len; @@ -531,12 +521,11 @@ struct ipmi_req /* Messages sent to the interface with timing parameters are this format. */ -struct ipmi_req_settime -{ +struct ipmi_req_settime { struct ipmi_req req; /* See ipmi_request_settime() above for details on these - values. */ + values. */ int retries; unsigned int retry_time_ms; }; @@ -553,8 +542,7 @@ struct ipmi_req_settime struct ipmi_req_settime) /* Messages received from the interface are this format. */ -struct ipmi_recv -{ +struct ipmi_recv { int recv_type; /* Is this a command, response or an asyncronous event. */ @@ -600,13 +588,12 @@ struct ipmi_recv struct ipmi_recv) /* Register to get commands from other entities on this interface. */ -struct ipmi_cmdspec -{ +struct ipmi_cmdspec { unsigned char netfn; unsigned char cmd; }; -/* +/* * Register to receive a specific command. error values: * - EFAULT - an address supplied was invalid. * - EBUSY - The netfn/cmd supplied was already in use. @@ -629,8 +616,7 @@ struct ipmi_cmdspec * else. The chans field is a bitmask, (1 << channel) for each channel. * It may be IPMI_CHAN_ALL for all channels. 
*/ -struct ipmi_cmdspec_chans -{ +struct ipmi_cmdspec_chans { unsigned int netfn; unsigned int cmd; unsigned int chans; @@ -652,7 +638,7 @@ struct ipmi_cmdspec_chans #define IPMICTL_UNREGISTER_FOR_CMD_CHANS _IOR(IPMI_IOC_MAGIC, 29, \ struct ipmi_cmdspec_chans) -/* +/* * Set whether this interface receives events. Note that the first * user registered for events will get all pending events for the * interface. error values: @@ -668,15 +654,18 @@ struct ipmi_cmdspec_chans * things it takes to determine your address (if not the BMC) and set * it for everyone else. You should probably leave the LUN alone. */ -struct ipmi_channel_lun_address_set -{ +struct ipmi_channel_lun_address_set { unsigned short channel; unsigned char value; }; -#define IPMICTL_SET_MY_CHANNEL_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 24, struct ipmi_channel_lun_address_set) -#define IPMICTL_GET_MY_CHANNEL_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 25, struct ipmi_channel_lun_address_set) -#define IPMICTL_SET_MY_CHANNEL_LUN_CMD _IOR(IPMI_IOC_MAGIC, 26, struct ipmi_channel_lun_address_set) -#define IPMICTL_GET_MY_CHANNEL_LUN_CMD _IOR(IPMI_IOC_MAGIC, 27, struct ipmi_channel_lun_address_set) +#define IPMICTL_SET_MY_CHANNEL_ADDRESS_CMD \ + _IOR(IPMI_IOC_MAGIC, 24, struct ipmi_channel_lun_address_set) +#define IPMICTL_GET_MY_CHANNEL_ADDRESS_CMD \ + _IOR(IPMI_IOC_MAGIC, 25, struct ipmi_channel_lun_address_set) +#define IPMICTL_SET_MY_CHANNEL_LUN_CMD \ + _IOR(IPMI_IOC_MAGIC, 26, struct ipmi_channel_lun_address_set) +#define IPMICTL_GET_MY_CHANNEL_LUN_CMD \ + _IOR(IPMI_IOC_MAGIC, 27, struct ipmi_channel_lun_address_set) /* Legacy interfaces, these only set IPMB 0. */ #define IPMICTL_SET_MY_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 17, unsigned int) #define IPMICTL_GET_MY_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 18, unsigned int) @@ -687,8 +676,7 @@ struct ipmi_channel_lun_address_set * Get/set the default timing values for an interface. You shouldn't * generally mess with these. 
*/ -struct ipmi_timing_parms -{ +struct ipmi_timing_parms { int retries; unsigned int retry_time_ms; }; diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 6e8cec503380..845a8473ee5d 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -60,8 +60,7 @@ typedef struct ipmi_smi *ipmi_smi_t; * asynchronous data and messages and request them from the * interface. */ -struct ipmi_smi_msg -{ +struct ipmi_smi_msg { struct list_head link; long msgid; @@ -74,12 +73,11 @@ struct ipmi_smi_msg unsigned char rsp[IPMI_MAX_MSG_LENGTH]; /* Will be called when the system is done with the message - (presumably to free it). */ + (presumably to free it). */ void (*done)(struct ipmi_smi_msg *msg); }; -struct ipmi_smi_handlers -{ +struct ipmi_smi_handlers { struct module *owner; /* The low-level interface cannot start sending messages to -- cgit v1.2.3-71-gd317 From fa68be0def375c78f723a7d49221f8f6c8194f29 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:13 -0700 Subject: ipmi: remove ->write_proc code IPMI code theoretically allows ->write_proc users, but nobody uses this thus far. 
Signed-off-by: Alexey Dobriyan Acked-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 9 ++++----- drivers/char/ipmi/ipmi_si_intf.c | 6 +++--- include/linux/ipmi_smi.h | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 5b13579ca21d..8c4baddd3731 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -1941,7 +1941,7 @@ static int stat_file_read_proc(char *page, char **start, off_t off, #endif /* CONFIG_PROC_FS */ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, - read_proc_t *read_proc, write_proc_t *write_proc, + read_proc_t *read_proc, void *data, struct module *owner) { int rv = 0; @@ -1968,7 +1968,6 @@ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, } else { file->data = data; file->read_proc = read_proc; - file->write_proc = write_proc; file->owner = owner; mutex_lock(&smi->proc_entry_lock); @@ -1997,17 +1996,17 @@ static int add_proc_entries(ipmi_smi_t smi, int num) if (rv == 0) rv = ipmi_smi_add_proc_entry(smi, "stats", - stat_file_read_proc, NULL, + stat_file_read_proc, smi, THIS_MODULE); if (rv == 0) rv = ipmi_smi_add_proc_entry(smi, "ipmb", - ipmb_file_read_proc, NULL, + ipmb_file_read_proc, smi, THIS_MODULE); if (rv == 0) rv = ipmi_smi_add_proc_entry(smi, "version", - version_file_read_proc, NULL, + version_file_read_proc, smi, THIS_MODULE); #endif /* CONFIG_PROC_FS */ diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 97b6225c070b..5a5455585c1d 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2892,7 +2892,7 @@ static int try_smi_init(struct smi_info *new_smi) } rv = ipmi_smi_add_proc_entry(new_smi->intf, "type", - type_file_read_proc, NULL, + type_file_read_proc, new_smi, THIS_MODULE); if (rv) { printk(KERN_ERR @@ -2902,7 +2902,7 @@ 
static int try_smi_init(struct smi_info *new_smi) } rv = ipmi_smi_add_proc_entry(new_smi->intf, "si_stats", - stat_file_read_proc, NULL, + stat_file_read_proc, new_smi, THIS_MODULE); if (rv) { printk(KERN_ERR @@ -2912,7 +2912,7 @@ static int try_smi_init(struct smi_info *new_smi) } rv = ipmi_smi_add_proc_entry(new_smi->intf, "params", - param_read_proc, NULL, + param_read_proc, new_smi, THIS_MODULE); if (rv) { printk(KERN_ERR diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 845a8473ee5d..62b73668b602 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -229,7 +229,7 @@ static inline void ipmi_free_smi_msg(struct ipmi_smi_msg *msg) directory for this interface. Note that the entry will automatically be dstroyed when the interface is destroyed. */ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, - read_proc_t *read_proc, write_proc_t *write_proc, + read_proc_t *read_proc, void *data, struct module *owner); #endif /* __LINUX_IPMI_SMI_H */ -- cgit v1.2.3-71-gd317 From 66ec2d778657b1a58ad26d0bc3b39b92bca69b53 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 29 Apr 2008 01:01:14 -0700 Subject: ipmi: make comment match actual preprocessor check Signed-off-by: Robert P. J. Day Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 2f75c4640b45..7ebdb4fb4e54 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -64,7 +64,7 @@ * applications and another for userland applications. The * capabilities are basically the same for both interface, although * the interfaces are somewhat different. The stuff in the - * #ifdef KERNEL below is the in-kernel interface. The userland + * #ifdef __KERNEL__ below is the in-kernel interface. The userland * interface is defined later in the file. 
*/ -- cgit v1.2.3-71-gd317 From eb6900fbfa43cb50391b80b38608e25280705693 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 29 Apr 2008 01:01:17 -0700 Subject: ELF: Use EI_NIDENT instead of numeric value Signed-off-by: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index bad1b16ec49a..ff9fbed90123 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -208,7 +208,7 @@ typedef struct elf32_hdr{ } Elf32_Ehdr; typedef struct elf64_hdr { - unsigned char e_ident[16]; /* ELF "magic number" */ + unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */ Elf64_Half e_type; Elf64_Half e_machine; Elf64_Word e_version; -- cgit v1.2.3-71-gd317 From 4a38e122e2cc6294779021ff4ccc784a3997059e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 01:01:24 -0700 Subject: keys: allow the callout data to be passed as a blob rather than a string Allow the callout data to be passed as a blob rather than a string for internal kernel services that call any request_key_*() interface other than request_key(). request_key() itself still takes a NUL-terminated string. 
The functions that change are: request_key_with_auxdata() request_key_async() request_key_async_with_auxdata() Signed-off-by: David Howells Cc: Paul Moore Cc: Chris Wright Cc: Stephen Smalley Cc: James Morris Cc: Kevin Coffman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/keys-request-key.txt | 11 +++++---- Documentation/keys.txt | 14 +++++++---- include/linux/key.h | 9 ++++--- security/keys/internal.h | 9 ++++--- security/keys/keyctl.c | 7 ++++-- security/keys/request_key.c | 49 +++++++++++++++++++++++--------------- security/keys/request_key_auth.c | 12 ++++++---- 7 files changed, 70 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt index 266955d23ee6..09b55e461740 100644 --- a/Documentation/keys-request-key.txt +++ b/Documentation/keys-request-key.txt @@ -11,26 +11,29 @@ request_key*(): struct key *request_key(const struct key_type *type, const char *description, - const char *callout_string); + const char *callout_info); or: struct key *request_key_with_auxdata(const struct key_type *type, const char *description, - const char *callout_string, + const void *callout_info, + size_t callout_len, void *aux); or: struct key *request_key_async(const struct key_type *type, const char *description, - const char *callout_string); + const void *callout_info, + size_t callout_len); or: struct key *request_key_async_with_auxdata(const struct key_type *type, const char *description, - const char *callout_string, + const void *callout_info, + size_t callout_len, void *aux); Or by userspace invoking the request_key system call: diff --git a/Documentation/keys.txt b/Documentation/keys.txt index 51652d39e61c..b82d38de8b89 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt @@ -771,7 +771,7 @@ payload contents" for more information.
struct key *request_key(const struct key_type *type, const char *description, - const char *callout_string); + const char *callout_info); This is used to request a key or keyring with a description that matches the description specified according to the key type's match function. This @@ -793,24 +793,28 @@ payload contents" for more information. struct key *request_key_with_auxdata(const struct key_type *type, const char *description, - const char *callout_string, + const void *callout_info, + size_t callout_len, void *aux); This is identical to request_key(), except that the auxiliary data is - passed to the key_type->request_key() op if it exists. + passed to the key_type->request_key() op if it exists, and the callout_info + is a blob of length callout_len, if given (the length may be 0). (*) A key can be requested asynchronously by calling one of: struct key *request_key_async(const struct key_type *type, const char *description, - const char *callout_string); + const void *callout_info, + size_t callout_len); or: struct key *request_key_async_with_auxdata(const struct key_type *type, const char *description, - const char *callout_string, + const void *callout_info, + size_t callout_len, void *aux); which are asynchronous equivalents of request_key() and diff --git a/include/linux/key.h b/include/linux/key.h index a70b8a8f2005..163f864b6bd4 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -208,16 +208,19 @@ extern struct key *request_key(struct key_type *type, extern struct key *request_key_with_auxdata(struct key_type *type, const char *description, - const char *callout_info, + const void *callout_info, + size_t callout_len, void *aux); extern struct key *request_key_async(struct key_type *type, const char *description, - const char *callout_info); + const void *callout_info, + size_t callout_len); extern struct key *request_key_async_with_auxdata(struct key_type *type, const char *description, - const char *callout_info, + const void
*callout_info, + size_t callout_len, void *aux); extern int wait_for_key_construction(struct key *key, bool intr); diff --git a/security/keys/internal.h b/security/keys/internal.h index 7d894ef70370..3cc04c2afe1c 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -109,7 +109,8 @@ extern int install_process_keyring(struct task_struct *tsk); extern struct key *request_key_and_link(struct key_type *type, const char *description, - const char *callout_info, + const void *callout_info, + size_t callout_len, void *aux, struct key *dest_keyring, unsigned long flags); @@ -120,13 +121,15 @@ extern struct key *request_key_and_link(struct key_type *type, struct request_key_auth { struct key *target_key; struct task_struct *context; - char *callout_info; + void *callout_info; + size_t callout_len; pid_t pid; }; extern struct key_type key_type_request_key_auth; extern struct key *request_key_auth_new(struct key *target, - const char *callout_info); + const void *callout_info, + size_t callout_len); extern struct key *key_get_instantiation_authkey(key_serial_t target_id); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 8ec84326a983..1698bf90ee84 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -152,6 +152,7 @@ asmlinkage long sys_request_key(const char __user *_type, struct key_type *ktype; struct key *key; key_ref_t dest_ref; + size_t callout_len; char type[32], *description, *callout_info; long ret; @@ -169,12 +170,14 @@ asmlinkage long sys_request_key(const char __user *_type, /* pull the callout info into kernel space */ callout_info = NULL; + callout_len = 0; if (_callout_info) { callout_info = strndup_user(_callout_info, PAGE_SIZE); if (IS_ERR(callout_info)) { ret = PTR_ERR(callout_info); goto error2; } + callout_len = strlen(callout_info); } /* get the destination keyring if specified */ @@ -195,8 +198,8 @@ asmlinkage long sys_request_key(const char __user *_type, } /* do the search */ - key = 
request_key_and_link(ktype, description, callout_info, NULL, - key_ref_to_ptr(dest_ref), + key = request_key_and_link(ktype, description, callout_info, + callout_len, NULL, key_ref_to_ptr(dest_ref), KEY_ALLOC_IN_QUOTA); if (IS_ERR(key)) { ret = PTR_ERR(key); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 5ecc5057fb54..a3f94c60692d 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -161,21 +161,22 @@ error_alloc: * call out to userspace for key construction * - we ignore program failure and go on key status instead */ -static int construct_key(struct key *key, const char *callout_info, void *aux) +static int construct_key(struct key *key, const void *callout_info, + size_t callout_len, void *aux) { struct key_construction *cons; request_key_actor_t actor; struct key *authkey; int ret; - kenter("%d,%s,%p", key->serial, callout_info, aux); + kenter("%d,%p,%zu,%p", key->serial, callout_info, callout_len, aux); cons = kmalloc(sizeof(*cons), GFP_KERNEL); if (!cons) return -ENOMEM; /* allocate an authorisation key */ - authkey = request_key_auth_new(key, callout_info); + authkey = request_key_auth_new(key, callout_info, callout_len); if (IS_ERR(authkey)) { kfree(cons); ret = PTR_ERR(authkey); @@ -331,6 +332,7 @@ alloc_failed: static struct key *construct_key_and_link(struct key_type *type, const char *description, const char *callout_info, + size_t callout_len, void *aux, struct key *dest_keyring, unsigned long flags) @@ -348,7 +350,7 @@ static struct key *construct_key_and_link(struct key_type *type, key_user_put(user); if (ret == 0) { - ret = construct_key(key, callout_info, aux); + ret = construct_key(key, callout_info, callout_len, aux); if (ret < 0) goto construction_failed; } @@ -370,7 +372,8 @@ construction_failed: */ struct key *request_key_and_link(struct key_type *type, const char *description, - const char *callout_info, + const void *callout_info, + size_t callout_len, void *aux, struct key 
*dest_keyring, unsigned long flags) @@ -378,8 +381,8 @@ struct key *request_key_and_link(struct key_type *type, struct key *key; key_ref_t key_ref; - kenter("%s,%s,%s,%p,%p,%lx", - type->name, description, callout_info, aux, + kenter("%s,%s,%p,%zu,%p,%p,%lx", + type->name, description, callout_info, callout_len, aux, dest_keyring, flags); /* search all the process keyrings for a key */ @@ -398,7 +401,8 @@ struct key *request_key_and_link(struct key_type *type, goto error; key = construct_key_and_link(type, description, callout_info, - aux, dest_keyring, flags); + callout_len, aux, dest_keyring, + flags); } error: @@ -434,10 +438,13 @@ struct key *request_key(struct key_type *type, const char *callout_info) { struct key *key; + size_t callout_len = 0; int ret; - key = request_key_and_link(type, description, callout_info, NULL, - NULL, KEY_ALLOC_IN_QUOTA); + if (callout_info) + callout_len = strlen(callout_info); + key = request_key_and_link(type, description, callout_info, callout_len, + NULL, NULL, KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key)) { ret = wait_for_key_construction(key, false); if (ret < 0) { @@ -458,14 +465,15 @@ EXPORT_SYMBOL(request_key); */ struct key *request_key_with_auxdata(struct key_type *type, const char *description, - const char *callout_info, + const void *callout_info, + size_t callout_len, void *aux) { struct key *key; int ret; - key = request_key_and_link(type, description, callout_info, aux, - NULL, KEY_ALLOC_IN_QUOTA); + key = request_key_and_link(type, description, callout_info, callout_len, + aux, NULL, KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key)) { ret = wait_for_key_construction(key, false); if (ret < 0) { @@ -485,10 +493,12 @@ EXPORT_SYMBOL(request_key_with_auxdata); */ struct key *request_key_async(struct key_type *type, const char *description, - const char *callout_info) + const void *callout_info, + size_t callout_len) { - return request_key_and_link(type, description, callout_info, NULL, - NULL, KEY_ALLOC_IN_QUOTA); + return 
request_key_and_link(type, description, callout_info, + callout_len, NULL, NULL, + KEY_ALLOC_IN_QUOTA); } EXPORT_SYMBOL(request_key_async); @@ -500,10 +510,11 @@ EXPORT_SYMBOL(request_key_async); */ struct key *request_key_async_with_auxdata(struct key_type *type, const char *description, - const char *callout_info, + const void *callout_info, + size_t callout_len, void *aux) { - return request_key_and_link(type, description, callout_info, aux, - NULL, KEY_ALLOC_IN_QUOTA); + return request_key_and_link(type, description, callout_info, + callout_len, aux, NULL, KEY_ALLOC_IN_QUOTA); } EXPORT_SYMBOL(request_key_async_with_auxdata); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index e42b5252486f..c615d473ce7c 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -61,7 +61,7 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - seq_printf(m, " pid:%d ci:%zu", rka->pid, strlen(rka->callout_info)); + seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } /* end request_key_auth_describe() */ @@ -77,7 +77,7 @@ static long request_key_auth_read(const struct key *key, size_t datalen; long ret; - datalen = strlen(rka->callout_info); + datalen = rka->callout_len; ret = datalen; /* we can return the data as is */ @@ -137,7 +137,8 @@ static void request_key_auth_destroy(struct key *key) * create an authorisation token for /sbin/request-key or whoever to gain * access to the caller's security data */ -struct key *request_key_auth_new(struct key *target, const char *callout_info) +struct key *request_key_auth_new(struct key *target, const void *callout_info, + size_t callout_len) { struct request_key_auth *rka, *irka; struct key *authkey = NULL; @@ -152,7 +153,7 @@ struct key *request_key_auth_new(struct key *target, const char *callout_info) kleave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); } - rka->callout_info = kmalloc(strlen(callout_info) 
+ 1, GFP_KERNEL); + rka->callout_info = kmalloc(callout_len, GFP_KERNEL); if (!rka->callout_info) { kleave(" = -ENOMEM"); kfree(rka); @@ -186,7 +187,8 @@ struct key *request_key_auth_new(struct key *target, const char *callout_info) } rka->target_key = key_get(target); - strcpy(rka->callout_info, callout_info); + memcpy(rka->callout_info, callout_info, callout_len); + rka->callout_len = callout_len; /* allocate the auth key */ sprintf(desc, "%x", target->serial); -- cgit v1.2.3-71-gd317 From 70a5bb72b55e82fbfbf1e22cae6975fac58a1e2d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 01:01:26 -0700 Subject: keys: add keyctl function to get a security label Add a keyctl() function to get the security label of a key. The following is added to Documentation/keys.txt: (*) Get the LSM security context attached to a key. long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer, size_t buflen) This function returns a string that represents the LSM security context attached to a key in the buffer provided. Unless there's an error, it always returns the amount of data it could produce, even if that's too big for the buffer, but it won't copy more than requested to userspace. If the buffer pointer is NULL then no copy will take place. A NUL character is included at the end of the string if the buffer is sufficiently big. This is included in the returned count. If no LSM is in force then an empty string will be returned. A process must have view permission on the key for this function to be successful. 
[akpm@linux-foundation.org: declare keyctl_get_security()] Signed-off-by: David Howells Acked-by: Stephen Smalley Cc: Paul Moore Cc: Chris Wright Cc: James Morris Cc: Kevin Coffman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/keys.txt | 21 +++++++++++++++ include/linux/keyctl.h | 1 + include/linux/security.h | 20 ++++++++++++++- security/dummy.c | 8 ++++++ security/keys/compat.c | 3 +++ security/keys/internal.h | 3 ++- security/keys/keyctl.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ security/security.c | 5 ++++ security/selinux/hooks.c | 15 +++++++++++ 9 files changed, 140 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/keys.txt b/Documentation/keys.txt index b82d38de8b89..be424b02437d 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt @@ -711,6 +711,27 @@ The keyctl syscall functions are: The assumed authoritative key is inherited across fork and exec. + (*) Get the LSM security context attached to a key. + + long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer, + size_t buflen) + + This function returns a string that represents the LSM security context + attached to a key in the buffer provided. + + Unless there's an error, it always returns the amount of data it could + produce, even if that's too big for the buffer, but it won't copy more + than requested to userspace. If the buffer pointer is NULL then no copy + will take place. + + A NUL character is included at the end of the string if the buffer is + sufficiently big. This is included in the returned count. If no LSM is + in force then an empty string will be returned. + + A process must have view permission on the key for this function to be + successful. 
+ + =============== KERNEL SERVICES =============== diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h index 3365945640c9..656ee6b77a4a 100644 --- a/include/linux/keyctl.h +++ b/include/linux/keyctl.h @@ -49,5 +49,6 @@ #define KEYCTL_SET_REQKEY_KEYRING 14 /* set default request-key keyring */ #define KEYCTL_SET_TIMEOUT 15 /* set key timeout */ #define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */ +#define KEYCTL_GET_SECURITY 17 /* get key security label */ #endif /* _LINUX_KEYCTL_H */ diff --git a/include/linux/security.h b/include/linux/security.h index 3ebcdd00b17d..adb09d893ae0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1009,6 +1009,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @perm describes the combination of permissions required of this key. * Return 1 if permission granted, 0 if permission denied and -ve it the * normal permissions model should be effected. + * @key_getsecurity: + * Get a textual representation of the security context attached to a key + * for the purposes of honouring KEYCTL_GET_SECURITY. This function + * allocates the storage for the NUL-terminated string and the caller + * should free it. + * @key points to the key to be queried. + * @_buffer points to a pointer that should be set to point to the + * resulting string (or to NULL if no label or an error occurs). + * Return the length of the string (including terminating NUL) or -ve if + * an error. + * May also return 0 (and a NULL buffer pointer) if there is no label. * * Security hooks affecting all System V IPC operations.
* @@ -1538,7 +1549,7 @@ struct security_operations { int (*key_permission) (key_ref_t key_ref, struct task_struct *context, key_perm_t perm); - + int (*key_getsecurity)(struct key *key, char **_buffer); #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT @@ -2732,6 +2743,7 @@ int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long f void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, struct task_struct *context, key_perm_t perm); +int security_key_getsecurity(struct key *key, char **_buffer); #else @@ -2753,6 +2765,12 @@ static inline int security_key_permission(key_ref_t key_ref, return 0; } +static inline int security_key_getsecurity(struct key *key, char **_buffer) +{ + *_buffer = NULL; + return 0; +} + #endif #endif /* CONFIG_KEYS */ diff --git a/security/dummy.c b/security/dummy.c index 26ee06ef0e93..48cf30226e16 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -994,6 +994,13 @@ static inline int dummy_key_permission(key_ref_t key_ref, { return 0; } + +static int dummy_key_getsecurity(struct key *key, char **_buffer) +{ + *_buffer = NULL; + return 0; +} + #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT @@ -1210,6 +1217,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, key_alloc); set_to_dummy_if_null(ops, key_free); set_to_dummy_if_null(ops, key_permission); + set_to_dummy_if_null(ops, key_getsecurity); #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT set_to_dummy_if_null(ops, audit_rule_init); diff --git a/security/keys/compat.c b/security/keys/compat.c index e10ec995f275..c766c68a63bc 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -79,6 +79,9 @@ asmlinkage long compat_sys_keyctl(u32 option, case KEYCTL_ASSUME_AUTHORITY: return keyctl_assume_authority(arg2); + case KEYCTL_GET_SECURITY: + return keyctl_get_security(arg2, compat_ptr(arg3), arg4); + default: return -EOPNOTSUPP; } diff --git a/security/keys/internal.h b/security/keys/internal.h index 
3cc04c2afe1c..6361d3736dbc 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -155,7 +155,8 @@ extern long keyctl_negate_key(key_serial_t, unsigned, key_serial_t); extern long keyctl_set_reqkey_keyring(int); extern long keyctl_set_timeout(key_serial_t, unsigned); extern long keyctl_assume_authority(key_serial_t); - +extern long keyctl_get_security(key_serial_t keyid, char __user *buffer, + size_t buflen); /* * debugging key validation diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 1698bf90ee84..56e963b700b9 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1080,6 +1081,66 @@ error: } /* end keyctl_assume_authority() */ +/* + * get the security label of a key + * - the key must grant us view permission + * - if there's a buffer, we place up to buflen bytes of data into it + * - unless there's an error, we return the amount of information available, + * irrespective of how much we may have copied (including the terminal NUL) + * - implements keyctl(KEYCTL_GET_SECURITY) + */ +long keyctl_get_security(key_serial_t keyid, + char __user *buffer, + size_t buflen) +{ + struct key *key, *instkey; + key_ref_t key_ref; + char *context; + long ret; + + key_ref = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW); + if (IS_ERR(key_ref)) { + if (PTR_ERR(key_ref) != -EACCES) + return PTR_ERR(key_ref); + + /* viewing a key under construction is also permitted if we + * have the authorisation token handy */ + instkey = key_get_instantiation_authkey(keyid); + if (IS_ERR(instkey)) + return PTR_ERR(key_ref); + key_put(instkey); + + key_ref = lookup_user_key(NULL, keyid, 0, 1, 0); + if (IS_ERR(key_ref)) + return PTR_ERR(key_ref); + } + + key = key_ref_to_ptr(key_ref); + ret = security_key_getsecurity(key, &context); + if (ret == 0) { + /* if no information was returned, give userspace an empty + * string */ + ret = 1; + if (buffer && buflen > 0 && 
+ copy_to_user(buffer, "", 1) != 0) + ret = -EFAULT; + } else if (ret > 0) { + /* return as much data as there's room for */ + if (buffer && buflen > 0) { + if (buflen > ret) + buflen = ret; + + if (copy_to_user(buffer, context, buflen) != 0) + ret = -EFAULT; + } + + kfree(context); + } + + key_ref_put(key_ref); + return ret; +} + /*****************************************************************************/ /* * the key control system call @@ -1160,6 +1221,11 @@ asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3, case KEYCTL_ASSUME_AUTHORITY: return keyctl_assume_authority((key_serial_t) arg2); + case KEYCTL_GET_SECURITY: + return keyctl_get_security((key_serial_t) arg2, + (char *) arg3, + (size_t) arg4); + default: return -EOPNOTSUPP; } diff --git a/security/security.c b/security/security.c index a809035441ab..8e64a29dc55d 100644 --- a/security/security.c +++ b/security/security.c @@ -1156,6 +1156,11 @@ int security_key_permission(key_ref_t key_ref, return security_ops->key_permission(key_ref, context, perm); } +int security_key_getsecurity(struct key *key, char **_buffer) +{ + return security_ops->key_getsecurity(key, _buffer); +} + #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 047365ac9faa..838d1e5e63a1 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5300,6 +5300,20 @@ static int selinux_key_permission(key_ref_t key_ref, SECCLASS_KEY, perm, NULL); } +static int selinux_key_getsecurity(struct key *key, char **_buffer) +{ + struct key_security_struct *ksec = key->security; + char *context = NULL; + unsigned len; + int rc; + + rc = security_sid_to_context(ksec->sid, &context, &len); + if (!rc) + rc = len; + *_buffer = context; + return rc; +} + #endif static struct security_operations selinux_ops = { @@ -5488,6 +5502,7 @@ static struct security_operations selinux_ops = { .key_alloc = selinux_key_alloc, .key_free = selinux_key_free, 
.key_permission = selinux_key_permission, + .key_getsecurity = selinux_key_getsecurity, #endif #ifdef CONFIG_AUDIT -- cgit v1.2.3-71-gd317 From 6b79ccb5144f9ffb4d4596c23e7570238dd12abc Mon Sep 17 00:00:00 2001 From: Arun Raghavan Date: Tue, 29 Apr 2008 01:01:28 -0700 Subject: keys: allow clients to set key perms in key_create_or_update() The key_create_or_update() function provided by the keyring code has a default set of permissions that are always applied to the key when created. This might not be desirable to all clients. Here's a patch that adds a "perm" parameter to the function to address this, which can be set to KEY_PERM_UNDEF to revert to the current behaviour. Signed-off-by: Arun Raghavan Signed-off-by: David Howells Cc: Satyam Sharma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/key.h | 3 +++ security/keys/key.c | 18 ++++++++++-------- security/keys/keyctl.c | 3 ++- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 163f864b6bd4..8b0bd3393abc 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -67,6 +67,8 @@ struct key; #define KEY_OTH_SETATTR 0x00000020 #define KEY_OTH_ALL 0x0000003f +#define KEY_PERM_UNDEF 0xffffffff + struct seq_file; struct user_struct; struct signal_struct; @@ -232,6 +234,7 @@ extern key_ref_t key_create_or_update(key_ref_t keyring, const char *description, const void *payload, size_t plen, + key_perm_t perm, unsigned long flags); extern int key_update(key_ref_t key, diff --git a/security/keys/key.c b/security/keys/key.c index 654d23baf352..d98c61953be6 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -757,11 +757,11 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, const char *description, const void *payload, size_t plen, + key_perm_t perm, unsigned long flags) { struct key_type *ktype; struct key *keyring, *key = NULL; - key_perm_t perm; key_ref_t key_ref; int ret; @@ -806,15 
+806,17 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, goto found_matching_key; } - /* decide on the permissions we want */ - perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; - perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR; + /* if the client doesn't provide, decide on the permissions we want */ + if (perm == KEY_PERM_UNDEF) { + perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; + perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR; - if (ktype->read) - perm |= KEY_POS_READ | KEY_USR_READ; + if (ktype->read) + perm |= KEY_POS_READ | KEY_USR_READ; - if (ktype == &key_type_keyring || ktype->update) - perm |= KEY_USR_WRITE; + if (ktype == &key_type_keyring || ktype->update) + perm |= KEY_USR_WRITE; + } /* allocate a new key */ key = key_alloc(ktype, description, current->fsuid, current->fsgid, diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 56e963b700b9..993be634a5ef 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -112,7 +112,8 @@ asmlinkage long sys_add_key(const char __user *_type, /* create or update the requested key and add it to the target * keyring */ key_ref = key_create_or_update(keyring_ref, type, description, - payload, plen, KEY_ALLOC_IN_QUOTA); + payload, plen, KEY_PERM_UNDEF, + KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key_ref)) { ret = key_ref_to_ptr(key_ref)->serial; key_ref_put(key_ref); -- cgit v1.2.3-71-gd317 From 69664cf16af4f31cd54d77948a4baf9c7e0ca7b9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 01:01:31 -0700 Subject: keys: don't generate user and user session keyrings unless they're accessed Don't generate the per-UID user and user session keyrings unless they're explicitly accessed. This solves a problem during a login process whereby set*uid() is called before the SELinux PAM module, resulting in the per-UID keyrings having the wrong security labels. 
This also cures the problem of multiple per-UID keyrings sometimes appearing due to PAM modules (including pam_keyinit) setuiding and causing user_structs to come into and go out of existence whilst the session keyring pins the user keyring. This is achieved by first searching for extant per-UID keyrings before inventing new ones. The serial bound argument is also dropped from find_keyring_by_name() as it's not currently made use of (setting it to 0 disables the feature). Signed-off-by: David Howells Cc: Cc: Cc: Cc: Stephen Smalley Cc: James Morris Cc: Chris Wright Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/key.h | 8 --- kernel/user.c | 15 ++--- security/keys/internal.h | 4 +- security/keys/key.c | 45 +------------- security/keys/keyring.c | 19 +++--- security/keys/process_keys.c | 142 +++++++++++++++++++++++++------------------ security/selinux/hooks.c | 8 --- 7 files changed, 96 insertions(+), 145 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 8b0bd3393abc..2effd031a817 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -268,9 +268,6 @@ extern struct key *key_lookup(key_serial_t id); /* * the userspace interface */ -extern struct key root_user_keyring, root_session_keyring; -extern int alloc_uid_keyring(struct user_struct *user, - struct task_struct *ctx); extern void switch_uid_keyring(struct user_struct *new_user); extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk); extern int copy_thread_group_keys(struct task_struct *tsk); @@ -299,7 +296,6 @@ extern void key_init(void); #define make_key_ref(k, p) ({ NULL; }) #define key_ref_to_ptr(k) ({ NULL; }) #define is_key_possessed(k) 0 -#define alloc_uid_keyring(u,c) 0 #define switch_uid_keyring(u) do { } while(0) #define __install_session_keyring(t, k) ({ NULL; }) #define copy_keys(f,t) 0 @@ -312,10 +308,6 @@ extern void key_init(void); #define key_fsgid_changed(t) do { } while(0) #define 
key_init() do { } while(0) -/* Initial keyrings */ -extern struct key root_user_keyring; -extern struct key root_session_keyring; - #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ #endif /* _LINUX_KEY_H */ diff --git a/kernel/user.c b/kernel/user.c index debce602bfdd..aefbbfa3159f 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -53,10 +53,6 @@ struct user_struct root_user = { .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), .locked_shm = 0, -#ifdef CONFIG_KEYS - .uid_keyring = &root_user_keyring, - .session_keyring = &root_session_keyring, -#endif #ifdef CONFIG_USER_SCHED .tg = &init_task_group, #endif @@ -420,12 +416,12 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) new->mq_bytes = 0; #endif new->locked_shm = 0; - - if (alloc_uid_keyring(new, current) < 0) - goto out_free_user; +#ifdef CONFIG_KEYS + new->uid_keyring = new->session_keyring = NULL; +#endif if (sched_create_user(new) < 0) - goto out_put_keys; + goto out_free_user; if (uids_user_create(new)) goto out_destoy_sched; @@ -459,9 +455,6 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) out_destoy_sched: sched_destroy_user(new); -out_put_keys: - key_put(new->uid_keyring); - key_put(new->session_keyring); out_free_user: kmem_cache_free(uid_cachep, new); out_unlock: diff --git a/security/keys/internal.h b/security/keys/internal.h index 6361d3736dbc..2ab38854c47f 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -77,8 +77,6 @@ extern struct mutex key_construction_mutex; extern wait_queue_head_t request_key_conswq; -extern void keyring_publish_name(struct key *keyring); - extern int __key_link(struct key *keyring, struct key *key); extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, @@ -102,7 +100,7 @@ extern key_ref_t search_process_keyrings(struct key_type *type, key_match_func_t match, struct task_struct *tsk); -extern struct key *find_keyring_by_name(const char *name, key_serial_t bound); +extern struct key 
*find_keyring_by_name(const char *name, bool skip_perm_check); extern int install_thread_keyring(struct task_struct *tsk); extern int install_process_keyring(struct task_struct *tsk); diff --git a/security/keys/key.c b/security/keys/key.c index d98c61953be6..46f125aa7fa3 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -1,6 +1,6 @@ /* Basic authentication token and access key management * - * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -137,36 +137,6 @@ void key_user_put(struct key_user *user) } /* end key_user_put() */ -/*****************************************************************************/ -/* - * insert a key with a fixed serial number - */ -static void __init __key_insert_serial(struct key *key) -{ - struct rb_node *parent, **p; - struct key *xkey; - - parent = NULL; - p = &key_serial_tree.rb_node; - - while (*p) { - parent = *p; - xkey = rb_entry(parent, struct key, serial_node); - - if (key->serial < xkey->serial) - p = &(*p)->rb_left; - else if (key->serial > xkey->serial) - p = &(*p)->rb_right; - else - BUG(); - } - - /* we've found a suitable hole - arrange for this key to occupy it */ - rb_link_node(&key->serial_node, parent, p); - rb_insert_color(&key->serial_node, &key_serial_tree); - -} /* end __key_insert_serial() */ - /*****************************************************************************/ /* * assign a key the next unique serial number @@ -1020,17 +990,4 @@ void __init key_init(void) rb_insert_color(&root_key_user.node, &key_user_tree); - /* record root's user standard keyrings */ - key_check(&root_user_keyring); - key_check(&root_session_keyring); - - __key_insert_serial(&root_user_keyring); - __key_insert_serial(&root_session_keyring); - - keyring_publish_name(&root_user_keyring); - keyring_publish_name(&root_session_keyring); 
- - /* link the two root keyrings together */ - key_link(&root_session_keyring, &root_user_keyring); - } /* end key_init() */ diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 70f0c313c888..a9ab8affc092 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1,6 +1,6 @@ -/* keyring.c: keyring handling +/* Keyring handling * - * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -79,7 +79,7 @@ static DECLARE_RWSEM(keyring_serialise_link_sem); * publish the name of a keyring so that it can be found by name (if it has * one) */ -void keyring_publish_name(struct key *keyring) +static void keyring_publish_name(struct key *keyring) { int bucket; @@ -516,10 +516,9 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref, /* * find a keyring with the specified name * - all named keyrings are searched - * - only find keyrings with search permission for the process - * - only find keyrings with a serial number greater than the one specified + * - normally only finds keyrings with search permission for the current process */ -struct key *find_keyring_by_name(const char *name, key_serial_t bound) +struct key *find_keyring_by_name(const char *name, bool skip_perm_check) { struct key *keyring; int bucket; @@ -545,15 +544,11 @@ struct key *find_keyring_by_name(const char *name, key_serial_t bound) if (strcmp(keyring->description, name) != 0) continue; - if (key_permission(make_key_ref(keyring, 0), + if (!skip_perm_check && + key_permission(make_key_ref(keyring, 0), KEY_SEARCH) < 0) continue; - /* found a potential candidate, but we still need to - * check the serial number */ - if (keyring->serial <= bound) - continue; - /* we've got a match */ atomic_inc(&keyring->usage); read_unlock(&keyring_name_lock); diff --git a/security/keys/process_keys.c 
b/security/keys/process_keys.c index c886a2bb792a..5be6d018759a 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -1,6 +1,6 @@ -/* process_keys.c: management of a process's keyrings +/* Management of a process's keyrings * - * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -23,6 +23,9 @@ /* session keyring create vs join semaphore */ static DEFINE_MUTEX(key_session_mutex); +/* user keyring creation semaphore */ +static DEFINE_MUTEX(key_user_keyring_mutex); + /* the root user's tracking struct */ struct key_user root_key_user = { .usage = ATOMIC_INIT(3), @@ -33,78 +36,84 @@ struct key_user root_key_user = { .uid = 0, }; -/* the root user's UID keyring */ -struct key root_user_keyring = { - .usage = ATOMIC_INIT(1), - .serial = 2, - .type = &key_type_keyring, - .user = &root_key_user, - .sem = __RWSEM_INITIALIZER(root_user_keyring.sem), - .perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, - .flags = 1 << KEY_FLAG_INSTANTIATED, - .description = "_uid.0", -#ifdef KEY_DEBUGGING - .magic = KEY_DEBUG_MAGIC, -#endif -}; - -/* the root user's default session keyring */ -struct key root_session_keyring = { - .usage = ATOMIC_INIT(1), - .serial = 1, - .type = &key_type_keyring, - .user = &root_key_user, - .sem = __RWSEM_INITIALIZER(root_session_keyring.sem), - .perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, - .flags = 1 << KEY_FLAG_INSTANTIATED, - .description = "_uid_ses.0", -#ifdef KEY_DEBUGGING - .magic = KEY_DEBUG_MAGIC, -#endif -}; - /*****************************************************************************/ /* - * allocate the keyrings to be associated with a UID + * install user and user session keyrings for a particular UID */ -int alloc_uid_keyring(struct user_struct *user, - struct task_struct *ctx) +static int 
install_user_keyrings(struct task_struct *tsk) { + struct user_struct *user = tsk->user; struct key *uid_keyring, *session_keyring; char buf[20]; int ret; - /* concoct a default session keyring */ - sprintf(buf, "_uid_ses.%u", user->uid); + kenter("%p{%u}", user, user->uid); - session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, ctx, - KEY_ALLOC_IN_QUOTA, NULL); - if (IS_ERR(session_keyring)) { - ret = PTR_ERR(session_keyring); - goto error; + if (user->uid_keyring) { + kleave(" = 0 [exist]"); + return 0; } - /* and a UID specific keyring, pointed to by the default session - * keyring */ - sprintf(buf, "_uid.%u", user->uid); + mutex_lock(&key_user_keyring_mutex); + ret = 0; - uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, ctx, - KEY_ALLOC_IN_QUOTA, session_keyring); - if (IS_ERR(uid_keyring)) { - key_put(session_keyring); - ret = PTR_ERR(uid_keyring); - goto error; + if (!user->uid_keyring) { + /* get the UID-specific keyring + * - there may be one in existence already as it may have been + * pinned by a session, but the user_struct pointing to it + * may have been destroyed by setuid */ + sprintf(buf, "_uid.%u", user->uid); + + uid_keyring = find_keyring_by_name(buf, true); + if (IS_ERR(uid_keyring)) { + uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, + tsk, KEY_ALLOC_IN_QUOTA, + NULL); + if (IS_ERR(uid_keyring)) { + ret = PTR_ERR(uid_keyring); + goto error; + } + } + + /* get a default session keyring (which might also exist + * already) */ + sprintf(buf, "_uid_ses.%u", user->uid); + + session_keyring = find_keyring_by_name(buf, true); + if (IS_ERR(session_keyring)) { + session_keyring = + keyring_alloc(buf, user->uid, (gid_t) -1, + tsk, KEY_ALLOC_IN_QUOTA, NULL); + if (IS_ERR(session_keyring)) { + ret = PTR_ERR(session_keyring); + goto error_release; + } + + /* we install a link from the user session keyring to + * the user keyring */ + ret = key_link(session_keyring, uid_keyring); + if (ret < 0) + goto error_release_both; + } + + /* 
install the keyrings */ + user->uid_keyring = uid_keyring; + user->session_keyring = session_keyring; } - /* install the keyrings */ - user->uid_keyring = uid_keyring; - user->session_keyring = session_keyring; - ret = 0; + mutex_unlock(&key_user_keyring_mutex); + kleave(" = 0"); + return 0; +error_release_both: + key_put(session_keyring); +error_release: + key_put(uid_keyring); error: + mutex_unlock(&key_user_keyring_mutex); + kleave(" = %d", ret); return ret; - -} /* end alloc_uid_keyring() */ +} /*****************************************************************************/ /* @@ -481,7 +490,7 @@ key_ref_t search_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else { + else if (context->user->session_keyring) { key_ref = keyring_search_aux( make_key_ref(context->user->session_keyring, 1), context, type, description, match); @@ -614,6 +623,9 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, if (!context->signal->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ + ret = install_user_keyrings(context); + if (ret < 0) + goto error; ret = install_session_keyring( context, context->user->session_keyring); if (ret < 0) @@ -628,12 +640,24 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, break; case KEY_SPEC_USER_KEYRING: + if (!context->user->uid_keyring) { + ret = install_user_keyrings(context); + if (ret < 0) + goto error; + } + key = context->user->uid_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: + if (!context->user->session_keyring) { + ret = install_user_keyrings(context); + if (ret < 0) + goto error; + } + key = context->user->session_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); @@ -744,7 +768,7 @@ long join_session_keyring(const char *name) mutex_lock(&key_session_mutex); /* look for an existing keyring of this name */ - keyring = 
find_keyring_by_name(name, 0); + keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ keyring = keyring_alloc(name, tsk->uid, tsk->gid, tsk, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 838d1e5e63a1..4e4de98941ae 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5551,14 +5551,6 @@ static __init int selinux_init(void) else printk(KERN_DEBUG "SELinux: Starting in permissive mode\n"); -#ifdef CONFIG_KEYS - /* Add security information to initial keyrings */ - selinux_key_alloc(&root_user_keyring, current, - KEY_ALLOC_NOT_IN_QUOTA); - selinux_key_alloc(&root_session_keyring, current, - KEY_ALLOC_NOT_IN_QUOTA); -#endif - return 0; } -- cgit v1.2.3-71-gd317 From 0b77f5bfb45c13e1e5142374f9d6ca75292252a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 01:01:32 -0700 Subject: keys: make the keyring quotas controllable through /proc/sys Make the keyring quotas controllable through /proc/sys files: (*) /proc/sys/kernel/keys/root_maxkeys /proc/sys/kernel/keys/root_maxbytes Maximum number of keys that root may have and the maximum total number of bytes of data that root may have stored in those keys. (*) /proc/sys/kernel/keys/maxkeys /proc/sys/kernel/keys/maxbytes Maximum number of keys that each non-root user may have and the maximum total number of bytes of data that each of those users may have stored in their keys. Also increase the quotas, as a number of people have been complaining that they're not big enough. I'm not sure that they're big enough now either, but on the other hand, they can now be set in /etc/sysctl.conf.
Signed-off-by: David Howells Cc: Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/keys.txt | 24 ++++++++++++++++++++++- include/linux/key.h | 5 +++++ kernel/sysctl.c | 9 +++++++++ security/keys/Makefile | 1 + security/keys/internal.h | 14 ++++++++++---- security/keys/key.c | 23 +++++++++++++++++----- security/keys/keyctl.c | 12 +++++++++--- security/keys/proc.c | 9 ++++++--- security/keys/sysctl.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 131 insertions(+), 16 deletions(-) create mode 100644 security/keys/sysctl.c (limited to 'include/linux') diff --git a/Documentation/keys.txt b/Documentation/keys.txt index be424b02437d..d5c7a57d1700 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt @@ -170,7 +170,8 @@ The key service provides a number of features besides keys: amount of description and payload space that can be consumed. The user can view information on this and other statistics through procfs - files. + files. The root user may also alter the quota limits through sysctl files + (see the section "New procfs files"). Process-specific and thread-specific keyrings are not counted towards a user's quota. @@ -329,6 +330,27 @@ about the status of the key service: / Key size quota +Four new sysctl files have been added also for the purpose of controlling the +quota limits on keys: + + (*) /proc/sys/kernel/keys/root_maxkeys + /proc/sys/kernel/keys/root_maxbytes + + These files hold the maximum number of keys that root may have and the + maximum total number of bytes of data that root may have stored in those + keys. + + (*) /proc/sys/kernel/keys/maxkeys + /proc/sys/kernel/keys/maxbytes + + These files hold the maximum number of keys that each non-root user may + have and the maximum total number of bytes of data that each of those + users may have stored in their keys. + +Root may alter these by writing each new limit as a decimal number string to +the appropriate file. 
+ + =============================== USERSPACE SYSTEM CALL INTERFACE =============================== diff --git a/include/linux/key.h b/include/linux/key.h index 2effd031a817..ad02d9cfe170 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #ifdef __KERNEL__ @@ -265,6 +266,10 @@ extern struct key *key_lookup(key_serial_t id); #define key_serial(key) ((key) ? (key)->serial : 0) +#ifdef CONFIG_SYSCTL +extern ctl_table key_sysctls[]; +#endif + /* * the userspace interface */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fd3364827ccf..0a1d2733cf41 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -809,6 +810,14 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dostring, .strategy = &sysctl_string, }, +#ifdef CONFIG_KEYS + { + .ctl_name = CTL_UNNUMBERED, + .procname = "keys", + .mode = 0555, + .child = key_sysctls, + }, +#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt diff --git a/security/keys/Makefile b/security/keys/Makefile index 5145adfb6a05..747a464943af 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -14,3 +14,4 @@ obj-y := \ obj-$(CONFIG_KEYS_COMPAT) += compat.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_SYSCTL) += sysctl.o diff --git a/security/keys/internal.h b/security/keys/internal.h index 2ab38854c47f..8c05587f5018 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -57,10 +57,6 @@ struct key_user { int qnbytes; /* number of bytes allocated to this user */ }; -#define KEYQUOTA_MAX_KEYS 100 -#define KEYQUOTA_MAX_BYTES 10000 -#define KEYQUOTA_LINK_BYTES 4 /* a link in a keyring is worth 4 bytes */ - extern struct rb_root key_user_tree; extern spinlock_t key_user_lock; extern struct key_user root_key_user; @@ -68,6 +64,16 @@ extern struct key_user root_key_user; extern 
struct key_user *key_user_lookup(uid_t uid); extern void key_user_put(struct key_user *user); +/* + * key quota limits + * - root has its own separate limits to everyone else + */ +extern unsigned key_quota_root_maxkeys; +extern unsigned key_quota_root_maxbytes; +extern unsigned key_quota_maxkeys; +extern unsigned key_quota_maxbytes; + +#define KEYQUOTA_LINK_BYTES 4 /* a link in a keyring is worth 4 bytes */ extern struct rb_root key_serial_tree; diff --git a/security/keys/key.c b/security/keys/key.c index 46f125aa7fa3..14948cf83ef6 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -27,6 +27,11 @@ DEFINE_SPINLOCK(key_serial_lock); struct rb_root key_user_tree; /* tree of quota records indexed by UID */ DEFINE_SPINLOCK(key_user_lock); +unsigned int key_quota_root_maxkeys = 200; /* root's key count quota */ +unsigned int key_quota_root_maxbytes = 20000; /* root's key space quota */ +unsigned int key_quota_maxkeys = 200; /* general key count quota */ +unsigned int key_quota_maxbytes = 20000; /* general key space quota */ + static LIST_HEAD(key_types_list); static DECLARE_RWSEM(key_types_sem); @@ -236,11 +241,16 @@ struct key *key_alloc(struct key_type *type, const char *desc, /* check that the user's quota permits allocation of another key and * its description */ if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) { + unsigned maxkeys = (uid == 0) ? + key_quota_root_maxkeys : key_quota_maxkeys; + unsigned maxbytes = (uid == 0) ? 
+ key_quota_root_maxbytes : key_quota_maxbytes; + spin_lock(&user->lock); if (!(flags & KEY_ALLOC_QUOTA_OVERRUN)) { - if (user->qnkeys + 1 >= KEYQUOTA_MAX_KEYS || - user->qnbytes + quotalen >= KEYQUOTA_MAX_BYTES - ) + if (user->qnkeys + 1 >= maxkeys || + user->qnbytes + quotalen >= maxbytes || + user->qnbytes + quotalen < user->qnbytes) goto no_quota; } @@ -345,11 +355,14 @@ int key_payload_reserve(struct key *key, size_t datalen) /* contemplate the quota adjustment */ if (delta != 0 && test_bit(KEY_FLAG_IN_QUOTA, &key->flags)) { + unsigned maxbytes = (key->user->uid == 0) ? + key_quota_root_maxbytes : key_quota_maxbytes; + spin_lock(&key->user->lock); if (delta > 0 && - key->user->qnbytes + delta > KEYQUOTA_MAX_BYTES - ) { + (key->user->qnbytes + delta >= maxbytes || + key->user->qnbytes + delta < key->user->qnbytes)) { ret = -EDQUOT; } else { diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 993be634a5ef..acc9c89e40a8 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -731,10 +731,16 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid) /* transfer the quota burden to the new user */ if (test_bit(KEY_FLAG_IN_QUOTA, &key->flags)) { + unsigned maxkeys = (uid == 0) ? + key_quota_root_maxkeys : key_quota_maxkeys; + unsigned maxbytes = (uid == 0) ? 
+ key_quota_root_maxbytes : key_quota_maxbytes; + spin_lock(&newowner->lock); - if (newowner->qnkeys + 1 >= KEYQUOTA_MAX_KEYS || - newowner->qnbytes + key->quotalen >= - KEYQUOTA_MAX_BYTES) + if (newowner->qnkeys + 1 >= maxkeys || + newowner->qnbytes + key->quotalen >= maxbytes || + newowner->qnbytes + key->quotalen < + newowner->qnbytes) goto quota_overrun; newowner->qnkeys++; diff --git a/security/keys/proc.c b/security/keys/proc.c index e54679b848cf..f619170da760 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -242,6 +242,10 @@ static int proc_key_users_show(struct seq_file *m, void *v) { struct rb_node *_p = v; struct key_user *user = rb_entry(_p, struct key_user, node); + unsigned maxkeys = (user->uid == 0) ? + key_quota_root_maxkeys : key_quota_maxkeys; + unsigned maxbytes = (user->uid == 0) ? + key_quota_root_maxbytes : key_quota_maxbytes; seq_printf(m, "%5u: %5d %d/%d %d/%d %d/%d\n", user->uid, @@ -249,10 +253,9 @@ static int proc_key_users_show(struct seq_file *m, void *v) atomic_read(&user->nkeys), atomic_read(&user->nikeys), user->qnkeys, - KEYQUOTA_MAX_KEYS, + maxkeys, user->qnbytes, - KEYQUOTA_MAX_BYTES - ); + maxbytes); return 0; diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c new file mode 100644 index 000000000000..b611d493c2d8 --- /dev/null +++ b/security/keys/sysctl.c @@ -0,0 +1,50 @@ +/* Key management controls + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#include <linux/key.h> +#include <linux/sysctl.h> +#include "internal.h" + +ctl_table key_sysctls[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "maxkeys", + .data = &key_quota_maxkeys, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "maxbytes", + .data = &key_quota_maxbytes, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "root_maxkeys", + .data = &key_quota_root_maxkeys, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "root_maxbytes", + .data = &key_quota_root_maxbytes, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; -- cgit v1.2.3-71-gd317 From 7249db2c281ac688977ecc6862cdee9969d310e2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Apr 2008 01:01:34 -0700 Subject: keys: make key_serial() a function if CONFIG_KEYS=y Make key_serial() an inline function rather than a macro if CONFIG_KEYS=y. This prevents double evaluation of the key pointer and also provides better type checking. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/key.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index ad02d9cfe170..c45c962d1cc5 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -264,7 +264,10 @@ extern int keyring_add_key(struct key *keyring, extern struct key *key_lookup(key_serial_t id); -#define key_serial(key) ((key) ? (key)->serial : 0) +static inline key_serial_t key_serial(struct key *key) +{ + return key ?
key->serial : 0; +} #ifdef CONFIG_SYSCTL extern ctl_table key_sysctls[]; -- cgit v1.2.3-71-gd317 From 925d1c401fa6cfd0df5d2e37da8981494ccdec07 Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Tue, 29 Apr 2008 01:01:36 -0700 Subject: procfs task exe symlink The kernel implements readlink of /proc/pid/exe by getting the file from the first executable VMA. Then the path to the file is reconstructed and reported as the result. Because of the VMA walk the code is slightly different on nommu systems. This patch avoids separate /proc/pid/exe code on nommu systems. Instead of walking the VMAs to find the first executable file-backed VMA we store a reference to the exec'd file in the mm_struct. That reference would prevent the filesystem holding the executable file from being unmounted even after unmapping the VMAs. So we track the number of VM_EXECUTABLE VMAs and drop the new reference when the last one is unmapped. This avoids pinning the mounted filesystem. [akpm@linux-foundation.org: improve comments] [yamamoto@valinux.co.jp: fix dup_mmap] Signed-off-by: Matt Helsley Cc: Oleg Nesterov Cc: David Howells Cc:"Eric W. 
Biederman" Cc: Christoph Hellwig Cc: Al Viro Cc: Hugh Dickins Signed-off-by: YAMAMOTO Takashi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_flat.c | 3 +- fs/exec.c | 2 ++ fs/proc/base.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/proc/internal.h | 1 - fs/proc/task_mmu.c | 34 ---------------------- fs/proc/task_nommu.c | 34 ---------------------- include/linux/mm.h | 13 +++++++++ include/linux/mm_types.h | 6 ++++ include/linux/proc_fs.h | 20 ++++++++++++- kernel/fork.c | 3 ++ mm/mmap.c | 24 +++++++++++++--- mm/nommu.c | 23 +++++++++++---- 12 files changed, 157 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index c12cc362fd3b..3b40d45a3a16 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -531,7 +531,8 @@ static int load_flat_file(struct linux_binprm * bprm, DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); down_write(&current->mm->mmap_sem); - textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); + textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, + MAP_PRIVATE|MAP_EXECUTABLE, 0); up_write(&current->mm->mmap_sem); if (!textpos || textpos >= (unsigned long) -4096) { if (!textpos) diff --git a/fs/exec.c b/fs/exec.c index 711bc45d789c..a13883903ee9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -964,6 +964,8 @@ int flush_old_exec(struct linux_binprm * bprm) if (retval) goto out; + set_mm_exe_file(bprm->mm, bprm->file); + /* * Release all of the old mmap stuff */ diff --git a/fs/proc/base.c b/fs/proc/base.c index c5e412a00b17..b48ddb119945 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1181,6 +1181,81 @@ static const struct file_operations proc_pid_sched_operations = { #endif +/* + * We added or removed a vma mapping the executable. The vmas are only mapped + * during exec and are not mapped with the mmap system call.
+ * Callers must hold down_write() on the mm's mmap_sem for these + */ +void added_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas++; +} + +void removed_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas--; + if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ + fput(mm->exe_file); + mm->exe_file = NULL; + } + +} + +void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +{ + if (new_exe_file) + get_file(new_exe_file); + if (mm->exe_file) + fput(mm->exe_file); + mm->exe_file = new_exe_file; + mm->num_exe_file_vmas = 0; +} + +struct file *get_mm_exe_file(struct mm_struct *mm) +{ + struct file *exe_file; + + /* We need mmap_sem to protect against races with removal of + * VM_EXECUTABLE vmas */ + down_read(&mm->mmap_sem); + exe_file = mm->exe_file; + if (exe_file) + get_file(exe_file); + up_read(&mm->mmap_sem); + return exe_file; +} + +void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) +{ + /* It's safe to write the exe_file pointer without exe_file_lock because + * this is called during fork when the task is not yet in /proc */ + newmm->exe_file = get_mm_exe_file(oldmm); +} + +static int proc_exe_link(struct inode *inode, struct path *exe_path) +{ + struct task_struct *task; + struct mm_struct *mm; + struct file *exe_file; + + task = get_proc_task(inode); + if (!task) + return -ENOENT; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + return -ENOENT; + exe_file = get_mm_exe_file(mm); + mmput(mm); + if (exe_file) { + *exe_path = exe_file->f_path; + path_get(&exe_file->f_path); + fput(exe_file); + return 0; + } else + return -ENOENT; +} + static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index bc72f5c8c47d..45abb9803988 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -48,7 +48,6 @@ extern int maps_protect; extern void create_seq_entry(char *name, mode_t mode, const 
struct file_operations *f); -extern int proc_exe_link(struct inode *, struct path *); extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7415eeb7cc3a..e2b8e769f510 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -75,40 +75,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return mm->total_vm; } -int proc_exe_link(struct inode *inode, struct path *path) -{ - struct vm_area_struct * vma; - int result = -ENOENT; - struct task_struct *task = get_proc_task(inode); - struct mm_struct * mm = NULL; - - if (task) { - mm = get_task_mm(task); - put_task_struct(task); - } - if (!mm) - goto out; - down_read(&mm->mmap_sem); - - vma = mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) - break; - vma = vma->vm_next; - } - - if (vma) { - *path = vma->vm_file->f_path; - path_get(&vma->vm_file->f_path); - result = 0; - } - - up_read(&mm->mmap_sem); - mmput(mm); -out: - return result; -} - static void pad_len_spaces(struct seq_file *m, int len) { len = 25 + sizeof(void*) * 6 - len; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 8011528518bd..4b733f108455 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -103,40 +103,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return size; } -int proc_exe_link(struct inode *inode, struct path *path) -{ - struct vm_list_struct *vml; - struct vm_area_struct *vma; - struct task_struct *task = get_proc_task(inode); - struct mm_struct *mm = get_task_mm(task); - int result = -ENOENT; - - if (!mm) - goto out; - down_read(&mm->mmap_sem); - - vml = mm->context.vmlist; - vma = NULL; - while (vml) { - if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) { - vma = vml->vma; - break; - } - vml = vml->next; - } - - if (vma) { - *path = vma->vm_file->f_path; 
- path_get(&vma->vm_file->f_path); - result = 0; - } - - up_read(&mm->mmap_sem); - mmput(mm); -out: - return result; -} - /* * display mapping lines for a particular process's /proc/pid/maps */ diff --git a/include/linux/mm.h b/include/linux/mm.h index fef602d82722..c31a9cd2a30e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1066,6 +1066,19 @@ extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff); extern void exit_mmap(struct mm_struct *); + +#ifdef CONFIG_PROC_FS +/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */ +extern void added_exe_file_vma(struct mm_struct *mm); +extern void removed_exe_file_vma(struct mm_struct *mm); +#else +static inline void added_exe_file_vma(struct mm_struct *mm) +{} + +static inline void removed_exe_file_vma(struct mm_struct *mm) +{} +#endif /* CONFIG_PROC_FS */ + extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); extern int install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bc97bd54f606..eb7c16cc9559 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -229,6 +229,12 @@ struct mm_struct { struct task_struct *owner; /* The thread group leader that */ /* owns the mm_struct. 
*/ #endif + +#ifdef CONFIG_PROC_FS + /* store ref to file /proc//exe symlink points to */ + struct file *exe_file; + unsigned long num_exe_file_vmas; +#endif }; #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 9b6c935f69cf..65f2299b772b 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -9,7 +9,6 @@ struct net; struct completion; - /* * The proc filesystem constants/structures */ @@ -206,6 +205,12 @@ extern void proc_net_remove(struct net *net, const char *name); extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); +/* While the {get|set|dup}_mm_exe_file functions are for mm_structs, they are + * only needed to implement /proc/|self/exe so we define them here. */ +extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); +extern struct file *get_mm_exe_file(struct mm_struct *mm); +extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm); + #else #define proc_root_driver NULL @@ -255,6 +260,19 @@ static inline void pid_ns_release_proc(struct pid_namespace *ns) { } +static inline void set_mm_exe_file(struct mm_struct *mm, + struct file *new_exe_file) +{} + +static inline struct file *get_mm_exe_file(struct mm_struct *mm) +{ + return NULL; +} + +static inline void dup_mm_exe_file(struct mm_struct *oldmm, + struct mm_struct *newmm) +{} + #endif /* CONFIG_PROC_FS */ #if !defined(CONFIG_PROC_KCORE) diff --git a/kernel/fork.c b/kernel/fork.c index de5c16c6b6ec..068ffe007529 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -431,6 +431,7 @@ void mmput(struct mm_struct *mm) if (atomic_dec_and_test(&mm->mm_users)) { exit_aio(mm); exit_mmap(mm); + set_mm_exe_file(mm, NULL); if (!list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); list_del(&mm->mmlist); @@ -543,6 +544,8 @@ struct mm_struct *dup_mm(struct task_struct *tsk) if (init_new_context(tsk, mm)) goto fail_nocontext; + dup_mm_exe_file(oldmm, mm); + err = 
dup_mmap(mm, oldmm); if (err) goto free_pt; diff --git a/mm/mmap.c b/mm/mmap.c index 677d184b0d42..fac66337da2a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -230,8 +230,11 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) might_sleep(); if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); - if (vma->vm_file) + if (vma->vm_file) { fput(vma->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(vma->vm_mm); + } mpol_put(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); return next; @@ -623,8 +626,11 @@ again: remove_next = 1 + (end > next->vm_end); spin_unlock(&mapping->i_mmap_lock); if (remove_next) { - if (file) + if (file) { fput(file); + if (next->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + } mm->map_count--; mpol_put(vma_policy(next)); kmem_cache_free(vm_area_cachep, next); @@ -1154,6 +1160,8 @@ munmap_back: error = file->f_op->mmap(file, vma); if (error) goto unmap_and_free_vma; + if (vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); } else if (vm_flags & VM_SHARED) { error = shmem_zero_setup(vma); if (error) @@ -1185,6 +1193,8 @@ munmap_back: mpol_put(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); fput(file); + if (vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); } else { vma_link(mm, vma, prev, rb_link, rb_parent); file = vma->vm_file; @@ -1817,8 +1827,11 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, } vma_set_policy(new, pol); - if (new->vm_file) + if (new->vm_file) { get_file(new->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); + } if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new); @@ -2135,8 +2148,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma->vm_start = addr; new_vma->vm_end = addr + len; new_vma->vm_pgoff = pgoff; - if (new_vma->vm_file) + if (new_vma->vm_file) { get_file(new_vma->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); + } if (new_vma->vm_ops && 
new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); vma_link(mm, new_vma, prev, rb_link, rb_parent); diff --git a/mm/nommu.c b/mm/nommu.c index 1d32fe89d57b..ef8c62cec697 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -966,8 +966,13 @@ unsigned long do_mmap_pgoff(struct file *file, INIT_LIST_HEAD(&vma->anon_vma_node); atomic_set(&vma->vm_usage, 1); - if (file) + if (file) { get_file(file); + if (vm_flags & VM_EXECUTABLE) { + added_exe_file_vma(current->mm); + vma->vm_mm = current->mm; + } + } vma->vm_file = file; vma->vm_flags = vm_flags; vma->vm_start = addr; @@ -1022,8 +1027,11 @@ unsigned long do_mmap_pgoff(struct file *file, up_write(&nommu_vma_sem); kfree(vml); if (vma) { - if (vma->vm_file) + if (vma->vm_file) { fput(vma->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(vma->vm_mm); + } kfree(vma); } return ret; @@ -1053,7 +1061,7 @@ EXPORT_SYMBOL(do_mmap_pgoff); /* * handle mapping disposal for uClinux */ -static void put_vma(struct vm_area_struct *vma) +static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma) { if (vma) { down_write(&nommu_vma_sem); @@ -1075,8 +1083,11 @@ static void put_vma(struct vm_area_struct *vma) realalloc -= kobjsize(vma); askedalloc -= sizeof(*vma); - if (vma->vm_file) + if (vma->vm_file) { fput(vma->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + } kfree(vma); } @@ -1113,7 +1124,7 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) found: vml = *parent; - put_vma(vml->vma); + put_vma(mm, vml->vma); *parent = vml->next; realalloc -= kobjsize(vml); @@ -1158,7 +1169,7 @@ void exit_mmap(struct mm_struct * mm) while ((tmp = mm->context.vmlist)) { mm->context.vmlist = tmp->next; - put_vma(tmp->vma); + put_vma(mm, tmp->vma); realalloc -= kobjsize(tmp); askedalloc -= sizeof(*tmp); -- cgit v1.2.3-71-gd317 From 9c37066d888bf6e1b96ad12304971b3ddeabbad0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:41 -0700 Subject: proc: remove 
proc_bus Remove proc_bus export and variable itself. Using pathnames works fine and is slightly more understandable and greppable. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/ecard.c | 2 +- drivers/input/input.c | 6 +++--- drivers/nubus/proc.c | 2 +- drivers/pci/proc.c | 2 +- drivers/pnp/isapnp/proc.c | 2 +- drivers/pnp/pnpbios/proc.c | 4 ++-- drivers/usb/core/inode.c | 4 ++-- drivers/zorro/proc.c | 2 +- fs/proc/root.c | 5 ++--- include/linux/proc_fs.h | 2 -- 10 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c index f56d48c451ea..2d8ab6b3a586 100644 --- a/arch/arm/kernel/ecard.c +++ b/arch/arm/kernel/ecard.c @@ -778,7 +778,7 @@ static struct proc_dir_entry *proc_bus_ecard_dir = NULL; static void ecard_proc_init(void) { - proc_bus_ecard_dir = proc_mkdir("ecard", proc_bus); + proc_bus_ecard_dir = proc_mkdir("bus/ecard", NULL); create_proc_info_entry("devices", 0, proc_bus_ecard_dir, get_ecard_dev_info); } diff --git a/drivers/input/input.c b/drivers/input/input.c index f02c242c3114..11426604d8a2 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -898,7 +898,7 @@ static int __init input_proc_init(void) { struct proc_dir_entry *entry; - proc_bus_input_dir = proc_mkdir("input", proc_bus); + proc_bus_input_dir = proc_mkdir("bus/input", NULL); if (!proc_bus_input_dir) return -ENOMEM; @@ -921,7 +921,7 @@ static int __init input_proc_init(void) return 0; fail2: remove_proc_entry("devices", proc_bus_input_dir); - fail1: remove_proc_entry("input", proc_bus); + fail1: remove_proc_entry("bus/input", NULL); return -ENOMEM; } @@ -929,7 +929,7 @@ static void input_proc_exit(void) { remove_proc_entry("devices", proc_bus_input_dir); remove_proc_entry("handlers", proc_bus_input_dir); - remove_proc_entry("input", proc_bus); + remove_proc_entry("bus/input", NULL); } #else /* !CONFIG_PROC_FS */ diff --git 
a/drivers/nubus/proc.c b/drivers/nubus/proc.c index e07492be1f4a..cb83acef9479 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -171,7 +171,7 @@ void __init nubus_proc_init(void) { if (!MACH_IS_MAC) return; - proc_bus_nubus_dir = proc_mkdir("nubus", proc_bus); + proc_bus_nubus_dir = proc_mkdir("bus/nubus", NULL); create_proc_info_entry("devices", 0, proc_bus_nubus_dir, get_nubus_dev_info); proc_bus_nubus_add_devices(); diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index ef18fcd641e2..7b5e45b2fd16 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -472,7 +472,7 @@ static int __init pci_proc_init(void) { struct proc_dir_entry *entry; struct pci_dev *dev = NULL; - proc_bus_pci_dir = proc_mkdir("pci", proc_bus); + proc_bus_pci_dir = proc_mkdir("bus/pci", NULL); entry = create_proc_entry("devices", 0, proc_bus_pci_dir); if (entry) entry->proc_fops = &proc_bus_pci_dev_operations; diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c index 2b8266c3d40f..e1d1f2a10947 100644 --- a/drivers/pnp/isapnp/proc.c +++ b/drivers/pnp/isapnp/proc.c @@ -116,7 +116,7 @@ int __init isapnp_proc_init(void) { struct pnp_dev *dev; - isapnp_proc_bus_dir = proc_mkdir("isapnp", proc_bus); + isapnp_proc_bus_dir = proc_mkdir("bus/isapnp", NULL); protocol_for_each_dev(&isapnp_protocol, dev) { isapnp_proc_attach_device(dev); } diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c index bb19bc957bad..46d506f66259 100644 --- a/drivers/pnp/pnpbios/proc.c +++ b/drivers/pnp/pnpbios/proc.c @@ -256,7 +256,7 @@ int pnpbios_interface_attach_device(struct pnp_bios_node *node) */ int __init pnpbios_proc_init(void) { - proc_pnp = proc_mkdir("pnp", proc_bus); + proc_pnp = proc_mkdir("bus/pnp", NULL); if (!proc_pnp) return -EIO; proc_pnp_boot = proc_mkdir("boot", proc_pnp); @@ -294,5 +294,5 @@ void __exit pnpbios_proc_exit(void) remove_proc_entry("configuration_info", proc_pnp); remove_proc_entry("devices", proc_pnp); remove_proc_entry("boot", proc_pnp); 
- remove_proc_entry("pnp", proc_bus); + remove_proc_entry("bus/pnp", NULL); } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 8607846e3c3f..1d253dd4ea81 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -773,7 +773,7 @@ int __init usbfs_init(void) usb_register_notify(&usbfs_nb); /* create mount point for usbfs */ - usbdir = proc_mkdir("usb", proc_bus); + usbdir = proc_mkdir("bus/usb", NULL); return 0; } @@ -783,6 +783,6 @@ void usbfs_cleanup(void) usb_unregister_notify(&usbfs_nb); unregister_filesystem(&usb_fs_type); if (usbdir) - remove_proc_entry("usb", proc_bus); + remove_proc_entry("bus/usb", NULL); } diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c index 2ce4cebc31d9..b7a8c7b7f66e 100644 --- a/drivers/zorro/proc.c +++ b/drivers/zorro/proc.c @@ -128,7 +128,7 @@ static int __init zorro_proc_init(void) u_int slot; if (MACH_IS_AMIGA && AMIGAHW_PRESENT(ZORRO)) { - proc_bus_zorro_dir = proc_mkdir("zorro", proc_bus); + proc_bus_zorro_dir = proc_mkdir("bus/zorro", NULL); create_proc_info_entry("devices", 0, proc_bus_zorro_dir, get_zorro_dev_info); for (slot = 0; slot < zorro_num_autocon; slot++) diff --git a/fs/proc/root.c b/fs/proc/root.c index ef0fb57fc9ef..cc46fcba8029 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -22,7 +22,7 @@ #include "internal.h" -struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; +struct proc_dir_entry *proc_root_fs, *proc_root_driver; static int proc_test_super(struct super_block *sb, void *data) { @@ -137,7 +137,7 @@ void __init proc_root_init(void) #ifdef CONFIG_PROC_DEVICETREE proc_device_tree_init(); #endif - proc_bus = proc_mkdir("bus", NULL); + proc_mkdir("bus", NULL); proc_sys_init(); } @@ -236,5 +236,4 @@ EXPORT_SYMBOL(proc_create); EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); EXPORT_SYMBOL(proc_root_fs); -EXPORT_SYMBOL(proc_bus); EXPORT_SYMBOL(proc_root_driver); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 
65f2299b772b..78c8c41ea9cc 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -98,7 +98,6 @@ struct vmcore { extern struct proc_dir_entry proc_root; extern struct proc_dir_entry *proc_root_fs; -extern struct proc_dir_entry *proc_bus; extern struct proc_dir_entry *proc_root_driver; extern struct proc_dir_entry *proc_root_kcore; @@ -214,7 +213,6 @@ extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm); #else #define proc_root_driver NULL -#define proc_bus NULL #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) static inline void proc_net_remove(struct net *net, const char *name) {} -- cgit v1.2.3-71-gd317 From 36a5aeb8787fbf92510ed20d806e229c55726f93 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:42 -0700 Subject: proc: remove proc_root_fs Use creation by full path instead: "fs/foo". Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/cifs_debug.c | 4 ++-- fs/ext4/mballoc.c | 7 +++---- fs/jfs/jfs_debug.c | 4 ++-- fs/nfs/client.c | 6 +++--- fs/proc/root.c | 5 ++--- include/linux/proc_fs.h | 1 - 6 files changed, 12 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 0228ed06069e..cc950f69e51e 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -468,7 +468,7 @@ cifs_proc_init(void) { struct proc_dir_entry *pde; - proc_fs_cifs = proc_mkdir("cifs", proc_root_fs); + proc_fs_cifs = proc_mkdir("fs/cifs", NULL); if (proc_fs_cifs == NULL) return; @@ -559,7 +559,7 @@ cifs_proc_clean(void) remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); remove_proc_entry("Experimental", proc_fs_cifs); remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); - remove_proc_entry("cifs", proc_root_fs); + remove_proc_entry("fs/cifs", NULL); } static int diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ef97f19c2f9d..1efcb934c2d6 100644 --- 
a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2867,7 +2867,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb) mb_debug("freed %u blocks in %u structures\n", count, count2); } -#define EXT4_ROOT "ext4" #define EXT4_MB_STATS_NAME "stats" #define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" #define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" @@ -3007,9 +3006,9 @@ int __init init_ext4_mballoc(void) return -ENOMEM; } #ifdef CONFIG_PROC_FS - proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); + proc_root_ext4 = proc_mkdir("fs/ext4", NULL); if (proc_root_ext4 == NULL) - printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); + printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); #endif return 0; } @@ -3020,7 +3019,7 @@ void exit_ext4_mballoc(void) kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); #ifdef CONFIG_PROC_FS - remove_proc_entry(EXT4_ROOT, proc_root_fs); + remove_proc_entry("fs/ext4", NULL); #endif } diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 887f5759e536..bf6ab19b86ee 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -89,7 +89,7 @@ void jfs_proc_init(void) { int i; - if (!(base = proc_mkdir("jfs", proc_root_fs))) + if (!(base = proc_mkdir("fs/jfs", NULL))) return; base->owner = THIS_MODULE; @@ -109,7 +109,7 @@ void jfs_proc_clean(void) if (base) { for (i = 0; i < NPROCENT; i++) remove_proc_entry(Entries[i].name, base); - remove_proc_entry("jfs", proc_root_fs); + remove_proc_entry("fs/jfs", NULL); } } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f2f3b284e6dd..0e066dcd4700 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1500,7 +1500,7 @@ int __init nfs_fs_proc_init(void) { struct proc_dir_entry *p; - proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); + proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL); if (!proc_fs_nfs) goto error_0; @@ -1526,7 +1526,7 @@ int __init nfs_fs_proc_init(void) error_2: remove_proc_entry("servers", proc_fs_nfs); error_1: - remove_proc_entry("nfsfs", 
proc_root_fs); + remove_proc_entry("fs/nfsfs", NULL); error_0: return -ENOMEM; } @@ -1538,7 +1538,7 @@ void nfs_fs_proc_exit(void) { remove_proc_entry("volumes", proc_fs_nfs); remove_proc_entry("servers", proc_fs_nfs); - remove_proc_entry("nfsfs", proc_root_fs); + remove_proc_entry("fs/nfsfs", NULL); } #endif /* CONFIG_PROC_FS */ diff --git a/fs/proc/root.c b/fs/proc/root.c index cc46fcba8029..596abb690afa 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -22,7 +22,7 @@ #include "internal.h" -struct proc_dir_entry *proc_root_fs, *proc_root_driver; +struct proc_dir_entry *proc_root_driver; static int proc_test_super(struct super_block *sb, void *data) { @@ -126,7 +126,7 @@ void __init proc_root_init(void) #ifdef CONFIG_SYSVIPC proc_mkdir("sysvipc", NULL); #endif - proc_root_fs = proc_mkdir("fs", NULL); + proc_mkdir("fs", NULL); proc_root_driver = proc_mkdir("driver", NULL); proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) @@ -235,5 +235,4 @@ EXPORT_SYMBOL(create_proc_entry); EXPORT_SYMBOL(proc_create); EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); -EXPORT_SYMBOL(proc_root_fs); EXPORT_SYMBOL(proc_root_driver); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 78c8c41ea9cc..65582f0384e0 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -97,7 +97,6 @@ struct vmcore { #ifdef CONFIG_PROC_FS extern struct proc_dir_entry proc_root; -extern struct proc_dir_entry *proc_root_fs; extern struct proc_dir_entry *proc_root_driver; extern struct proc_dir_entry *proc_root_kcore; -- cgit v1.2.3-71-gd317 From 928b4d8c8963e75bdb133f562b03b07f9aa4844a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:44 -0700 Subject: proc: remove proc_root_driver Use creation by full path: "driver/foo". 
Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/cciss.c | 4 ++-- drivers/block/cpqarray.c | 4 ++-- drivers/block/pktcdvd.c | 4 ++-- drivers/net/wireless/airo.c | 8 ++++---- fs/proc/root.c | 5 +---- include/linux/proc_fs.h | 3 --- 6 files changed, 11 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index cf6083a1f928..e539be5750dc 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -425,7 +425,7 @@ static void __devinit cciss_procinit(int i) struct proc_dir_entry *pde; if (proc_cciss == NULL) - proc_cciss = proc_mkdir("cciss", proc_root_driver); + proc_cciss = proc_mkdir("driver/cciss", NULL); if (!proc_cciss) return; pde = proc_create(hba[i]->devname, S_IWUSR | S_IRUSR | S_IRGRP | @@ -3700,7 +3700,7 @@ static void __exit cciss_cleanup(void) cciss_remove_one(hba[i]->pdev); } } - remove_proc_entry("cciss", proc_root_driver); + remove_proc_entry("driver/cciss", NULL); } static void fail_all_cmds(unsigned long ctlr) diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 69199185ff4b..09c14341e6e3 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -214,7 +214,7 @@ static struct proc_dir_entry *proc_array; static void __init ida_procinit(int i) { if (proc_array == NULL) { - proc_array = proc_mkdir("cpqarray", proc_root_driver); + proc_array = proc_mkdir("driver/cpqarray", NULL); if (!proc_array) return; } @@ -1796,7 +1796,7 @@ static void __exit cpqarray_exit(void) } } - remove_proc_entry("cpqarray", proc_root_driver); + remove_proc_entry("driver/cpqarray", NULL); } module_init(cpqarray_init) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 18feb1c7c33b..0431e5977bcb 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -3101,7 +3101,7 @@ static int __init pkt_init(void) goto out_misc; } - pkt_proc = proc_mkdir(DRIVER_NAME, proc_root_driver); + pkt_proc = 
proc_mkdir("driver/"DRIVER_NAME, NULL); return 0; @@ -3117,7 +3117,7 @@ out2: static void __exit pkt_exit(void) { - remove_proc_entry(DRIVER_NAME, proc_root_driver); + remove_proc_entry("driver/"DRIVER_NAME, NULL); misc_deregister(&pkt_misc); pkt_debugfs_cleanup(); diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 932d6b1c9d0b..6c395fcece58 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -5625,9 +5625,9 @@ static int __init airo_init_module( void ) int have_isa_dev = 0; #endif - airo_entry = create_proc_entry("aironet", + airo_entry = create_proc_entry("driver/aironet", S_IFDIR | airo_perm, - proc_root_driver); + NULL); if (airo_entry) { airo_entry->uid = proc_uid; @@ -5651,7 +5651,7 @@ static int __init airo_init_module( void ) airo_print_info("", "Finished probing for PCI adapters"); if (i) { - remove_proc_entry("aironet", proc_root_driver); + remove_proc_entry("driver/aironet", NULL); return i; } #endif @@ -5673,7 +5673,7 @@ static void __exit airo_cleanup_module( void ) #ifdef CONFIG_PCI pci_unregister_driver(&airo_driver); #endif - remove_proc_entry("aironet", proc_root_driver); + remove_proc_entry("driver/aironet", NULL); } /* diff --git a/fs/proc/root.c b/fs/proc/root.c index 596abb690afa..5e93e9b0124e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -22,8 +22,6 @@ #include "internal.h" -struct proc_dir_entry *proc_root_driver; - static int proc_test_super(struct super_block *sb, void *data) { return sb->s_fs_info == data; @@ -127,7 +125,7 @@ void __init proc_root_init(void) proc_mkdir("sysvipc", NULL); #endif proc_mkdir("fs", NULL); - proc_root_driver = proc_mkdir("driver", NULL); + proc_mkdir("driver", NULL); proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ @@ -235,4 +233,3 @@ EXPORT_SYMBOL(create_proc_entry); EXPORT_SYMBOL(proc_create); 
EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); -EXPORT_SYMBOL(proc_root_driver); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 65582f0384e0..f56205cbebc0 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -97,7 +97,6 @@ struct vmcore { #ifdef CONFIG_PROC_FS extern struct proc_dir_entry proc_root; -extern struct proc_dir_entry *proc_root_driver; extern struct proc_dir_entry *proc_root_kcore; extern spinlock_t proc_subdir_lock; @@ -211,8 +210,6 @@ extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm); #else -#define proc_root_driver NULL - #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) static inline void proc_net_remove(struct net *net, const char *name) {} -- cgit v1.2.3-71-gd317 From c74c120a21d87b0b6925ada5830d8cac21e852d9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:44 -0700 Subject: proc: remove proc_root from drivers Remove proc_root export. Creation and removal works well if parent PDE is supplied as NULL -- it worked always that way. So, one useless export removed and consistency added, some drivers created PDEs with &proc_root as parent but removed them as NULL and so on. 
Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/atags.c | 2 +- arch/m68k/mac/iop.c | 2 +- arch/mips/basler/excite/excite_procfs.c | 2 +- arch/um/kernel/exitcode.c | 2 +- arch/um/kernel/process.c | 2 +- arch/x86/kernel/cpu/mtrr/if.c | 2 +- drivers/char/ip2/ip2main.c | 4 ++-- drivers/mca/mca-proc.c | 2 +- drivers/misc/hdpuftrs/hdpu_cpustate.c | 2 +- drivers/misc/hdpuftrs/hdpu_nexus.c | 12 ++++++------ drivers/s390/block/dasd_proc.c | 6 +++--- drivers/s390/char/tape_proc.c | 4 ++-- drivers/s390/cio/blacklist.c | 2 +- drivers/s390/cio/qdio.c | 4 ++-- drivers/scsi/megaraid.c | 6 +++--- drivers/video/clps711xfb.c | 2 +- fs/proc/internal.h | 1 + fs/proc/proc_misc.c | 2 +- fs/proc/root.c | 1 - include/linux/proc_fs.h | 3 --- kernel/configs.c | 5 ++--- sound/core/info.c | 4 ++-- 22 files changed, 34 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/atags.c b/arch/arm/kernel/atags.c index e2e934c38080..64c420805e6f 100644 --- a/arch/arm/kernel/atags.c +++ b/arch/arm/kernel/atags.c @@ -35,7 +35,7 @@ create_proc_entries(void) { struct proc_dir_entry* tags_entry; - tags_entry = create_proc_read_entry("atags", 0400, &proc_root, read_buffer, &tags_buffer); + tags_entry = create_proc_read_entry("atags", 0400, NULL, read_buffer, &tags_buffer); if (!tags_entry) return -ENOMEM; diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 5b2799eb96a6..092d4b3d8f76 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -302,7 +302,7 @@ void __init iop_init(void) #if 0 /* Crashing in 2.4 now, not yet sure why. 
--jmt */ #ifdef CONFIG_PROC_FS - create_proc_info_entry("mac_iop", 0, &proc_root, iop_get_proc_info); + create_proc_info_entry("mac_iop", 0, NULL, iop_get_proc_info); #endif #endif } diff --git a/arch/mips/basler/excite/excite_procfs.c b/arch/mips/basler/excite/excite_procfs.c index 9ee67a95f6b9..6c08b386fdad 100644 --- a/arch/mips/basler/excite/excite_procfs.c +++ b/arch/mips/basler/excite/excite_procfs.c @@ -65,7 +65,7 @@ excite_bootrom_read(char *page, char **start, off_t off, int count, void excite_procfs_init(void) { /* Create & populate /proc/excite */ - struct proc_dir_entry * const pdir = proc_mkdir("excite", &proc_root); + struct proc_dir_entry * const pdir = proc_mkdir("excite", NULL); if (pdir) { struct proc_dir_entry * e; diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index 984f80e668ca..6540d2c9fbb7 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -59,7 +59,7 @@ static int make_proc_exitcode(void) { struct proc_dir_entry *ent; - ent = create_proc_entry("exitcode", 0600, &proc_root); + ent = create_proc_entry("exitcode", 0600, NULL); if (ent == NULL) { printk(KERN_WARNING "make_proc_exitcode : Failed to register " "/proc/exitcode\n"); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index e8cb9ff183e9..83603cfbde81 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -364,7 +364,7 @@ int __init make_proc_sysemu(void) if (!sysemu_supported) return 0; - ent = create_proc_entry("sysemu", 0600, &proc_root); + ent = create_proc_entry("sysemu", 0600, NULL); if (ent == NULL) { diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 1960f1985e5e..84c480bb3715 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -424,7 +424,7 @@ static int __init mtrr_if_init(void) return -ENODEV; proc_root_mtrr = - proc_create("mtrr", S_IWUSR | S_IRUGO, &proc_root, &mtrr_fops); + proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); if 
(proc_root_mtrr) proc_root_mtrr->owner = THIS_MODULE; diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index b1d6cad84282..a784f5e22ee9 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -423,7 +423,7 @@ cleanup_module(void) } put_tty_driver(ip2_tty_driver); unregister_chrdev(IP2_IPL_MAJOR, pcIpl); - remove_proc_entry("ip2mem", &proc_root); + remove_proc_entry("ip2mem", NULL); // free memory for (i = 0; i < IP2_MAX_BOARDS; i++) { @@ -695,7 +695,7 @@ ip2_loadmain(int *iop, int *irqp, unsigned char *firmware, int firmsize) } } /* Register the read_procmem thing */ - if (!create_proc_info_entry("ip2mem",0,&proc_root,ip2_read_procmem)) { + if (!create_proc_info_entry("ip2mem",0,NULL,ip2_read_procmem)) { printk(KERN_ERR "IP2: failed to register read_procmem\n"); } else { diff --git a/drivers/mca/mca-proc.c b/drivers/mca/mca-proc.c index 33d5e0820cc5..81ea0d377bf4 100644 --- a/drivers/mca/mca-proc.c +++ b/drivers/mca/mca-proc.c @@ -183,7 +183,7 @@ void __init mca_do_proc_init(void) struct proc_dir_entry* node = NULL; struct mca_device *mca_dev; - proc_mca = proc_mkdir("mca", &proc_root); + proc_mca = proc_mkdir("mca", NULL); create_proc_read_entry("pos",0,proc_mca,get_mca_info,NULL); create_proc_read_entry("machine",0,proc_mca,get_mca_machine_info,NULL); diff --git a/drivers/misc/hdpuftrs/hdpu_cpustate.c b/drivers/misc/hdpuftrs/hdpu_cpustate.c index 302e92418bbe..154155c9b638 100644 --- a/drivers/misc/hdpuftrs/hdpu_cpustate.c +++ b/drivers/misc/hdpuftrs/hdpu_cpustate.c @@ -210,7 +210,7 @@ static int hdpu_cpustate_probe(struct platform_device *pdev) return ret; } - proc_de = create_proc_entry("sky_cpustate", 0666, &proc_root); + proc_de = create_proc_entry("sky_cpustate", 0666, NULL); if (!proc_de) { printk(KERN_WARNING "sky_cpustate: " "Unable to create proc entry\n"); diff --git a/drivers/misc/hdpuftrs/hdpu_nexus.c b/drivers/misc/hdpuftrs/hdpu_nexus.c index 2fa36f7a6eb3..e92b7efccc72 100644 --- 
a/drivers/misc/hdpuftrs/hdpu_nexus.c +++ b/drivers/misc/hdpuftrs/hdpu_nexus.c @@ -102,8 +102,8 @@ static int hdpu_nexus_probe(struct platform_device *pdev) printk(KERN_ERR "sky_nexus: Could not map slot id\n"); } - hdpu_slot_id = create_proc_entry("sky_slot_id", 0666, &proc_root); - if (!hdpu_slot_id) { + hdpu_slot_id = create_proc_entry("sky_slot_id", 0666, NULL); + if (!hdpu_slot_id) printk(KERN_WARNING "sky_nexus: " "Unable to create proc dir entry: sky_slot_id\n"); } else { @@ -111,8 +111,8 @@ static int hdpu_nexus_probe(struct platform_device *pdev) hdpu_slot_id->owner = THIS_MODULE; } - hdpu_chassis_id = create_proc_entry("sky_chassis_id", 0666, &proc_root); - if (!hdpu_chassis_id) { + hdpu_chassis_id = create_proc_entry("sky_chassis_id", 0666, NULL); + if (!hdpu_chassis_id) printk(KERN_WARNING "sky_nexus: " "Unable to create proc dir entry: sky_chassis_id\n"); } else { @@ -128,8 +128,8 @@ static int hdpu_nexus_remove(struct platform_device *pdev) slot_id = -1; chassis_id = -1; - remove_proc_entry("sky_slot_id", &proc_root); - remove_proc_entry("sky_chassis_id", &proc_root); + remove_proc_entry("sky_slot_id", NULL); + remove_proc_entry("sky_chassis_id", NULL); hdpu_slot_id = 0; hdpu_chassis_id = 0; diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 556063e8f7a9..8ae9406b10ad 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -311,7 +311,7 @@ out_error: int dasd_proc_init(void) { - dasd_proc_root_entry = proc_mkdir("dasd", &proc_root); + dasd_proc_root_entry = proc_mkdir("dasd", NULL); if (!dasd_proc_root_entry) goto out_nodasd; dasd_proc_root_entry->owner = THIS_MODULE; @@ -335,7 +335,7 @@ dasd_proc_init(void) out_nostatistics: remove_proc_entry("devices", dasd_proc_root_entry); out_nodevices: - remove_proc_entry("dasd", &proc_root); + remove_proc_entry("dasd", NULL); out_nodasd: return -ENOENT; } @@ -345,5 +345,5 @@ dasd_proc_exit(void) { remove_proc_entry("devices", dasd_proc_root_entry); 
remove_proc_entry("statistics", dasd_proc_root_entry); - remove_proc_entry("dasd", &proc_root); + remove_proc_entry("dasd", NULL); } diff --git a/drivers/s390/char/tape_proc.c b/drivers/s390/char/tape_proc.c index c9b96d51b28f..0c39636b2174 100644 --- a/drivers/s390/char/tape_proc.c +++ b/drivers/s390/char/tape_proc.c @@ -125,7 +125,7 @@ tape_proc_init(void) { tape_proc_devices = create_proc_entry ("tapedevices", S_IFREG | S_IRUGO | S_IWUSR, - &proc_root); + NULL); if (tape_proc_devices == NULL) { PRINT_WARN("tape: Cannot register procfs entry tapedevices\n"); return; @@ -141,5 +141,5 @@ void tape_proc_cleanup(void) { if (tape_proc_devices != NULL) - remove_proc_entry ("tapedevices", &proc_root); + remove_proc_entry ("tapedevices", NULL); } diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index e8597ec92247..ef33d5df2229 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -375,7 +375,7 @@ cio_ignore_proc_init (void) struct proc_dir_entry *entry; entry = create_proc_entry ("cio_ignore", S_IFREG | S_IRUGO | S_IWUSR, - &proc_root); + NULL); if (!entry) return -ENOENT; diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index 10aa1e780801..43876e287370 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -3632,7 +3632,7 @@ qdio_add_procfs_entry(void) { proc_perf_file_registration=0; qdio_perf_proc_file=create_proc_entry(QDIO_PERF, - S_IFREG|0444,&proc_root); + S_IFREG|0444,NULL); if (qdio_perf_proc_file) { qdio_perf_proc_file->read_proc=&qdio_perf_procfile_read; } else proc_perf_file_registration=-1; @@ -3647,7 +3647,7 @@ static void qdio_remove_procfs_entry(void) { if (!proc_perf_file_registration) /* means if it went ok earlier */ - remove_proc_entry(QDIO_PERF,&proc_root); + remove_proc_entry(QDIO_PERF,NULL); } /** diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index b135a1ed4b2c..18551aaf5e09 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4996,7 
+4996,7 @@ static int __init megaraid_init(void) max_mbox_busy_wait = MBOX_BUSY_WAIT; #ifdef CONFIG_PROC_FS - mega_proc_dir_entry = proc_mkdir("megaraid", &proc_root); + mega_proc_dir_entry = proc_mkdir("megaraid", NULL); if (!mega_proc_dir_entry) { printk(KERN_WARNING "megaraid: failed to create megaraid root\n"); @@ -5005,7 +5005,7 @@ static int __init megaraid_init(void) error = pci_register_driver(&megaraid_pci_driver); if (error) { #ifdef CONFIG_PROC_FS - remove_proc_entry("megaraid", &proc_root); + remove_proc_entry("megaraid", NULL); #endif return error; } @@ -5035,7 +5035,7 @@ static void __exit megaraid_exit(void) pci_unregister_driver(&megaraid_pci_driver); #ifdef CONFIG_PROC_FS - remove_proc_entry("megaraid", &proc_root); + remove_proc_entry("megaraid", NULL); #endif } diff --git a/drivers/video/clps711xfb.c b/drivers/video/clps711xfb.c index 17b5267f44d7..9f8a389dc7ae 100644 --- a/drivers/video/clps711xfb.c +++ b/drivers/video/clps711xfb.c @@ -381,7 +381,7 @@ int __init clps711xfb_init(void) /* Register the /proc entries. 
*/ clps7111fb_backlight_proc_entry = create_proc_entry("backlight", 0444, - &proc_root); + NULL); if (clps7111fb_backlight_proc_entry == NULL) { printk("Couldn't create the /proc entry for the backlight.\n"); return -EINVAL; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index b1d6df671edf..28cbca805905 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,7 @@ #include +extern struct proc_dir_entry proc_root; #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); #else diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index c4137bb94a9e..48bcf20cec2f 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -854,7 +854,7 @@ void __init proc_misc_init(void) /* And now for trickier ones */ #ifdef CONFIG_PRINTK - proc_create("kmsg", S_IRUSR, &proc_root, &proc_kmsg_operations); + proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); #endif proc_create("locks", 0, NULL, &proc_locks_operations); proc_create("devices", 0, NULL, &proc_devinfo_operations); diff --git a/fs/proc/root.c b/fs/proc/root.c index 5e93e9b0124e..c741b45a5503 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -232,4 +232,3 @@ EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(create_proc_entry); EXPORT_SYMBOL(proc_create); EXPORT_SYMBOL(remove_proc_entry); -EXPORT_SYMBOL(proc_root); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index f56205cbebc0..2183ffdc5489 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -96,7 +96,6 @@ struct vmcore { #ifdef CONFIG_PROC_FS -extern struct proc_dir_entry proc_root; extern struct proc_dir_entry *proc_root_kcore; extern spinlock_t proc_subdir_lock; @@ -243,8 +242,6 @@ struct tty_driver; static inline void proc_tty_register_driver(struct tty_driver *driver) {}; static inline void proc_tty_unregister_driver(struct tty_driver *driver) {}; -extern struct proc_dir_entry proc_root; - static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; diff --git a/kernel/configs.c 
b/kernel/configs.c index e84d3f9c6c7b..d3a4b82a8a96 100644 --- a/kernel/configs.c +++ b/kernel/configs.c @@ -79,8 +79,7 @@ static int __init ikconfig_init(void) struct proc_dir_entry *entry; /* create the current config file */ - entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, - &proc_root); + entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, NULL); if (!entry) return -ENOMEM; @@ -95,7 +94,7 @@ static int __init ikconfig_init(void) static void __exit ikconfig_cleanup(void) { - remove_proc_entry("config.gz", &proc_root); + remove_proc_entry("config.gz", NULL); } module_init(ikconfig_init); diff --git a/sound/core/info.c b/sound/core/info.c index 9977ec2eace3..cb5ead3e202d 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -544,7 +544,7 @@ int __init snd_info_init(void) { struct proc_dir_entry *p; - p = snd_create_proc_entry("asound", S_IFDIR | S_IRUGO | S_IXUGO, &proc_root); + p = snd_create_proc_entry("asound", S_IFDIR | S_IRUGO | S_IXUGO, NULL); if (p == NULL) return -ENOMEM; snd_proc_root = p; @@ -594,7 +594,7 @@ int __exit snd_info_done(void) #ifdef CONFIG_SND_OSSEMUL snd_info_free_entry(snd_oss_root); #endif - snd_remove_proc_entry(&proc_root, snd_proc_root); + snd_remove_proc_entry(NULL, snd_proc_root); } return 0; } -- cgit v1.2.3-71-gd317 From 8731f14d37825b54ad0c4c309cba2bc8fdf13a86 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 29 Apr 2008 01:01:58 -0700 Subject: proc: remove ->get_info infrastructure Now that last dozen or so users of ->get_info were removed, ditch it too. Everyone sane should have switched to seq_file interface long ago. P.S.: Co-existing 3 interfaces (->get_info/->read_proc/->proc_fops) for proc is long-standing crap, BTW, thus a) put ->read_proc/->write_proc/read_proc_entry() users on death row, b) new such users should be rejected, c) everyone is encouraged to convert his favourite ->read_proc user or I'll do it, lazy bastards.
Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 7 +------ include/linux/proc_fs.h | 15 +-------------- 2 files changed, 2 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8b406e21a258..0f3d97d41b0f 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -69,12 +69,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes, count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); start = NULL; - if (dp->get_info) { - /* Handle old net routines */ - n = dp->get_info(page, &start, *ppos, count); - if (n < count) - eof = 1; - } else if (dp->read_proc) { + if (dp->read_proc) { /* * How to be a proc read function * ------------------------------ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 2183ffdc5489..29abcb805754 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -40,7 +40,7 @@ enum { * /proc file has a parent, but "subdir" is NULL for all * non-directory entries). * - * "get_info" is called at "read", while "owner" is used to protect module + * "owner" is used to protect module * from unloading while proc_dir_entry is in use */ @@ -48,7 +48,6 @@ typedef int (read_proc_t)(char *page, char **start, off_t off, int count, int *eof, void *data); typedef int (write_proc_t)(struct file *file, const char __user *buffer, unsigned long count, void *data); -typedef int (get_info_t)(char *, char **, off_t, int); struct proc_dir_entry { unsigned int low_ino; @@ -69,7 +68,6 @@ struct proc_dir_entry { * somewhere. 
*/ const struct file_operations *proc_fops; - get_info_t *get_info; struct module *owner; struct proc_dir_entry *next, *parent, *subdir; void *data; @@ -187,14 +185,6 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name, return res; } -static inline struct proc_dir_entry *create_proc_info_entry(const char *name, - mode_t mode, struct proc_dir_entry *base, get_info_t *get_info) -{ - struct proc_dir_entry *res=create_proc_entry(name,mode,base); - if (res) res->get_info=get_info; - return res; -} - extern struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops); extern void proc_net_remove(struct net *net, const char *name); @@ -234,9 +224,6 @@ static inline struct proc_dir_entry *proc_mkdir(const char *name, static inline struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry *base, read_proc_t *read_proc, void * data) { return NULL; } -static inline struct proc_dir_entry *create_proc_info_entry(const char *name, - mode_t mode, struct proc_dir_entry *base, get_info_t *get_info) - { return NULL; } struct tty_driver; static inline void proc_tty_register_driver(struct tty_driver *driver) {}; -- cgit v1.2.3-71-gd317 From 59b7435149eab2dd06dd678742faff6049cb655f Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 29 Apr 2008 01:02:00 -0700 Subject: proc: introduce proc_create_data to setup de->data This set of patches fixes a proc ->open'less usage due to ->proc_fops flip in the most part of the kernel code. The original OOPS is described in the commit 2d3a4e3666325a9709cc8ea2e88151394e8f20fc: Typical PDE creation code looks like: pde = create_proc_entry("foo", 0, NULL); if (pde) pde->proc_fops = &foo_proc_fops; Notice that PDE is first created, only then ->proc_fops is set up to final value.
This is a problem because right after creation a) PDE is fully visible in /proc , and b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's possible to ->read without ->open (see one class of oopses below). The fix is new API called proc_create() which makes sure ->proc_fops are set up before gluing PDE to main tree. Typical new code looks like: pde = proc_create("foo", 0, NULL, &foo_proc_fops); if (!pde) return -ENOMEM; Fix most networking users for a start. In the long run, create_proc_entry() for regular files will go. In addition to this, proc_create_data is introduced to fix reading from proc without PDE->data. The race is basically the same as above. create_proc_entries is replaced in the entire kernel code as new method is also simply better. This patch: The problem is the same as for de->proc_fops. Right now PDE becomes visible without data set. So, the entry could be looked up without data. This, in most cases, will simply OOPS. proc_create_data call is created to address this issue. proc_create now becomes a wrapper around it. Signed-off-by: Denis V. Lunev Cc: "Eric W. Biederman" Cc: "J. 
Bruce Fields" Cc: Alessandro Zummo Cc: Alexey Dobriyan Cc: Bartlomiej Zolnierkiewicz Cc: Benjamin Herrenschmidt Cc: Bjorn Helgaas Cc: Chris Mason Acked-by: David Howells Cc: Dmitry Torokhov Cc: Geert Uytterhoeven Cc: Grant Grundler Cc: Greg Kroah-Hartman Cc: Haavard Skinnemoen Cc: Heiko Carstens Cc: Ingo Molnar Cc: James Bottomley Cc: Jaroslav Kysela Cc: Jeff Garzik Cc: Jeff Mahoney Cc: Jesper Nilsson Cc: Karsten Keil Cc: Kyle McMartin Cc: Len Brown Cc: Martin Schwidefsky Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Mauro Carvalho Chehab Cc: Mikael Starvik Cc: Nadia Derbey Cc: Neil Brown Cc: Paul Mackerras Cc: Peter Osterlund Cc: Pierre Peiffer Cc: Russell King Cc: Takashi Iwai Cc: Tony Luck Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 8 +++++--- fs/proc/root.c | 2 +- include/linux/proc_fs.h | 17 +++++++++++++++-- 3 files changed, 21 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 0f3d97d41b0f..9d53b39a9cf8 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -675,9 +675,10 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, return ent; } -struct proc_dir_entry *proc_create(const char *name, mode_t mode, - struct proc_dir_entry *parent, - const struct file_operations *proc_fops) +struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, + struct proc_dir_entry *parent, + const struct file_operations *proc_fops, + void *data) { struct proc_dir_entry *pde; nlink_t nlink; @@ -698,6 +699,7 @@ struct proc_dir_entry *proc_create(const char *name, mode_t mode, if (!pde) goto out; pde->proc_fops = proc_fops; + pde->data = data; if (proc_register(parent, pde) < 0) goto out_free; return pde; diff --git a/fs/proc/root.c b/fs/proc/root.c index c741b45a5503..95117538a4f6 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -230,5 +230,5 @@ void pid_ns_release_proc(struct pid_namespace *ns) 
EXPORT_SYMBOL(proc_symlink); EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(create_proc_entry); -EXPORT_SYMBOL(proc_create); +EXPORT_SYMBOL(proc_create_data); EXPORT_SYMBOL(remove_proc_entry); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 29abcb805754..9883bc942262 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -116,9 +116,10 @@ void de_put(struct proc_dir_entry *de); extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent); -struct proc_dir_entry *proc_create(const char *name, mode_t mode, +struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, struct proc_dir_entry *parent, - const struct file_operations *proc_fops); + const struct file_operations *proc_fops, + void *data); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); extern struct vfsmount *proc_mnt; @@ -173,6 +174,12 @@ extern struct proc_dir_entry *proc_mkdir(const char *,struct proc_dir_entry *); extern struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, struct proc_dir_entry *parent); +static inline struct proc_dir_entry *proc_create(const char *name, mode_t mode, + struct proc_dir_entry *parent, const struct file_operations *proc_fops) +{ + return proc_create_data(name, mode, parent, proc_fops, NULL); +} + static inline struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry *base, read_proc_t *read_proc, void * data) @@ -214,6 +221,12 @@ static inline struct proc_dir_entry *proc_create(const char *name, { return NULL; } +static inline struct proc_dir_entry *proc_create_data(const char *name, + mode_t mode, struct proc_dir_entry *parent, + const struct file_operations *proc_fops, void *data) +{ + return NULL; +} #define remove_proc_entry(name, parent) do {} while (0) static inline struct proc_dir_entry *proc_symlink(const char *name, -- cgit v1.2.3-71-gd317 From 1a46674b996bf9a15f0333178f5829ca2d7c32e2 Mon 
Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 29 Apr 2008 01:02:38 -0700 Subject: include/linux/sysctl.h: remove empty #else Remove an empty #else. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 571f01d20a86..5432b34a1e51 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1085,8 +1085,6 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); -#else /* __KERNEL__ */ - #endif /* __KERNEL__ */ #endif /* _LINUX_SYSCTL_H */ -- cgit v1.2.3-71-gd317 From 2c4c7155f25192da3511a6c911db4d08102d36c4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 01:02:41 -0700 Subject: sysctl: clean from unneeded extern and forward declarations The do_sysctl_strategy isn't used outside kernel/sysctl.c, so this can be static and without a prototype in header. Besides, move this one and parse_table() above their callers and drop the forward declarations of the latter call. One more "besides" - fix two checkpatch warnings: space before a ( and an extra space at the end of a line. Signed-off-by: Pavel Emelyanov Acked-by: David S. Miller Cc: "Eric W. Biederman" Cc: Alexey Dobriyan Cc: Denis V. 
Lunev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 5 -- kernel/sysctl.c | 144 +++++++++++++++++++++++-------------------------- 2 files changed, 68 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 5432b34a1e51..39eafd8f97a3 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -981,11 +981,6 @@ extern int do_sysctl (int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); -extern int do_sysctl_strategy (struct ctl_table *table, - int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen); - extern ctl_handler sysctl_data; extern ctl_handler sysctl_string; extern ctl_handler sysctl_intvec; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1cdfe942d160..874e813e40c8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -145,12 +145,6 @@ extern int no_unaligned_warning; extern int max_lock_depth; #endif -#ifdef CONFIG_SYSCTL_SYSCALL -static int parse_table(int __user *, int, void __user *, size_t __user *, - void __user *, size_t, struct ctl_table *); -#endif - - #ifdef CONFIG_PROC_SYSCTL static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -1439,6 +1433,74 @@ void register_sysctl_root(struct ctl_table_root *root) } #ifdef CONFIG_SYSCTL_SYSCALL +/* Perform the actual read/write of a sysctl table entry. 
*/ +static int do_sysctl_strategy(struct ctl_table *table, + int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + int op = 0, rc; + + if (oldval) + op |= 004; + if (newval) + op |= 002; + if (sysctl_perm(table, op)) + return -EPERM; + + if (table->strategy) { + rc = table->strategy(table, name, nlen, oldval, oldlenp, + newval, newlen); + if (rc < 0) + return rc; + if (rc > 0) + return 0; + } + + /* If there is no strategy routine, or if the strategy returns + * zero, proceed with automatic r/w */ + if (table->data && table->maxlen) { + rc = sysctl_data(table, name, nlen, oldval, oldlenp, + newval, newlen); + if (rc < 0) + return rc; + } + return 0; +} + +static int parse_table(int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, + struct ctl_table *table) +{ + int n; +repeat: + if (!nlen) + return -ENOTDIR; + if (get_user(n, name)) + return -EFAULT; + for ( ; table->ctl_name || table->procname; table++) { + if (!table->ctl_name) + continue; + if (n == table->ctl_name) { + int error; + if (table->child) { + if (sysctl_perm(table, 001)) + return -EPERM; + name++; + nlen--; + table = table->child; + goto repeat; + } + error = do_sysctl_strategy(table, name, nlen, + oldval, oldlenp, + newval, newlen); + return error; + } + } + return -ENOTDIR; +} + int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -1511,76 +1573,6 @@ int sysctl_perm(struct ctl_table *table, int op) return test_perm(table->mode, op); } -#ifdef CONFIG_SYSCTL_SYSCALL -static int parse_table(int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - struct ctl_table *table) -{ - int n; -repeat: - if (!nlen) - return -ENOTDIR; - if (get_user(n, name)) - return -EFAULT; - for ( ; table->ctl_name || table->procname; table++) { - if (!table->ctl_name) - 
continue; - if (n == table->ctl_name) { - int error; - if (table->child) { - if (sysctl_perm(table, 001)) - return -EPERM; - name++; - nlen--; - table = table->child; - goto repeat; - } - error = do_sysctl_strategy(table, name, nlen, - oldval, oldlenp, - newval, newlen); - return error; - } - } - return -ENOTDIR; -} - -/* Perform the actual read/write of a sysctl table entry. */ -int do_sysctl_strategy (struct ctl_table *table, - int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - int op = 0, rc; - - if (oldval) - op |= 004; - if (newval) - op |= 002; - if (sysctl_perm(table, op)) - return -EPERM; - - if (table->strategy) { - rc = table->strategy(table, name, nlen, oldval, oldlenp, - newval, newlen); - if (rc < 0) - return rc; - if (rc > 0) - return 0; - } - - /* If there is no strategy routine, or if the strategy returns - * zero, proceed with automatic r/w */ - if (table->data && table->maxlen) { - rc = sysctl_data(table, name, nlen, oldval, oldlenp, - newval, newlen); - if (rc < 0) - return rc; - } - return 0; -} -#endif /* CONFIG_SYSCTL_SYSCALL */ - static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) { for (; table->ctl_name || table->procname; table++) { -- cgit v1.2.3-71-gd317 From d7321cd62470b70d2717dae5a963e7a8fabff4d5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 01:02:44 -0700 Subject: sysctl: add the ->permissions callback on the ctl_table_root When reading from/writing to some table, a root, which this table came from, may affect this table's permissions, depending on who is working with the table. The core hunk is at the bottom of this patch. All the rest is just pushing the ctl_table_root argument up to the sysctl_perm() function. This will be mostly (only?) used in the net sysctls. Signed-off-by: Pavel Emelyanov Acked-by: David S. Miller Cc: "Eric W. Biederman" Cc: Alexey Dobriyan Cc: Denis V. 
Lunev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 4 ++-- include/linux/sysctl.h | 7 ++++++- kernel/sysctl.c | 25 ++++++++++++++++++------- 3 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5e31585292a0..5acc001d49f6 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -190,7 +190,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, * and won't be until we finish. */ error = -EPERM; - if (sysctl_perm(table, write ? MAY_WRITE : MAY_READ)) + if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) goto out; /* careful: calling conventions are nasty here */ @@ -388,7 +388,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata * goto out; /* Use the permissions on the sysctl table entry */ - error = sysctl_perm(table, mask); + error = sysctl_perm(head->root, table, mask); out: sysctl_head_finish(head); return error; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 39eafd8f97a3..24141b4d1a11 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -945,11 +945,14 @@ enum /* For the /proc/sys support */ struct ctl_table; struct nsproxy; +struct ctl_table_root; + extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, struct ctl_table_header *prev); extern void sysctl_head_finish(struct ctl_table_header *prev); -extern int sysctl_perm(struct ctl_table *table, int op); +extern int sysctl_perm(struct ctl_table_root *root, + struct ctl_table *table, int op); typedef struct ctl_table ctl_table; @@ -1049,6 +1052,8 @@ struct ctl_table_root { struct list_head header_list; struct list_head *(*lookup)(struct ctl_table_root *root, struct nsproxy *namespaces); + int (*permissions)(struct ctl_table_root *root, + struct nsproxy *namespaces, 
struct ctl_table *table); }; /* struct ctl_table_header is used to maintain dynamic lists of diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 874e813e40c8..d7ffdc59816a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1434,7 +1434,8 @@ void register_sysctl_root(struct ctl_table_root *root) #ifdef CONFIG_SYSCTL_SYSCALL /* Perform the actual read/write of a sysctl table entry. */ -static int do_sysctl_strategy(struct ctl_table *table, +static int do_sysctl_strategy(struct ctl_table_root *root, + struct ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) @@ -1445,7 +1446,7 @@ static int do_sysctl_strategy(struct ctl_table *table, op |= 004; if (newval) op |= 002; - if (sysctl_perm(table, op)) + if (sysctl_perm(root, table, op)) return -EPERM; if (table->strategy) { @@ -1471,6 +1472,7 @@ static int do_sysctl_strategy(struct ctl_table *table, static int parse_table(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, + struct ctl_table_root *root, struct ctl_table *table) { int n; @@ -1485,14 +1487,14 @@ repeat: if (n == table->ctl_name) { int error; if (table->child) { - if (sysctl_perm(table, 001)) + if (sysctl_perm(root, table, 001)) return -EPERM; name++; nlen--; table = table->child; goto repeat; } - error = do_sysctl_strategy(table, name, nlen, + error = do_sysctl_strategy(root, table, name, nlen, oldval, oldlenp, newval, newlen); return error; @@ -1518,7 +1520,8 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) { error = parse_table(name, nlen, oldval, oldlenp, - newval, newlen, head->ctl_table); + newval, newlen, + head->root, head->ctl_table); if (error != -ENOTDIR) { sysctl_head_finish(head); break; @@ -1564,13 +1567,21 @@ static int test_perm(int mode, int op) return -EACCES; } -int sysctl_perm(struct ctl_table 
*table, int op) +int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) { int error; + int mode; + error = security_sysctl(table, op); if (error) return error; - return test_perm(table->mode, op); + + if (root->permissions) + mode = root->permissions(root, current->nsproxy, table); + else + mode = table->mode; + + return test_perm(mode, op); } static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) -- cgit v1.2.3-71-gd317 From 48cf6061b30205b29b306bf9bc22dd6f0b091461 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Tue, 29 Apr 2008 01:02:46 -0700 Subject: NBD: allow nbd to be used locally This patch allows Network Block Device to be mounted locally (nbd-client to nbd-server over 127.0.0.1). It creates a kthread to avoid the deadlock described in NBD tools documentation. So, if nbd-client hangs waiting for pages, the kblockd thread can continue its work and free pages. I have tested the patch to verify that it avoids the hang that always occurs when writing to a localhost nbd connection. I have also tested to verify that no performance degradation results from the additional thread and queue. Patch originally from Laurent Vivier. 
Signed-off-by: Paul Clements Signed-off-by: Laurent Vivier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/nbd.c | 144 ++++++++++++++++++++++++++++++++++------------------ include/linux/nbd.h | 4 +- 2 files changed, 98 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 60cc54368b66..8e33de6bea33 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -441,6 +442,85 @@ static void nbd_clear_que(struct nbd_device *lo) } +static void nbd_handle_req(struct nbd_device *lo, struct request *req) +{ + if (!blk_fs_request(req)) + goto error_out; + + nbd_cmd(req) = NBD_CMD_READ; + if (rq_data_dir(req) == WRITE) { + nbd_cmd(req) = NBD_CMD_WRITE; + if (lo->flags & NBD_READ_ONLY) { + printk(KERN_ERR "%s: Write on read-only\n", + lo->disk->disk_name); + goto error_out; + } + } + + req->errors = 0; + + mutex_lock(&lo->tx_lock); + if (unlikely(!lo->sock)) { + mutex_unlock(&lo->tx_lock); + printk(KERN_ERR "%s: Attempted send on closed socket\n", + lo->disk->disk_name); + req->errors++; + nbd_end_request(req); + return; + } + + lo->active_req = req; + + if (nbd_send_req(lo, req) != 0) { + printk(KERN_ERR "%s: Request send failed\n", + lo->disk->disk_name); + req->errors++; + nbd_end_request(req); + } else { + spin_lock(&lo->queue_lock); + list_add(&req->queuelist, &lo->queue_head); + spin_unlock(&lo->queue_lock); + } + + lo->active_req = NULL; + mutex_unlock(&lo->tx_lock); + wake_up_all(&lo->active_wq); + + return; + +error_out: + req->errors++; + nbd_end_request(req); +} + +static int nbd_thread(void *data) +{ + struct nbd_device *lo = data; + struct request *req; + + set_user_nice(current, -20); + while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) { + /* wait for something to do */ + wait_event_interruptible(lo->waiting_wq, + kthread_should_stop() || + !list_empty(&lo->waiting_queue)); + + 
/* extract request */ + if (list_empty(&lo->waiting_queue)) + continue; + + spin_lock_irq(&lo->queue_lock); + req = list_entry(lo->waiting_queue.next, struct request, + queuelist); + list_del_init(&req->queuelist); + spin_unlock_irq(&lo->queue_lock); + + /* handle request */ + nbd_handle_req(lo, req); + } + return 0; +} + /* * We always wait for result of write, for now. It would be nice to make it optional * in future @@ -456,65 +536,23 @@ static void do_nbd_request(struct request_queue * q) struct nbd_device *lo; blkdev_dequeue_request(req); + + spin_unlock_irq(q->queue_lock); + dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", req->rq_disk->disk_name, req, req->cmd_type); - if (!blk_fs_request(req)) - goto error_out; - lo = req->rq_disk->private_data; BUG_ON(lo->magic != LO_MAGIC); - nbd_cmd(req) = NBD_CMD_READ; - if (rq_data_dir(req) == WRITE) { - nbd_cmd(req) = NBD_CMD_WRITE; - if (lo->flags & NBD_READ_ONLY) { - printk(KERN_ERR "%s: Write on read-only\n", - lo->disk->disk_name); - goto error_out; - } - } - - req->errors = 0; - spin_unlock_irq(q->queue_lock); - - mutex_lock(&lo->tx_lock); - if (unlikely(!lo->sock)) { - mutex_unlock(&lo->tx_lock); - printk(KERN_ERR "%s: Attempted send on closed socket\n", - lo->disk->disk_name); - req->errors++; - nbd_end_request(req); - spin_lock_irq(q->queue_lock); - continue; - } + spin_lock_irq(&lo->queue_lock); + list_add_tail(&req->queuelist, &lo->waiting_queue); + spin_unlock_irq(&lo->queue_lock); - lo->active_req = req; - - if (nbd_send_req(lo, req) != 0) { - printk(KERN_ERR "%s: Request send failed\n", - lo->disk->disk_name); - req->errors++; - nbd_end_request(req); - } else { - spin_lock(&lo->queue_lock); - list_add(&req->queuelist, &lo->queue_head); - spin_unlock(&lo->queue_lock); - } - - lo->active_req = NULL; - mutex_unlock(&lo->tx_lock); - wake_up_all(&lo->active_wq); + wake_up(&lo->waiting_wq); spin_lock_irq(q->queue_lock); - continue; - -error_out: - req->errors++; - spin_unlock(q->queue_lock); - 
nbd_end_request(req); - spin_lock(q->queue_lock); } } @@ -524,6 +562,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file, struct nbd_device *lo = inode->i_bdev->bd_disk->private_data; int error; struct request sreq ; + struct task_struct *thread; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -606,7 +645,12 @@ static int nbd_ioctl(struct inode *inode, struct file *file, case NBD_DO_IT: if (!lo->file) return -EINVAL; + thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); + if (IS_ERR(thread)) + return PTR_ERR(thread); + wake_up_process(thread); error = nbd_do_it(lo); + kthread_stop(thread); if (error) return error; sock_shutdown(lo, 1); @@ -695,10 +739,12 @@ static int __init nbd_init(void) nbd_dev[i].file = NULL; nbd_dev[i].magic = LO_MAGIC; nbd_dev[i].flags = 0; + INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); spin_lock_init(&nbd_dev[i].queue_lock); INIT_LIST_HEAD(&nbd_dev[i].queue_head); mutex_init(&nbd_dev[i].tx_lock); init_waitqueue_head(&nbd_dev[i].active_wq); + init_waitqueue_head(&nbd_dev[i].waiting_wq); nbd_dev[i].blksize = 1024; nbd_dev[i].bytesize = 0; disk->major = NBD_MAJOR; diff --git a/include/linux/nbd.h b/include/linux/nbd.h index 986572081e19..69075517c511 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -56,9 +56,11 @@ struct nbd_device { int magic; spinlock_t queue_lock; - struct list_head queue_head;/* Requests are added here... */ + struct list_head queue_head; /* Requests waiting result */ struct request *active_req; wait_queue_head_t active_wq; + struct list_head waiting_queue; /* Requests to be sent */ + wait_queue_head_t waiting_wq; struct mutex tx_lock; struct gendisk *disk; -- cgit v1.2.3-71-gd317 From 098ef1c0ea7b1b3ff9d89364af5ebc5b672cf932 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 29 Apr 2008 01:02:52 -0700 Subject: nbd: delete superfluous test for __GNUC__ Since already tests for __GNUC__, there's no point in nbd.h repeating that test. Signed-off-by: Robert P. J. 
Day Cc: Paul Clements Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nbd.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nbd.h b/include/linux/nbd.h index 69075517c511..155719dab813 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -88,11 +88,7 @@ struct nbd_request { char handle[8]; __be64 from; __be32 len; -} -#ifdef __GNUC__ - __attribute__ ((packed)) -#endif -; +} __attribute__ ((packed)); /* * This is the reply packet that nbd-server sends back to the client after -- cgit v1.2.3-71-gd317 From 199f0ca514f9c17668eec4f935c4ba24cd789f85 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 29 Apr 2008 01:03:13 -0700 Subject: idr: create idr_layer_cache at boot time Avoid a possible kmem_cache_create() failure by creating idr_layer_cache unconditionally at boot time rather than creating it on-demand when idr_init() is called the first time. This change also enables us to eliminate the check every time idr_init() is called. 
[akpm@linux-foundation.org: rename init_id_cache() to idr_init_cache()] [akpm@linux-foundation.org: fix alpha build] Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 3 +++ init/main.c | 2 ++ lib/idr.c | 10 ++++------ 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 0edda411959c..9a2d762124de 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -14,6 +14,7 @@ #include #include +#include #if BITS_PER_LONG == 32 # define IDR_BITS 5 @@ -115,4 +116,6 @@ void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); +void __init idr_init_cache(void); + #endif /* __IDR_H__ */ diff --git a/init/main.c b/init/main.c index c62c98f381f2..624266b524d4 100644 --- a/init/main.c +++ b/init/main.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -637,6 +638,7 @@ asmlinkage void __init start_kernel(void) enable_debug_pagealloc(); cpu_hotplug_init(); kmem_cache_init(); + idr_init_cache(); setup_per_cpu_pageset(); numa_policy_init(); if (late_time_init) diff --git a/lib/idr.c b/lib/idr.c index afbb0b1023d4..8368c81fcb7d 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -585,12 +585,11 @@ static void idr_cache_ctor(struct kmem_cache *idr_layer_cache, void *idr_layer) memset(idr_layer, 0, sizeof(struct idr_layer)); } -static int init_id_cache(void) +void __init idr_init_cache(void) { - if (!idr_layer_cache) - idr_layer_cache = kmem_cache_create("idr_layer_cache", - sizeof(struct idr_layer), 0, 0, idr_cache_ctor); - return 0; + idr_layer_cache = kmem_cache_create("idr_layer_cache", + sizeof(struct idr_layer), 0, SLAB_PANIC, + idr_cache_ctor); } /** @@ -602,7 +601,6 @@ static int init_id_cache(void) */ void idr_init(struct idr *idp) { - init_id_cache(); memset(idp, 0, sizeof(struct idr)); spin_lock_init(&idp->lock); } -- cgit v1.2.3-71-gd317 From 
c3c52bce6993c6d37af2c2de9b482a7013d646a7 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Tue, 29 Apr 2008 01:03:18 -0700 Subject: edac: fix module initialization on several modules 2nd time I implemented opstate_init() as an inline function in linux/edac.h. Added calls to opstate_init() to: i82443bxgx_edac.c i82860_edac.c i82875p_edac.c i82975x_edac.c I wrote a fixed version of edac-fix-module-initialization-on-several-modules.patch and build-tested 2.6.25-rc7 with it applied. The build succeeded. I think the patch is now correct. Cc: Alan Cox Signed-off-by: Hitoshi Mitake Signed-off-by: Doug Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/edac/amd76x_edac.c | 7 +++++++ drivers/edac/e752x_edac.c | 15 +++++---------- drivers/edac/e7xxx_edac.c | 13 +++---------- drivers/edac/i3000_edac.c | 13 ++++--------- drivers/edac/i5000_edac.c | 14 ++++---------- drivers/edac/i82443bxgx_edac.c | 7 +++++++ drivers/edac/i82860_edac.c | 7 +++++++ drivers/edac/i82875p_edac.c | 9 +++++++++ drivers/edac/i82975x_edac.c | 8 +++++++- drivers/edac/pasemi_edac.c | 6 ++++++ drivers/edac/r82600_edac.c | 7 +++++++ include/linux/edac.h | 14 +++++++++++++- 12 files changed, 79 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index f22075410591..2b95f1a3edfc 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "edac_core.h" #define AMD76X_REVISION " Ver: 2.0.2 " __DATE__ @@ -344,6 +345,9 @@ static struct pci_driver amd76x_driver = { static int __init amd76x_init(void) { + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + return pci_register_driver(&amd76x_driver); } @@ -358,3 +362,6 @@ module_exit(amd76x_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh"); MODULE_DESCRIPTION("MC support for AMD 76x memory controllers"); + 
+module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index 12e7677b834b..c94a0eb492cb 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -1117,16 +1117,6 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) debugf0("%s(): mci\n", __func__); debugf0("Starting Probe1\n"); - /* make sure error reporting method is sane */ - switch (edac_op_state) { - case EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_NMI: - break; - default: - edac_op_state = EDAC_OPSTATE_POLL; - break; - } - /* check to see if device 0 function 1 is enabled; if it isn't, we * assume the BIOS has reserved it for a reason and is expecting * exclusive access, we take care not to violate that assumption and @@ -1303,6 +1293,10 @@ static int __init e752x_init(void) int pci_rc; debugf3("%s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + pci_rc = pci_register_driver(&e752x_driver); return (pci_rc < 0) ? 
pci_rc : 0; } @@ -1323,6 +1317,7 @@ MODULE_DESCRIPTION("MC support for Intel e752x/3100 memory controllers"); module_param(force_function_unhide, int, 0444); MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:" " 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access"); + module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 96ecc4926641..c7d11cc4e21a 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -414,16 +414,6 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) debugf0("%s(): mci\n", __func__); - /* make sure error reporting method is sane */ - switch (edac_op_state) { - case EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_NMI: - break; - default: - edac_op_state = EDAC_OPSTATE_POLL; - break; - } - pci_read_config_dword(pdev, E7XXX_DRC, &drc); drc_chan = dual_channel_active(drc, dev_idx); @@ -565,6 +555,9 @@ static struct pci_driver e7xxx_driver = { static int __init e7xxx_init(void) { + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + return pci_register_driver(&e7xxx_driver); } diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c index 5d4292811c14..6c9a0f2a593c 100644 --- a/drivers/edac/i3000_edac.c +++ b/drivers/edac/i3000_edac.c @@ -326,15 +326,6 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx) return -ENODEV; } - switch (edac_op_state) { - case EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_NMI: - break; - default: - edac_op_state = EDAC_OPSTATE_POLL; - break; - } - c0dra[0] = readb(window + I3000_C0DRA + 0); /* ranks 0,1 */ c0dra[1] = readb(window + I3000_C0DRA + 1); /* ranks 2,3 */ c1dra[0] = readb(window + I3000_C1DRA + 0); /* ranks 0,1 */ @@ -503,6 +494,10 @@ static int __init i3000_init(void) int pci_rc; debugf3("MC: %s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or 
NMI */ + opstate_init(); + pci_rc = pci_register_driver(&i3000_driver); if (pci_rc < 0) goto fail0; diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 5a852017c17a..4a16b5b61cfb 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1286,16 +1286,6 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx) if (PCI_FUNC(pdev->devfn) != 0) return -ENODEV; - /* make sure error reporting method is sane */ - switch (edac_op_state) { - case EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_NMI: - break; - default: - edac_op_state = EDAC_OPSTATE_POLL; - break; - } - /* Ask the devices for the number of CSROWS and CHANNELS so * that we can calculate the memory resources, etc * @@ -1478,6 +1468,9 @@ static int __init i5000_init(void) debugf2("MC: " __FILE__ ": %s()\n", __func__); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + pci_rc = pci_register_driver(&i5000_driver); return (pci_rc < 0) ? pci_rc : 0; @@ -1501,5 +1494,6 @@ MODULE_AUTHOR ("Linux Networx (http://lnxi.com) Doug Thompson "); MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " I5000_REVISION); + module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index 83bfe37c4bbb..c5305e3ee434 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -29,6 +29,7 @@ #include +#include #include "edac_core.h" #define I82443_REVISION "0.1" @@ -386,6 +387,9 @@ static struct pci_driver i82443bxgx_edacmc_driver = { static int __init i82443bxgx_edacmc_init(void) { + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + return pci_register_driver(&i82443bxgx_edacmc_driver); } @@ -400,3 +404,6 @@ module_exit(i82443bxgx_edacmc_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Tim Small - WPAD"); MODULE_DESCRIPTION("EDAC MC support for Intel 82443BX/GX memory 
controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index f5ecd2c4d813..c0088ba9672b 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "edac_core.h" #define I82860_REVISION " Ver: 2.0.2 " __DATE__ @@ -294,6 +295,9 @@ static int __init i82860_init(void) debugf3("%s()\n", __func__); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + if ((pci_rc = pci_register_driver(&i82860_driver)) < 0) goto fail0; @@ -345,3 +349,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) " "Ben Woodard "); MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index 031abadc439a..e43bdc43a1bf 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "edac_core.h" #define I82875P_REVISION " Ver: 2.0.2 " __DATE__ @@ -393,6 +394,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) struct i82875p_error_info discard; debugf0("%s()\n", __func__); + ovrfl_pdev = pci_get_device(PCI_VEND_DEV(INTEL, 82875_6), NULL); if (i82875p_setup_overfl_dev(pdev, &ovrfl_pdev, &ovrfl_window)) @@ -532,6 +534,10 @@ static int __init i82875p_init(void) int pci_rc; debugf3("%s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + pci_rc = pci_register_driver(&i82875p_driver); if (pci_rc < 0) @@ -586,3 +592,6 @@ module_exit(i82875p_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh"); MODULE_DESCRIPTION("MC support for Intel 82875 
memory hub controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 0ee888456932..2eed3ea2cf62 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -14,7 +14,7 @@ #include #include #include - +#include #include "edac_core.h" #define I82975X_REVISION " Ver: 1.0.0 " __DATE__ @@ -611,6 +611,9 @@ static int __init i82975x_init(void) debugf3("%s()\n", __func__); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + pci_rc = pci_register_driver(&i82975x_driver); if (pci_rc < 0) goto fail0; @@ -664,3 +667,6 @@ module_exit(i82975x_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arvind R. "); MODULE_DESCRIPTION("MC support for Intel 82975 memory hub controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c index 90320917be28..3fd65a563848 100644 --- a/drivers/edac/pasemi_edac.c +++ b/drivers/edac/pasemi_edac.c @@ -284,6 +284,9 @@ static struct pci_driver pasemi_edac_driver = { static int __init pasemi_edac_init(void) { + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + return pci_register_driver(&pasemi_edac_driver); } @@ -298,3 +301,6 @@ module_exit(pasemi_edac_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Egor Martovetsky "); MODULE_DESCRIPTION("MC support for PA Semi PWRficient memory controller"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index e25f712f2dc3..9900675e9598 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "edac_core.h" #define R82600_REVISION 
" Ver: 2.0.2 " __DATE__ @@ -393,6 +394,9 @@ static struct pci_driver r82600_driver = { static int __init r82600_init(void) { + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + return pci_register_driver(&r82600_driver); } @@ -412,3 +416,6 @@ MODULE_DESCRIPTION("MC support for Radisys 82600 memory controllers"); module_param(disable_hardware_scrub, bool, 0644); MODULE_PARM_DESC(disable_hardware_scrub, "If set, disable the chipset's automatic scrub for CEs"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/include/linux/edac.h b/include/linux/edac.h index eab451e69a91..7cf92e8a4196 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -3,7 +3,7 @@ * * Author: Dave Jiang * - * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under + * 2006-2008 (c) MontaVista Software, Inc. This file is licensed under * the terms of the GNU General Public License version 2. This program * is licensed "as is" without any warranty of any kind, whether express * or implied. @@ -26,4 +26,16 @@ extern atomic_t edac_handlers; extern int edac_handler_set(void); extern void edac_atomic_assert_error(void); +static inline void opstate_init(void) +{ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_NMI: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + } + return; +} + #endif -- cgit v1.2.3-71-gd317 From dddfbaf8f86894415abb8256b55da68dab966ebe Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 29 Apr 2008 01:03:26 -0700 Subject: sysv fs: remove superfluous check for __GNUC__ compiler Since isn't exported to userspace, there is little point checking that this is a GNU-compatible compiler. Signed-off-by: Robert P. J. 
Day Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysv_fs.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysv_fs.h b/include/linux/sysv_fs.h index e0248631e461..96411306eec6 100644 --- a/include/linux/sysv_fs.h +++ b/include/linux/sysv_fs.h @@ -1,11 +1,7 @@ #ifndef _LINUX_SYSV_FS_H #define _LINUX_SYSV_FS_H -#if defined(__GNUC__) -# define __packed2__ __attribute__((packed, aligned(2))) -#else ->> I want to scream! << -#endif +#define __packed2__ __attribute__((packed, aligned(2))) #ifndef __KERNEL__ -- cgit v1.2.3-71-gd317 From 064106a91be5e76cb42c1ddf5d3871e3a1bd2a23 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 29 Apr 2008 01:03:27 -0700 Subject: kernel: add common infrastructure for unaligned access Create a linux/unaligned directory similar in spirit to the linux/byteorder folder to hold generic implementations collected from various arches. Currently there are five implementations: 1) packed_struct.h: C-struct based, from asm-generic/unaligned.h 2) le_byteshift.h: Open coded byte-swapping, heavily based on asm-arm 3) be_byteshift.h: Open coded byte-swapping, heavily based on asm-arm 4) memmove.h: taken from multiple implementations in tree 5) access_ok.h: taken from x86 and others, unaligned access is ok. All of the new implementations check the access size; a size other than 1, 2, 4 or 8 bytes will fail to link. 
API additions: get_unaligned_{le16|le32|le64|be16|be32|be64}(p) which is meant to replace code of the form: le16_to_cpu(get_unaligned((__le16 *)p)); put_unaligned_{le16|le32|le64|be16|be32|be64}(val, pointer) which is meant to replace code of the form: put_unaligned(cpu_to_le16(val), (__le16 *)p); The headers that arches should include from their asm/unaligned.h: access_ok.h : Wrappers of the byteswapping functions in asm/byteorder Choose a particular implementation for little-endian access: le_byteshift.h le_memmove.h (arch must be LE) le_struct.h (arch must be LE) Choose a particular implementation for big-endian access: be_byteshift.h be_memmove.h (arch must be BE) be_struct.h (arch must be BE) After including as needed from the above, include unaligned/generic.h and define your arch's get/put_unaligned as (for LE): #define get_unaligned __get_unaligned_le #define put_unaligned __put_unaligned_le Signed-off-by: Harvey Harrison Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/unaligned/access_ok.h | 67 +++++++++++++++++++++++++++++++ include/linux/unaligned/be_byteshift.h | 70 +++++++++++++++++++++++++++++++++ include/linux/unaligned/be_memmove.h | 36 +++++++++++++++++ include/linux/unaligned/be_struct.h | 36 +++++++++++++++++ include/linux/unaligned/generic.h | 68 ++++++++++++++++++++++++++++++++ include/linux/unaligned/le_byteshift.h | 70 +++++++++++++++++++++++++++++++++ include/linux/unaligned/le_memmove.h | 36 +++++++++++++++++ include/linux/unaligned/le_struct.h | 36 +++++++++++++++++ include/linux/unaligned/memmove.h | 45 +++++++++++++++++++++ include/linux/unaligned/packed_struct.h | 46 ++++++++++++++++++++++ 10 files changed, 510 insertions(+) create mode 100644 include/linux/unaligned/access_ok.h create mode 100644 include/linux/unaligned/be_byteshift.h create mode 100644 include/linux/unaligned/be_memmove.h create mode 100644 include/linux/unaligned/be_struct.h create mode 100644 include/linux/unaligned/generic.h create mode 100644 include/linux/unaligned/le_byteshift.h create mode 100644 include/linux/unaligned/le_memmove.h create mode 100644 include/linux/unaligned/le_struct.h create mode 100644 include/linux/unaligned/memmove.h create mode 100644 
include/linux/unaligned/le_memmove.h create mode 100644 include/linux/unaligned/le_struct.h create mode 100644 include/linux/unaligned/memmove.h create mode 100644 include/linux/unaligned/packed_struct.h (limited to 'include/linux') diff --git a/include/linux/unaligned/access_ok.h b/include/linux/unaligned/access_ok.h new file mode 100644 index 000000000000..99c1b4d20b0f --- /dev/null +++ b/include/linux/unaligned/access_ok.h @@ -0,0 +1,67 @@ +#ifndef _LINUX_UNALIGNED_ACCESS_OK_H +#define _LINUX_UNALIGNED_ACCESS_OK_H + +#include +#include + +static inline u16 get_unaligned_le16(const void *p) +{ + return le16_to_cpup((__le16 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return le32_to_cpup((__le32 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return le64_to_cpup((__le64 *)p); +} + +static inline u16 get_unaligned_be16(const void *p) +{ + return be16_to_cpup((__be16 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return be32_to_cpup((__be32 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return be64_to_cpup((__be64 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + *((__le16 *)p) = cpu_to_le16(val); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + *((__le32 *)p) = cpu_to_le32(val); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + *((__le64 *)p) = cpu_to_le64(val); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + *((__be16 *)p) = cpu_to_be16(val); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + *((__be32 *)p) = cpu_to_be32(val); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + *((__be64 *)p) = cpu_to_be64(val); +} + +#endif /* _LINUX_UNALIGNED_ACCESS_OK_H */ diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h new file mode 100644 index 000000000000..46dd12c5709e --- /dev/null +++ b/include/linux/unaligned/be_byteshift.h @@ -0,0 
+1,70 @@ +#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H +#define _LINUX_UNALIGNED_BE_BYTESHIFT_H + +#include + +static inline u16 __get_unaligned_be16(const u8 *p) +{ + return p[0] << 8 | p[1]; +} + +static inline u32 __get_unaligned_be32(const u8 *p) +{ + return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; +} + +static inline u64 __get_unaligned_be64(const u8 *p) +{ + return (u64)__get_unaligned_be32(p) << 32 | + __get_unaligned_be32(p + 4); +} + +static inline void __put_unaligned_be16(u16 val, u8 *p) +{ + *p++ = val >> 8; + *p++ = val; +} + +static inline void __put_unaligned_be32(u32 val, u8 *p) +{ + __put_unaligned_be16(val >> 16, p); + __put_unaligned_be16(val, p + 2); +} + +static inline void __put_unaligned_be64(u64 val, u8 *p) +{ + __put_unaligned_be32(val >> 32, p); + __put_unaligned_be32(val, p + 4); +} + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_be16((const u8 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_be32((const u8 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_be64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_be16(val, p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_be32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_be64(val, p); +} + +#endif /* _LINUX_UNALIGNED_BE_BYTESHIFT_H */ diff --git a/include/linux/unaligned/be_memmove.h b/include/linux/unaligned/be_memmove.h new file mode 100644 index 000000000000..c2a76c5c9ed0 --- /dev/null +++ b/include/linux/unaligned/be_memmove.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_BE_MEMMOVE_H +#define _LINUX_UNALIGNED_BE_MEMMOVE_H + +#include + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_memmove16((const u8 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_memmove32((const 
u8 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_memmove64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_memmove16(val, p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_memmove32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_memmove64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_MEMMOVE_H */ diff --git a/include/linux/unaligned/be_struct.h b/include/linux/unaligned/be_struct.h new file mode 100644 index 000000000000..132415836c50 --- /dev/null +++ b/include/linux/unaligned/be_struct.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_BE_STRUCT_H +#define _LINUX_UNALIGNED_BE_STRUCT_H + +#include + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_cpu16((const u8 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_cpu32((const u8 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_cpu64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_cpu16(val, p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_cpu32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_cpu64(val, p); +} + +#endif /* _LINUX_UNALIGNED_BE_STRUCT_H */ diff --git a/include/linux/unaligned/generic.h b/include/linux/unaligned/generic.h new file mode 100644 index 000000000000..02d97ff3df70 --- /dev/null +++ b/include/linux/unaligned/generic.h @@ -0,0 +1,68 @@ +#ifndef _LINUX_UNALIGNED_GENERIC_H +#define _LINUX_UNALIGNED_GENERIC_H + +/* + * Cause a link-time error if we try an unaligned access other than + * 1,2,4 or 8 bytes long + */ +extern void __bad_unaligned_access_size(void); + +#define __get_unaligned_le(ptr) ((__force typeof(*(ptr)))({ \ + __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ + 
__builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \ + __bad_unaligned_access_size())))); \ + })) + +#define __get_unaligned_be(ptr) ((__force typeof(*(ptr)))({ \ + __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ + __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)), \ + __bad_unaligned_access_size())))); \ + })) + +#define __put_unaligned_le(val, ptr) ({ \ + void *__gu_p = (ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + *(u8 *)__gu_p = (__force u8)(val); \ + break; \ + case 2: \ + put_unaligned_le16((__force u16)(val), __gu_p); \ + break; \ + case 4: \ + put_unaligned_le32((__force u32)(val), __gu_p); \ + break; \ + case 8: \ + put_unaligned_le64((__force u64)(val), __gu_p); \ + break; \ + default: \ + __bad_unaligned_access_size(); \ + break; \ + } \ + (void)0; }) + +#define __put_unaligned_be(val, ptr) ({ \ + void *__gu_p = (ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + *(u8 *)__gu_p = (__force u8)(val); \ + break; \ + case 2: \ + put_unaligned_be16((__force u16)(val), __gu_p); \ + break; \ + case 4: \ + put_unaligned_be32((__force u32)(val), __gu_p); \ + break; \ + case 8: \ + put_unaligned_be64((__force u64)(val), __gu_p); \ + break; \ + default: \ + __bad_unaligned_access_size(); \ + break; \ + } \ + (void)0; }) + +#endif /* _LINUX_UNALIGNED_GENERIC_H */ diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h new file mode 100644 index 000000000000..59777e951baf --- /dev/null +++ b/include/linux/unaligned/le_byteshift.h @@ -0,0 +1,70 @@ +#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H +#define _LINUX_UNALIGNED_LE_BYTESHIFT_H + +#include + +static inline u16 __get_unaligned_le16(const u8 
*p) +{ + return p[0] | p[1] << 8; +} + +static inline u32 __get_unaligned_le32(const u8 *p) +{ + return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; +} + +static inline u64 __get_unaligned_le64(const u8 *p) +{ + return (u64)__get_unaligned_le32(p + 4) << 32 | + __get_unaligned_le32(p); +} + +static inline void __put_unaligned_le16(u16 val, u8 *p) +{ + *p++ = val; + *p++ = val >> 8; +} + +static inline void __put_unaligned_le32(u32 val, u8 *p) +{ + __put_unaligned_le16(val >> 16, p + 2); + __put_unaligned_le16(val, p); +} + +static inline void __put_unaligned_le64(u64 val, u8 *p) +{ + __put_unaligned_le32(val >> 32, p + 4); + __put_unaligned_le32(val, p); +} + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_le16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_le32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_le64((const u8 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_le16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_le32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_le64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_BYTESHIFT_H */ diff --git a/include/linux/unaligned/le_memmove.h b/include/linux/unaligned/le_memmove.h new file mode 100644 index 000000000000..269849bee4ec --- /dev/null +++ b/include/linux/unaligned/le_memmove.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_LE_MEMMOVE_H +#define _LINUX_UNALIGNED_LE_MEMMOVE_H + +#include + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_memmove16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_memmove32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_memmove64((const u8 *)p); +} + +static inline void 
put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_memmove16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_memmove32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_memmove64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_MEMMOVE_H */ diff --git a/include/linux/unaligned/le_struct.h b/include/linux/unaligned/le_struct.h new file mode 100644 index 000000000000..088c4572faa8 --- /dev/null +++ b/include/linux/unaligned/le_struct.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_LE_STRUCT_H +#define _LINUX_UNALIGNED_LE_STRUCT_H + +#include + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_cpu16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_cpu32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_cpu64((const u8 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_cpu16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_cpu32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_cpu64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_STRUCT_H */ diff --git a/include/linux/unaligned/memmove.h b/include/linux/unaligned/memmove.h new file mode 100644 index 000000000000..eeb5a779a4fd --- /dev/null +++ b/include/linux/unaligned/memmove.h @@ -0,0 +1,45 @@ +#ifndef _LINUX_UNALIGNED_MEMMOVE_H +#define _LINUX_UNALIGNED_MEMMOVE_H + +#include +#include + +/* Use memmove here, so gcc does not insert a __builtin_memcpy. 
*/ + +static inline u16 __get_unaligned_memmove16(const void *p) +{ + u16 tmp; + memmove(&tmp, p, 2); + return tmp; +} + +static inline u32 __get_unaligned_memmove32(const void *p) +{ + u32 tmp; + memmove(&tmp, p, 4); + return tmp; +} + +static inline u64 __get_unaligned_memmove64(const void *p) +{ + u64 tmp; + memmove(&tmp, p, 8); + return tmp; +} + +static inline void __put_unaligned_memmove16(u16 val, void *p) +{ + memmove(p, &val, 2); +} + +static inline void __put_unaligned_memmove32(u32 val, void *p) +{ + memmove(p, &val, 4); +} + +static inline void __put_unaligned_memmove64(u64 val, void *p) +{ + memmove(p, &val, 8); +} + +#endif /* _LINUX_UNALIGNED_MEMMOVE_H */ diff --git a/include/linux/unaligned/packed_struct.h b/include/linux/unaligned/packed_struct.h new file mode 100644 index 000000000000..2498bb9fe002 --- /dev/null +++ b/include/linux/unaligned/packed_struct.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H +#define _LINUX_UNALIGNED_PACKED_STRUCT_H + +#include + +struct __una_u16 { u16 x __attribute__((packed)); }; +struct __una_u32 { u32 x __attribute__((packed)); }; +struct __una_u64 { u64 x __attribute__((packed)); }; + +static inline u16 __get_unaligned_cpu16(const void *p) +{ + const struct __una_u16 *ptr = (const struct __una_u16 *)p; + return ptr->x; +} + +static inline u32 __get_unaligned_cpu32(const void *p) +{ + const struct __una_u32 *ptr = (const struct __una_u32 *)p; + return ptr->x; +} + +static inline u64 __get_unaligned_cpu64(const void *p) +{ + const struct __una_u64 *ptr = (const struct __una_u64 *)p; + return ptr->x; +} + +static inline void __put_unaligned_cpu16(u16 val, void *p) +{ + struct __una_u16 *ptr = (struct __una_u16 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu32(u32 val, void *p) +{ + struct __una_u32 *ptr = (struct __una_u32 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu64(u64 val, void *p) +{ + struct __una_u64 *ptr = (struct __una_u64 *)p; + ptr->x = val; +} + +#endif 
/* _LINUX_UNALIGNED_PACKED_STRUCT_H */ -- cgit v1.2.3-71-gd317 From 37487a56523d402e25650da16c337acf4cecd13d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 29 Apr 2008 01:03:49 -0700 Subject: Add kbuild.h that contains common definitions for kbuild users The same definitions are used for the bounds logic and the asm-offsets.h generation by kbuild. Put them into include/linux/kbuild.h file. Also add a new feature COMMENT("text") which can be used to insert lines of comments into asm-offsets.h and bounds.h. Cc: Sam Ravnborg Signed-off-by: Christoph Lameter Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Ralf Baechle Cc: Jay Estabrook Cc: Ivan Kokshaysky Cc: Richard Henderson Cc: "Luck, Tony" Cc: Russell King Cc: Chris Zankel Cc: David S. Miller Cc: Haavard Skinnemoen Cc: Bryan Wu Cc: Mike Frysinger Cc: Yoshinori Sato Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: Greg Ungerer Cc: David Howells Cc: Kyle McMartin Cc: Grant Grundler Cc: Matthew Wilcox Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: Miles Bader Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kbuild.h | 15 +++++++++++++++ kernel/bounds.c | 6 +----- 2 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 include/linux/kbuild.h (limited to 'include/linux') diff --git a/include/linux/kbuild.h b/include/linux/kbuild.h new file mode 100644 index 000000000000..22a72198c14b --- /dev/null +++ b/include/linux/kbuild.h @@ -0,0 +1,15 @@ +#ifndef __LINUX_KBUILD_H +#define __LINUX_KBUILD_H + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)) + +#define COMMENT(x) \ + asm volatile("\n->#" x) + +#endif diff --git a/kernel/bounds.c b/kernel/bounds.c index c3c55544db2f..3c5301381837 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -8,11 +8,7 @@ /* Include headers
that define the enum constants of interest */ #include #include - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) +#include void foo(void) { -- cgit v1.2.3-71-gd317 From fee4b19fb3f28d17c0b9f9ea0668db5275697178 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 Apr 2008 12:01:02 +0200 Subject: bitops: remove "optimizations" The mapsize optimizations which were moved from x86 to the generic code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the binary size on non x86 architectures. Looking into the real effects of the "optimizations" it turned out that they are not used in find_next_bit() and find_next_zero_bit(). The ones in find_first_bit() and find_first_zero_bit() are used in a couple of places but none of them is a real hot path. Remove the "optimizations" all together and call the library functions unconditionally. Boot-tested on x86 and compile tested on every cross compiler I have. Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 115 ++++++------------------------------------------- lib/find_next_bit.c | 22 +++++----- 2 files changed, 22 insertions(+), 115 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 8340a3aba49a..024f2b027244 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -114,8 +114,6 @@ static inline unsigned fls_long(unsigned long l) #ifdef __KERNEL__ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT -extern unsigned long __find_first_bit(const unsigned long *addr, - unsigned long size); /** * find_first_bit - find the first set bit in a memory region @@ -124,28 +122,8 @@ extern unsigned long __find_first_bit(const unsigned long *addr, * * Returns the bit number of the first set bit. 
*/ -static __always_inline unsigned long -find_first_bit(const unsigned long *addr, unsigned long size) -{ - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) - return __ffs((*addr) | (1ul << size)); - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) - return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr); - - /* size is not constant or too big */ - return __find_first_bit(addr, size); -} - -extern unsigned long __find_first_zero_bit(const unsigned long *addr, - unsigned long size); +extern unsigned long find_first_bit(const unsigned long *addr, + unsigned long size); /** * find_first_zero_bit - find the first cleared bit in a memory region @@ -154,31 +132,12 @@ extern unsigned long __find_first_zero_bit(const unsigned long *addr, * * Returns the bit number of the first cleared bit. */ -static __always_inline unsigned long -find_first_zero_bit(const unsigned long *addr, unsigned long size) -{ - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - return __ffs(~(*addr) | (1ul << size)); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) - return (~(*addr) == 0) ? 
BITS_PER_LONG : __ffs(~(*addr)); - - /* size is not constant or too big */ - return __find_first_zero_bit(addr, size); -} +extern unsigned long find_first_zero_bit(const unsigned long *addr, + unsigned long size); + #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef CONFIG_GENERIC_FIND_NEXT_BIT -extern unsigned long __find_next_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); /** * find_next_bit - find the next set bit in a memory region @@ -186,36 +145,8 @@ extern unsigned long __find_next_bit(const unsigned long *addr, * @offset: The bitnumber to start searching at * @size: The bitmap size in bits */ -static __always_inline unsigned long -find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - unsigned long value; - - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - value = (*addr) & ((~0ul) << offset); - value |= (1ul << size); - return __ffs(value); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { - value = (*addr) & ((~0ul) << offset); - return (value == 0) ? 
BITS_PER_LONG : __ffs(value); - } - - /* size is not constant or too big */ - return __find_next_bit(addr, size, offset); -} - -extern unsigned long __find_next_zero_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); +extern unsigned long find_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); /** * find_next_zero_bit - find the next cleared bit in a memory region @@ -223,33 +154,11 @@ extern unsigned long __find_next_zero_bit(const unsigned long *addr, * @offset: The bitnumber to start searching at * @size: The bitmap size in bits */ -static __always_inline unsigned long -find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - unsigned long value; - - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - value = (~(*addr)) & ((~0ul) << offset); - value |= (1ul << size); - return __ffs(value); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { - value = (~(*addr)) & ((~0ul) << offset); - return (value == 0) ? BITS_PER_LONG : __ffs(value); - } - - /* size is not constant or too big */ - return __find_next_zero_bit(addr, size, offset); -} + +extern unsigned long find_next_zero_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); + #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ #endif /* __KERNEL__ */ #endif diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index d3f5784807b4..24c59ded47a0 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -20,8 +20,8 @@ /* * Find the next set bit in a memory region. 
*/ -unsigned long __find_next_bit(const unsigned long *addr, - unsigned long size, unsigned long offset) +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) { const unsigned long *p = addr + BITOP_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); @@ -58,14 +58,14 @@ found_first: found_middle: return result + __ffs(tmp); } -EXPORT_SYMBOL(__find_next_bit); +EXPORT_SYMBOL(find_next_bit); /* * This implementation of find_{first,next}_zero_bit was stolen from * Linus' asm-alpha/bitops.h. */ -unsigned long __find_next_zero_bit(const unsigned long *addr, - unsigned long size, unsigned long offset) +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) { const unsigned long *p = addr + BITOP_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); @@ -102,15 +102,14 @@ found_first: found_middle: return result + ffz(tmp); } -EXPORT_SYMBOL(__find_next_zero_bit); +EXPORT_SYMBOL(find_next_zero_bit); #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT /* * Find the first set bit in a memory region. */ -unsigned long __find_first_bit(const unsigned long *addr, - unsigned long size) +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { const unsigned long *p = addr; unsigned long result = 0; @@ -131,13 +130,12 @@ unsigned long __find_first_bit(const unsigned long *addr, found: return result + __ffs(tmp); } -EXPORT_SYMBOL(__find_first_bit); +EXPORT_SYMBOL(find_first_bit); /* * Find the first cleared bit in a memory region. 
*/ -unsigned long __find_first_zero_bit(const unsigned long *addr, - unsigned long size) +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { const unsigned long *p = addr; unsigned long result = 0; @@ -158,7 +156,7 @@ unsigned long __find_first_zero_bit(const unsigned long *addr, found: return result + ffz(tmp); } -EXPORT_SYMBOL(__find_first_zero_bit); +EXPORT_SYMBOL(find_first_zero_bit); #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef __BIG_ENDIAN -- cgit v1.2.3-71-gd317 From 8e149e09f91098fd72bf9ac5b4a77a693abf721e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 23 Apr 2008 14:56:30 -0700 Subject: pci/irq: restore mask_bits in msi shutdown -v3 [PATCH 1/2] pci/irq: restore mask_bits in msi shutdown -v3 Yinghai found that kexec'ing a RHEL 5.1 kernel with 2.6.25-rc3+ kernels prevents his NIC from working. He bisected to | commit 89d694b9dbe769ca1004e01db0ca43964806a611 | Author: Thomas Gleixner | Date: Mon Feb 18 18:25:17 2008 +0100 | | genirq: do not leave interupts enabled on free_irq | | The default_disable() function was changed in commit: | | 76d2160147f43f982dfe881404cfde9fd0a9da21 | genirq: do not mask interrupts by default | For MSI, default_shutdown will call mask_bit for msi device. All mask bits will be left disabled after free_irq. Then in the kexec case, the next kernel can only use msi_enable bit, so all device's MSI can not be used. So let's restore the mask bit to its pci reset defined value (enabled) when we disable the kernel's use of msi to be a little friendlier to kexec'd kernels. Extend msi_set_mask_bit to msi_set_mask_bits to take mask, so we can fully restore that to 0x00 instead of 0xfe.
Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 21 ++++++++++++++------- include/linux/msi.h | 1 + 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 26938da8f438..e3a05cc9a595 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -123,7 +123,7 @@ static void msix_flush_writes(unsigned int irq) } } -static void msi_set_mask_bit(unsigned int irq, int flag) +static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) { struct msi_desc *entry; @@ -137,8 +137,8 @@ static void msi_set_mask_bit(unsigned int irq, int flag) pos = (long)entry->mask_base; pci_read_config_dword(entry->dev, pos, &mask_bits); - mask_bits &= ~(1); - mask_bits |= flag; + mask_bits &= ~(mask); + mask_bits |= flag & mask; pci_write_config_dword(entry->dev, pos, mask_bits); } else { msi_set_enable(entry->dev, !flag); @@ -241,13 +241,13 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg) void mask_msi_irq(unsigned int irq) { - msi_set_mask_bit(irq, 1); + msi_set_mask_bits(irq, 1, 1); msix_flush_writes(irq); } void unmask_msi_irq(unsigned int irq) { - msi_set_mask_bit(irq, 0); + msi_set_mask_bits(irq, 1, 0); msix_flush_writes(irq); } @@ -291,7 +291,8 @@ static void __pci_restore_msi_state(struct pci_dev *dev) msi_set_enable(dev, 0); write_msi_msg(dev->irq, &entry->msg); if (entry->msi_attrib.maskbit) - msi_set_mask_bit(dev->irq, entry->msi_attrib.masked); + msi_set_mask_bits(dev->irq, entry->msi_attrib.maskbits_mask, + entry->msi_attrib.masked); pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); control &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); @@ -315,7 +316,7 @@ static void __pci_restore_msix_state(struct pci_dev *dev) list_for_each_entry(entry, &dev->msi_list, list) { write_msi_msg(entry->irq, &entry->msg); - msi_set_mask_bit(entry->irq, entry->msi_attrib.masked); + msi_set_mask_bits(entry->irq, 1, entry->msi_attrib.masked); } 
BUG_ON(list_empty(&dev->msi_list)); @@ -382,6 +383,7 @@ static int msi_capability_init(struct pci_dev *dev) pci_write_config_dword(dev, msi_mask_bits_reg(pos, is_64bit_address(control)), maskbits); + entry->msi_attrib.maskbits_mask = temp; } list_add_tail(&entry->list, &dev->msi_list); @@ -583,6 +585,11 @@ void pci_disable_msi(struct pci_dev* dev) BUG_ON(list_empty(&dev->msi_list)); entry = list_entry(dev->msi_list.next, struct msi_desc, list); + /* Return the the pci reset with msi irqs unmasked */ + if (entry->msi_attrib.maskbit) { + u32 mask = entry->msi_attrib.maskbits_mask; + msi_set_mask_bits(dev->irq, mask, ~mask); + } if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) { return; } diff --git a/include/linux/msi.h b/include/linux/msi.h index 94bb46d82efd..8f2939227207 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -22,6 +22,7 @@ struct msi_desc { __u8 masked : 1; __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ __u8 pos; /* Location of the msi capability */ + __u32 maskbits_mask; /* mask bits mask */ __u16 entry_nr; /* specific enabled entry */ unsigned default_irq; /* default pre-assigned irq */ }msi_attrib; -- cgit v1.2.3-71-gd317 From d52877c7b1afb8c37ebe17e2005040b79cb618b0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 23 Apr 2008 14:58:09 -0700 Subject: pci/irq: let pci_device_shutdown to call pci_msi_shutdown v2 [PATCH 2/2] pci/irq: let pci_device_shutdown to call pci_msi_shutdown v2 this change | commit 23a274c8a5adafc74a66f16988776fc7dd6f6e51 | Author: Prakash, Sathya | Date: Fri Mar 7 15:53:21 2008 +0530 | | [SCSI] mpt fusion: Enable MSI by default for SAS controllers | | This patch modifies the driver to enable MSI by default for all SAS chips. | | Signed-off-by: Sathya Prakash | Signed-off-by: James Bottomley | Causes the kexec of a RHEL 5.1 kernel to fail. root cause: the rhel 5.1 kernel still uses INTx emulation.
and mptscsih_shutdown doesn't call pci_disable_msi to reenable INTx on kexec path So call pci_msi_shutdown in the shutdown path to do the same thing to msix Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 35 ++++++++++++++++++++++++++--------- drivers/pci/pci-driver.c | 2 ++ include/linux/pci.h | 6 ++++++ 3 files changed, 34 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index e3a05cc9a595..8c61304cbb37 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -571,10 +571,9 @@ int pci_enable_msi(struct pci_dev* dev) } EXPORT_SYMBOL(pci_enable_msi); -void pci_disable_msi(struct pci_dev* dev) +void pci_msi_shutdown(struct pci_dev* dev) { struct msi_desc *entry; - int default_irq; if (!pci_msi_enable || !dev || !dev->msi_enabled) return; @@ -590,15 +589,26 @@ void pci_disable_msi(struct pci_dev* dev) u32 mask = entry->msi_attrib.maskbits_mask; msi_set_mask_bits(dev->irq, mask, ~mask); } - if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) { + if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) return; - } - - default_irq = entry->msi_attrib.default_irq; - msi_free_irqs(dev); /* Restore dev->irq to its default pin-assertion irq */ - dev->irq = default_irq; + dev->irq = entry->msi_attrib.default_irq; +} +void pci_disable_msi(struct pci_dev* dev) +{ + struct msi_desc *entry; + + if (!pci_msi_enable || !dev || !dev->msi_enabled) + return; + + pci_msi_shutdown(dev); + + entry = list_entry(dev->msi_list.next, struct msi_desc, list); + if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) + return; + + msi_free_irqs(dev); } EXPORT_SYMBOL(pci_disable_msi); @@ -691,7 +701,7 @@ static void msix_free_all_irqs(struct pci_dev *dev) msi_free_irqs(dev); } -void pci_disable_msix(struct pci_dev* dev) +void pci_msix_shutdown(struct pci_dev* dev) { if (!pci_msi_enable || !dev || !dev->msix_enabled) return; @@ -699,6 +709,13 @@ void pci_disable_msix(struct pci_dev* dev) 
msix_set_enable(dev, 0); pci_intx_for_msi(dev, 1); dev->msix_enabled = 0; +} +void pci_disable_msix(struct pci_dev* dev) +{ + if (!pci_msi_enable || !dev || !dev->msix_enabled) + return; + + pci_msix_shutdown(dev); msix_free_all_irqs(dev); } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index e8d94fafc280..72cf61ed8f96 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -360,6 +360,8 @@ static void pci_device_shutdown(struct device *dev) if (drv && drv->shutdown) drv->shutdown(pci_dev); + pci_msi_shutdown(pci_dev); + pci_msix_shutdown(pci_dev); } /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 7a0770d4c4e2..e09c57e9c373 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -701,6 +701,8 @@ static inline int pci_enable_msi(struct pci_dev *dev) return -1; } +static inline void pci_msi_shutdown(struct pci_dev *dev) +{ } static inline void pci_disable_msi(struct pci_dev *dev) { } @@ -710,6 +712,8 @@ static inline int pci_enable_msix(struct pci_dev *dev, return -1; } +static inline void pci_msix_shutdown(struct pci_dev *dev) +{ } static inline void pci_disable_msix(struct pci_dev *dev) { } @@ -720,9 +724,11 @@ static inline void pci_restore_msi_state(struct pci_dev *dev) { } #else extern int pci_enable_msi(struct pci_dev *dev); +extern void pci_msi_shutdown(struct pci_dev *dev); extern void pci_disable_msi(struct pci_dev *dev); extern int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec); +extern void pci_msix_shutdown(struct pci_dev *dev); extern void pci_disable_msix(struct pci_dev *dev); extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); extern void pci_restore_msi_state(struct pci_dev *dev); -- cgit v1.2.3-71-gd317 From 8f45c1a58a25c3a1a2f42521445e1e786c4c0b92 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 29 Apr 2008 10:16:38 -0700 Subject: block: fix queue locking verification The new queue_flag_set/clear() functions verify that the queue is locked, but in 
doing so they will actually instead oops if the queue lock hasn't been initialized at all. So fix the lock debug test to consider the "no lock" case to be unlocked. This way you get a nice WARN_ON_ONCE() instead of a fatal oops. Bug introduced by commit 75ad23bc0fcb4f992a5d06982bf0857ab1738e9e ("block: make queue flags non-atomic"). Cc: Jens Axboe Cc: Nick Piggin Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c09696a90d6a..95864b3ff298 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -410,6 +410,12 @@ struct request_queue #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ +static inline int queue_is_locked(struct request_queue *q) +{ + spinlock_t *lock = q->queue_lock; + return lock && spin_is_locked(lock); +} + static inline void queue_flag_set_unlocked(unsigned int flag, struct request_queue *q) { @@ -418,7 +424,7 @@ static inline void queue_flag_set_unlocked(unsigned int flag, static inline void queue_flag_set(unsigned int flag, struct request_queue *q) { - WARN_ON_ONCE(!spin_is_locked(q->queue_lock)); + WARN_ON_ONCE(!queue_is_locked(q)); __set_bit(flag, &q->queue_flags); } @@ -430,7 +436,7 @@ static inline void queue_flag_clear_unlocked(unsigned int flag, static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) { - WARN_ON_ONCE(!spin_is_locked(q->queue_lock)); + WARN_ON_ONCE(!queue_is_locked(q)); __clear_bit(flag, &q->queue_flags); } -- cgit v1.2.3-71-gd317 From 7663c1e2792a9662b23dec6e19bfcd3d55360b8f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Apr 2008 21:31:27 +0200 Subject: Improve queue_is_locked() spin_is_locked() doesn't work on UP without spinlock debugging. Make it safer and just return 1 on UP, so we don't get false positives. 
The plan is to kill this debug function during the -rc cycle. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95864b3ff298..d2a1b71e93c3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -412,8 +412,12 @@ struct request_queue static inline int queue_is_locked(struct request_queue *q) { +#ifdef CONFIG_SMP spinlock_t *lock = q->queue_lock; return lock && spin_is_locked(lock); +#else + return 1; +#endif } static inline void queue_flag_set_unlocked(unsigned int flag, -- cgit v1.2.3-71-gd317 From 98db6f193c93e9b4729215af2c9101210e11d26c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 29 Apr 2008 22:38:48 +0200 Subject: x86: fix section mismatch in pci_scan_bus Fix the following section mismatch warning: WARNING: vmlinux.o(.text+0x275616): Section mismatch in reference from the function pci_scan_bus() to the function .devinit.text:pci_scan_bus_parented() The warning was seen with a CONFIG_DEBUG_SECTION_MISMATCH=y build. The inline function pci_scan_bus refers to functions annotated __devinit - so annotate it __devinit too. This revealed a few x86 specific functions that were only used from __init or __devinit context. So annotate these __devinit and the warning was killed. The added include in pci.h was not strictly required but added to avoid being dependent on indirect includes.
Signed-off-by: Sam Ravnborg Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 4 ++-- include/linux/pci.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2a4d751818b7..88b5416cf009 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -489,7 +489,7 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } -struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) +struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) { struct pci_bus *bus = NULL; struct pci_sysdata *sd; @@ -512,7 +512,7 @@ struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) return bus; } -struct pci_bus *pci_scan_bus_with_sysdata(int busno) +struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno) { return pci_scan_bus_on_node(busno, &pci_root_ops, -1); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 96acd0dae241..a59517b4930f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -44,6 +44,7 @@ #include #include +#include #include #include #include @@ -474,7 +475,7 @@ extern struct pci_bus *pci_find_bus(int domain, int busnr); void pci_bus_add_devices(struct pci_bus *bus); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); -static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, +static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata) { struct pci_bus *root_bus; -- cgit v1.2.3-71-gd317 From 3dcf54515aa4981a647ad74859199032965193a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Apr 2008 18:13:32 -0400 Subject: ext4: move headers out of include/linux Move ext4 headers out of include/linux. This is just the trivial move, there's some more thing that could be done later. 
Signed-off-by: Christoph Hellwig Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/acl.c | 4 +- fs/ext4/balloc.c | 6 +- fs/ext4/bitmap.c | 2 +- fs/ext4/dir.c | 2 +- fs/ext4/ext4.h | 1205 +++++++++++++++++++++++++++++++++++++++ fs/ext4/ext4_extents.h | 232 ++++++++ fs/ext4/ext4_i.h | 167 ++++++ fs/ext4/ext4_jbd2.c | 2 +- fs/ext4/ext4_jbd2.h | 231 ++++++++ fs/ext4/ext4_sb.h | 148 +++++ fs/ext4/extents.c | 4 +- fs/ext4/file.c | 4 +- fs/ext4/fsync.c | 4 +- fs/ext4/hash.c | 2 +- fs/ext4/ialloc.c | 5 +- fs/ext4/inode.c | 2 +- fs/ext4/ioctl.c | 4 +- fs/ext4/mballoc.c | 4 +- fs/ext4/migrate.c | 4 +- fs/ext4/namei.c | 4 +- fs/ext4/resize.c | 3 +- fs/ext4/super.c | 5 +- fs/ext4/symlink.c | 2 +- fs/ext4/xattr.c | 4 +- fs/ext4/xattr_security.c | 4 +- fs/ext4/xattr_trusted.c | 4 +- fs/ext4/xattr_user.c | 4 +- include/linux/ext4_fs.h | 1205 --------------------------------------- include/linux/ext4_fs_extents.h | 232 -------- include/linux/ext4_fs_i.h | 167 ------ include/linux/ext4_fs_sb.h | 148 ----- include/linux/ext4_jbd2.h | 231 -------- 32 files changed, 2021 insertions(+), 2024 deletions(-) create mode 100644 fs/ext4/ext4.h create mode 100644 fs/ext4/ext4_extents.h create mode 100644 fs/ext4/ext4_i.h create mode 100644 fs/ext4/ext4_jbd2.h create mode 100644 fs/ext4/ext4_sb.h delete mode 100644 include/linux/ext4_fs.h delete mode 100644 include/linux/ext4_fs_extents.h delete mode 100644 include/linux/ext4_fs_i.h delete mode 100644 include/linux/ext4_fs_sb.h delete mode 100644 include/linux/ext4_jbd2.h (limited to 'include/linux') diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index cc92624a46a2..3c8dab880d91 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index af5032b23c29..da994374ec3b 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -15,12 +15,12 @@ #include 
#include #include -#include -#include #include #include - +#include "ext4.h" +#include "ext4_jbd2.h" #include "group.h" + /* * balloc.c contains the blocks allocation and deallocation routines */ diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index 420554f8f79d..d37ea6750454 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -9,7 +9,7 @@ #include #include -#include +#include "ext4.h" #ifdef EXT4FS_DEBUG diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 88c97f7312be..2bf0331ea194 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -23,10 +23,10 @@ #include #include -#include #include #include #include +#include "ext4.h" static unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h new file mode 100644 index 000000000000..8158083f7ac0 --- /dev/null +++ b/fs/ext4/ext4.h @@ -0,0 +1,1205 @@ +/* + * ext4.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _EXT4_H +#define _EXT4_H + +#include +#include +#include +#include "ext4_i.h" + +/* + * The second extended filesystem constants/structures + */ + +/* + * Define EXT4FS_DEBUG to produce debug messages + */ +#undef EXT4FS_DEBUG + +/* + * Define EXT4_RESERVATION to reserve data blocks for expanding files + */ +#define EXT4_DEFAULT_RESERVE_BLOCKS 8 +/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ +#define EXT4_MAX_RESERVE_BLOCKS 1027 +#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0 + +/* + * Debug code + */ +#ifdef EXT4FS_DEBUG +#define ext4_debug(f, a...) \ + do { \ + printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __FUNCTION__); \ + printk (KERN_DEBUG f, ## a); \ + } while (0) +#else +#define ext4_debug(f, a...) 
do {} while (0) +#endif + +#define EXT4_MULTIBLOCK_ALLOCATOR 1 + +/* prefer goal again. length */ +#define EXT4_MB_HINT_MERGE 1 +/* blocks already reserved */ +#define EXT4_MB_HINT_RESERVED 2 +/* metadata is being allocated */ +#define EXT4_MB_HINT_METADATA 4 +/* first blocks in the file */ +#define EXT4_MB_HINT_FIRST 8 +/* search for the best chunk */ +#define EXT4_MB_HINT_BEST 16 +/* data is being allocated */ +#define EXT4_MB_HINT_DATA 32 +/* don't preallocate (for tails) */ +#define EXT4_MB_HINT_NOPREALLOC 64 +/* allocate for locality group */ +#define EXT4_MB_HINT_GROUP_ALLOC 128 +/* allocate goal blocks or none */ +#define EXT4_MB_HINT_GOAL_ONLY 256 +/* goal is meaningful */ +#define EXT4_MB_HINT_TRY_GOAL 512 + +struct ext4_allocation_request { + /* target inode for block we're allocating */ + struct inode *inode; + /* logical block in target inode */ + ext4_lblk_t logical; + /* phys. target (a hint) */ + ext4_fsblk_t goal; + /* the closest logical allocated block to the left */ + ext4_lblk_t lleft; + /* phys. block for ^^^ */ + ext4_fsblk_t pleft; + /* the closest logical allocated block to the right */ + ext4_lblk_t lright; + /* phys. block for ^^^ */ + ext4_fsblk_t pright; + /* how many blocks we want to allocate */ + unsigned long len; + /* flags. 
see above EXT4_MB_HINT_* */ + unsigned long flags; +}; + +/* + * Special inodes numbers + */ +#define EXT4_BAD_INO 1 /* Bad blocks inode */ +#define EXT4_ROOT_INO 2 /* Root inode */ +#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ +#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ +#define EXT4_JOURNAL_INO 8 /* Journal inode */ + +/* First non-reserved inode for old ext4 filesystems */ +#define EXT4_GOOD_OLD_FIRST_INO 11 + +/* + * Maximal count of links to a file + */ +#define EXT4_LINK_MAX 65000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT4_MIN_BLOCK_SIZE 1024 +#define EXT4_MAX_BLOCK_SIZE 65536 +#define EXT4_MIN_BLOCK_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) +#else +# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) +#endif +#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) +#ifdef __KERNEL__ +# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +#else +# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) +#endif +#ifdef __KERNEL__ +#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits) +#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size) +#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino) +#else +#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ + EXT4_GOOD_OLD_INODE_SIZE : \ + (s)->s_inode_size) +#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? 
\ + EXT4_GOOD_OLD_FIRST_INO : \ + (s)->s_first_ino) +#endif +#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) + +/* + * Structure of a blocks group descriptor + */ +struct ext4_group_desc +{ + __le32 bg_block_bitmap_lo; /* Blocks bitmap block */ + __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */ + __le32 bg_inode_table_lo; /* Inodes table block */ + __le16 bg_free_blocks_count; /* Free blocks count */ + __le16 bg_free_inodes_count; /* Free inodes count */ + __le16 bg_used_dirs_count; /* Directories count */ + __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ + __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ + __le16 bg_itable_unused; /* Unused inodes count */ + __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ + __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ + __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ + __le32 bg_inode_table_hi; /* Inodes table block MSB */ + __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ + __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ + __le16 bg_used_dirs_count_hi; /* Directories count MSB */ + __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ + __u32 bg_reserved2[3]; +}; + +#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ +#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ +#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ + +#ifdef __KERNEL__ +#include "ext4_sb.h" +#endif +/* + * Macro-instructions used to manage group descriptors + */ +#define EXT4_MIN_DESC_SIZE 32 +#define EXT4_MIN_DESC_SIZE_64BIT 64 +#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE +#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) +#ifdef __KERNEL__ +# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) +# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) +# define EXT4_DESC_PER_BLOCK_BITS(s) 
(EXT4_SB(s)->s_desc_per_block_bits) +#else +# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s)) +# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +#endif + +/* + * Constants relative to the data blocks + */ +#define EXT4_NDIR_BLOCKS 12 +#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS +#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) +#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) +#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) + +/* + * Inode flags + */ +#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT4_UNRM_FL 0x00000002 /* Undelete */ +#define EXT4_COMPR_FL 0x00000004 /* Compress file */ +#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */ +#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... 
*/ +#define EXT4_DIRTY_FL 0x00000100 +#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */ +#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */ +#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ +#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ +#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ +#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ + +#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + +/* + * Inode dynamic state flags + */ +#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ +#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ +#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ + +/* Used to pass group descriptor data when online resize is done */ +struct ext4_new_group_input { + __u32 group; /* Group number for this data */ + __u64 block_bitmap; /* Absolute block number of block bitmap */ + __u64 inode_bitmap; /* Absolute block number of inode bitmap */ + __u64 inode_table; /* Absolute block number of inode table start */ + __u32 blocks_count; /* Total number of blocks in this group */ + __u16 reserved_blocks; /* Number of reserved blocks in this group */ + __u16 unused; +}; + +/* The struct ext4_new_group_input in kernel space, with free_blocks_count */ +struct 
ext4_new_group_data { + __u32 group; + __u64 block_bitmap; + __u64 inode_bitmap; + __u64 inode_table; + __u32 blocks_count; + __u16 reserved_blocks; + __u16 unused; + __u32 free_blocks_count; +}; + +/* + * Following is used by preallocation code to tell get_blocks() that we + * want uninitialzed extents. + */ +#define EXT4_CREATE_UNINITIALIZED_EXT 2 + +/* + * ioctl commands + */ +#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS +#define EXT4_IOC_GETVERSION _IOR('f', 3, long) +#define EXT4_IOC_SETVERSION _IOW('f', 4, long) +#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) +#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION +#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION +#ifdef CONFIG_JBD2_DEBUG +#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) +#endif +#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) +#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) +#define EXT4_IOC_MIGRATE _IO('f', 7) + +/* + * ioctl commands in 32 bit emulation + */ +#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define EXT4_IOC32_GETVERSION _IOR('f', 3, int) +#define EXT4_IOC32_SETVERSION _IOW('f', 4, int) +#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) +#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) +#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) +#ifdef CONFIG_JBD2_DEBUG +#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) +#endif +#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION +#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION + + +/* + * Mount options + */ +struct ext4_mount_options { + unsigned long s_mount_opt; + uid_t s_resuid; + gid_t s_resgid; + unsigned long s_commit_interval; +#ifdef CONFIG_QUOTA + int s_jquota_fmt; + char *s_qf_names[MAXQUOTAS]; +#endif +}; + +/* + * Structure of an inode on the disk + */ +struct ext4_inode { + __le16 i_mode; /* File mode */ + __le16 i_uid; /* Low 16 bits of Owner 
Uid */ + __le32 i_size_lo; /* Size in bytes */ + __le32 i_atime; /* Access time */ + __le32 i_ctime; /* Inode Change time */ + __le32 i_mtime; /* Modification time */ + __le32 i_dtime; /* Deletion Time */ + __le16 i_gid; /* Low 16 bits of Group Id */ + __le16 i_links_count; /* Links count */ + __le32 i_blocks_lo; /* Blocks count */ + __le32 i_flags; /* File flags */ + union { + struct { + __le32 l_i_version; + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + struct { + __u32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ + __le32 i_generation; /* File version (for NFS) */ + __le32 i_file_acl_lo; /* File ACL */ + __le32 i_size_high; + __le32 i_obso_faddr; /* Obsoleted fragment address */ + union { + struct { + __le16 l_i_blocks_high; /* were l_i_reserved1 */ + __le16 l_i_file_acl_high; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 h_i_author; + } hurd2; + struct { + __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ + __le16 m_i_file_acl_high; + __u32 m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ + __le16 i_extra_isize; + __le16 i_pad1; + __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ + __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __le32 i_crtime; /* File Creation time */ + __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ + __le32 i_version_hi; /* high 32 bits for 64-bit version */ +}; + + +#define EXT4_EPOCH_BITS 2 +#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) +#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) + +/* + * Extended fields will fit into 
an inode if the filesystem was formatted + * with large inodes (-I 256 or larger) and there are not currently any EAs + * consuming all of the available space. For new inodes we always reserve + * enough space for the kernel's known extended fields, but for inodes + * created with an old kernel this might not have been the case. None of + * the extended inode fields is critical for correct filesystem operation. + * This macro checks if a certain field fits in the inode. Note that + * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize + */ +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \ + ((offsetof(typeof(*ext4_inode), field) + \ + sizeof((ext4_inode)->field)) \ + <= (EXT4_GOOD_OLD_INODE_SIZE + \ + (einode)->i_extra_isize)) \ + +static inline __le32 ext4_encode_extra_time(struct timespec *time) +{ + return cpu_to_le32((sizeof(time->tv_sec) > 4 ? + time->tv_sec >> 32 : 0) | + ((time->tv_nsec << 2) & EXT4_NSEC_MASK)); +} + +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +{ + if (sizeof(time->tv_sec) > 4) + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) + << 32; + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2; +} + +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ +do { \ + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(inode)->xtime); \ +} while (0) + +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(einode)->xtime); \ +} while (0) + +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ +do { \ + (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ + if 
(EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + ext4_decode_extra_time(&(inode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (einode)->xtime.tv_sec = \ + (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + ext4_decode_extra_time(&(einode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + +#define i_disk_version osd1.linux1.l_i_version + +#if defined(__KERNEL__) || defined(__linux__) +#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_file_acl_high osd2.linux2.l_i_file_acl_high +#define i_blocks_high osd2.linux2.l_i_blocks_high +#define i_uid_low i_uid +#define i_gid_low i_gid +#define i_uid_high osd2.linux2.l_i_uid_high +#define i_gid_high osd2.linux2.l_i_gid_high +#define i_reserved2 osd2.linux2.l_i_reserved2 + +#elif defined(__GNU__) + +#define i_translator osd1.hurd1.h_i_translator +#define i_uid_high osd2.hurd2.h_i_uid_high +#define i_gid_high osd2.hurd2.h_i_gid_high +#define i_author osd2.hurd2.h_i_author + +#elif defined(__masix__) + +#define i_reserved1 osd1.masix1.m_i_reserved1 +#define i_file_acl_high osd2.masix2.m_i_file_acl_high +#define i_reserved2 osd2.masix2.m_i_reserved2 + +#endif /* defined(__KERNEL__) || defined(__linux__) */ + +/* + * File system states + */ +#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT4_ERROR_FS 0x0002 /* Errors detected */ +#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ + +/* + * Misc. 
filesystem flags + */ +#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ +#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ +#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ + +/* + * Mount flags + */ +#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ +#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ +#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ +#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ +#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ +#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ +#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ +#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ +#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ +#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ +#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ +#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ +#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ +#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ +#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ +#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ +#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ +#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ +#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */ +#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ +#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ +#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ +#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ +#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ +#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ +#define 
EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ +#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ +#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ +/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ +#ifndef _LINUX_EXT2_FS_H +#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt +#define set_opt(o, opt) o |= EXT4_MOUNT_##opt +#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ + EXT4_MOUNT_##opt) +#else +#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD +#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT +#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS +#endif + +#define ext4_set_bit ext2_set_bit +#define ext4_set_bit_atomic ext2_set_bit_atomic +#define ext4_clear_bit ext2_clear_bit +#define ext4_clear_bit_atomic ext2_clear_bit_atomic +#define ext4_test_bit ext2_test_bit +#define ext4_find_first_zero_bit ext2_find_first_zero_bit +#define ext4_find_next_zero_bit ext2_find_next_zero_bit +#define ext4_find_next_bit ext2_find_next_bit + +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * Behaviour when detecting errors + */ +#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT4_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT4_ERRORS_PANIC 3 /* Panic */ +#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext4_super_block { +/*00*/ __le32 s_inodes_count; /* Inodes count */ + __le32 s_blocks_count_lo; /* Blocks count */ + __le32 s_r_blocks_count_lo; /* Reserved blocks count */ + __le32 s_free_blocks_count_lo; /* Free blocks count */ +/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ + __le32 s_first_data_block; /* First Data Block */ + __le32 s_log_block_size; /* Block size */ + __le32 s_obso_log_frag_size; /* Obsoleted fragment size */ +/*20*/ __le32 s_blocks_per_group; /* # Blocks 
per group */ + __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */ + __le32 s_inodes_per_group; /* # Inodes per group */ + __le32 s_mtime; /* Mount time */ +/*30*/ __le32 s_wtime; /* Write time */ + __le16 s_mnt_count; /* Mount count */ + __le16 s_max_mnt_count; /* Maximal mount count */ + __le16 s_magic; /* Magic signature */ + __le16 s_state; /* File system state */ + __le16 s_errors; /* Behaviour when detecting errors */ + __le16 s_minor_rev_level; /* minor revision level */ +/*40*/ __le32 s_lastcheck; /* time of last check */ + __le32 s_checkinterval; /* max. time between checks */ + __le32 s_creator_os; /* OS */ + __le32 s_rev_level; /* Revision level */ +/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ + __le16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT4_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __le32 s_first_ino; /* First non-reserved inode */ + __le16 s_inode_size; /* size of inode structure */ + __le16 s_block_group_nr; /* block group # of this superblock */ + __le32 s_feature_compat; /* compatible feature set */ +/*60*/ __le32 s_feature_incompat; /* incompatible feature set */ + __le32 s_feature_ro_compat; /* readonly-compatible feature set */ +/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ +/*78*/ char s_volume_name[16]; /* volume name */ +/*88*/ char s_last_mounted[64]; /* directory where last mounted */ +/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. 
Directory preallocation should only + * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ + /* + * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. + */ +/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ +/*E0*/ __le32 s_journal_inum; /* inode number of journal file */ + __le32 s_journal_dev; /* device number of journal file */ + __le32 s_last_orphan; /* start of list of inodes to delete */ + __le32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __le16 s_desc_size; /* size of group descriptor */ +/*100*/ __le32 s_default_mount_opts; + __le32 s_first_meta_bg; /* First metablock block group */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ + /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ +/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count */ + __le32 s_free_blocks_count_hi; /* Free blocks count */ + __le16 s_min_extra_isize; /* All inodes have at least # bytes */ + __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ + __le32 s_flags; /* Miscellaneous flags */ + __le16 s_raid_stride; /* RAID stride */ + __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ + __le64 s_mmp_block; /* Block for multi-mount protection */ + __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ + __u32 s_reserved[163]; /* Padding to the end of the block */ +}; + +#ifdef __KERNEL__ +static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} +static inline struct ext4_inode_info *EXT4_I(struct inode *inode) +{ + return container_of(inode, struct 
ext4_inode_info, vfs_inode); +} + +static inline struct timespec ext4_current_time(struct inode *inode) +{ + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; +} + + +static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) +{ + return ino == EXT4_ROOT_INO || + ino == EXT4_JOURNAL_INO || + ino == EXT4_RESIZE_INO || + (ino >= EXT4_FIRST_INO(sb) && + ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); +} +#else +/* Assume that user mode programs are passing in an ext4fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. */ +#define EXT4_SB(sb) (sb) +#endif + +#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime + +/* + * Codes for operating systems + */ +#define EXT4_OS_LINUX 0 +#define EXT4_OS_HURD 1 +#define EXT4_OS_MASIX 2 +#define EXT4_OS_FREEBSD 3 +#define EXT4_OS_LITES 4 + +/* + * Revision levels + */ +#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV +#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV + +#define EXT4_GOOD_OLD_INODE_SIZE 128 + +/* + * Feature set definitions + */ + +#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) +#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) +#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) +#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) +#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) +#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) +#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_compat &= 
~cpu_to_le32(mask) +#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) +#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) + +#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 +#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 +#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 +#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 +#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 + +#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 +#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 +#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 + +#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ +#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ +#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 +#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ +#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 +#define EXT4_FEATURE_INCOMPAT_MMP 0x0100 +#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 + +#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR +#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_RECOVER| \ + EXT4_FEATURE_INCOMPAT_META_BG| \ + EXT4_FEATURE_INCOMPAT_EXTENTS| \ + EXT4_FEATURE_INCOMPAT_64BIT| \ + EXT4_FEATURE_INCOMPAT_FLEX_BG) +#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ + EXT4_FEATURE_RO_COMPAT_HUGE_FILE) + +/* + * Default 
values for user and/or group using reserved blocks + */ +#define EXT4_DEF_RESUID 0 +#define EXT4_DEF_RESGID 0 + +/* + * Default mount options + */ +#define EXT4_DEFM_DEBUG 0x0001 +#define EXT4_DEFM_BSDGROUPS 0x0002 +#define EXT4_DEFM_XATTR_USER 0x0004 +#define EXT4_DEFM_ACL 0x0008 +#define EXT4_DEFM_UID16 0x0010 +#define EXT4_DEFM_JMODE 0x0060 +#define EXT4_DEFM_JMODE_DATA 0x0020 +#define EXT4_DEFM_JMODE_ORDERED 0x0040 +#define EXT4_DEFM_JMODE_WBACK 0x0060 + +/* + * Structure of a directory entry + */ +#define EXT4_NAME_LEN 255 + +struct ext4_dir_entry { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __le16 name_len; /* Name length */ + char name[EXT4_NAME_LEN]; /* File name */ +}; + +/* + * The new version of the directory entry. Since EXT4 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. + */ +struct ext4_dir_entry_2 { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[EXT4_NAME_LEN]; /* File name */ +}; + +/* + * Ext4 directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. 
+ */ +#define EXT4_FT_UNKNOWN 0 +#define EXT4_FT_REG_FILE 1 +#define EXT4_FT_DIR 2 +#define EXT4_FT_CHRDEV 3 +#define EXT4_FT_BLKDEV 4 +#define EXT4_FT_FIFO 5 +#define EXT4_FT_SOCK 6 +#define EXT4_FT_SYMLINK 7 + +#define EXT4_FT_MAX 8 + +/* + * EXT4_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT4_DIR_PAD 4 +#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) +#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ + ~EXT4_DIR_ROUND) +#define EXT4_MAX_REC_LEN ((1<<16)-1) + +static inline unsigned ext4_rec_len_from_disk(__le16 dlen) +{ + unsigned len = le16_to_cpu(dlen); + + if (len == EXT4_MAX_REC_LEN) + return 1 << 16; + return len; +} + +static inline __le16 ext4_rec_len_to_disk(unsigned len) +{ + if (len == (1 << 16)) + return cpu_to_le16(EXT4_MAX_REC_LEN); + else if (len > (1 << 16)) + BUG(); + return cpu_to_le16(len); +} + +/* + * Hash Tree Directory indexing + * (c) Daniel Phillips, 2001 + */ + +#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ + EXT4_FEATURE_COMPAT_DIR_INDEX) && \ + (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) +#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) +#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) + +/* Legal values for the dx_root hash_version field: */ + +#define DX_HASH_LEGACY 0 +#define DX_HASH_HALF_MD4 1 +#define DX_HASH_TEA 2 + +#ifdef __KERNEL__ + +/* hash info structure used by the directory hash */ +struct dx_hash_info +{ + u32 hash; + u32 minor_hash; + int hash_version; + u32 *seed; +}; + +#define EXT4_HTREE_EOF 0x7fffffff + +/* + * Control parameters used by ext4_htree_next_block + */ +#define HASH_NB_ALWAYS 1 + + +/* + * Describe an inode's exact location on disk and in memory + */ +struct ext4_iloc +{ + struct buffer_head *bh; + unsigned long offset; + ext4_group_t block_group; +}; + +static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) +{ + return (struct ext4_inode *) 
(iloc->bh->b_data + iloc->offset); +} + +/* + * This structure is stuffed into the struct file's private_data field + * for directories. It is where we put information so that we can do + * readdir operations in hash tree order. + */ +struct dir_private_info { + struct rb_root root; + struct rb_node *curr_node; + struct fname *extra_fname; + loff_t last_pos; + __u32 curr_hash; + __u32 curr_minor_hash; + __u32 next_hash; +}; + +/* calculate the first block number of the group */ +static inline ext4_fsblk_t +ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) +{ + return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); +} + +/* + * Special error return code only used by dx_probe() and its callers. + */ +#define ERR_BAD_DX_DIR -75000 + +void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, + unsigned long *blockgrpp, ext4_grpblk_t *offsetp); + +/* + * Function prototypes + */ + +/* + * Ok, these declarations are also in <linux/kernel.h> but none of the + * ext4 source programs needs to include it so they are duplicated here.
+ */ +# define NORET_TYPE /**/ +# define ATTRIB_NORET __attribute__((noreturn)) +# define NORET_AND noreturn, + +/* balloc.c */ +extern unsigned int ext4_block_group(struct super_block *sb, + ext4_fsblk_t blocknr); +extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, + ext4_fsblk_t blocknr); +extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); +extern unsigned long ext4_bg_num_gdb(struct super_block *sb, + ext4_group_t group); +extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int *errp); +extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); +extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); +extern void ext4_free_blocks (handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count, int metadata); +extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, + ext4_fsblk_t block, unsigned long count, + unsigned long *pdquot_freed_blocks); +extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); +extern void ext4_check_blocks_bitmap (struct super_block *); +extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, + ext4_group_t block_group, + struct buffer_head ** bh); +extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); +extern void ext4_init_block_alloc_info(struct inode *); +extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); + +/* dir.c */ +extern int ext4_check_dir_entry(const char *, struct inode *, + struct ext4_dir_entry_2 *, + struct buffer_head *, unsigned long); +extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, + __u32 minor_hash, + struct ext4_dir_entry_2 *dirent); +extern void ext4_htree_free_dir_info(struct dir_private_info *p); + +/* fsync.c */ +extern int 
ext4_sync_file (struct file *, struct dentry *, int); + +/* hash.c */ +extern int ext4fs_dirhash(const char *name, int len, struct + dx_hash_info *hinfo); + +/* ialloc.c */ +extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); +extern void ext4_free_inode (handle_t *, struct inode *); +extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); +extern unsigned long ext4_count_free_inodes (struct super_block *); +extern unsigned long ext4_count_dirs (struct super_block *); +extern void ext4_check_inodes_bitmap (struct super_block *); +extern unsigned long ext4_count_free (struct buffer_head *, unsigned); + +/* mballoc.c */ +extern long ext4_mb_stats; +extern long ext4_mb_max_to_scan; +extern int ext4_mb_init(struct super_block *, int); +extern int ext4_mb_release(struct super_block *); +extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, + struct ext4_allocation_request *, int *); +extern int ext4_mb_reserve_blocks(struct super_block *, int); +extern void ext4_mb_discard_inode_preallocations(struct inode *); +extern int __init init_ext4_mballoc(void); +extern void exit_ext4_mballoc(void); +extern void ext4_mb_free_blocks(handle_t *, struct inode *, + unsigned long, unsigned long, int, unsigned long *); + + +/* inode.c */ +int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t blocknr); +struct buffer_head *ext4_getblk(handle_t *, struct inode *, + ext4_lblk_t, int, int *); +struct buffer_head *ext4_bread(handle_t *, struct inode *, + ext4_lblk_t, int, int *); +int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, unsigned long maxblocks, + struct buffer_head *bh_result, + int create, int extend_disksize); + +extern struct inode *ext4_iget(struct super_block *, unsigned long); +extern int ext4_write_inode (struct inode *, int); +extern int ext4_setattr (struct dentry *, struct iattr *); +extern void ext4_delete_inode (struct inode *); +extern 
int ext4_sync_inode (handle_t *, struct inode *); +extern void ext4_discard_reservation (struct inode *); +extern void ext4_dirty_inode(struct inode *); +extern int ext4_change_inode_journal_flag(struct inode *, int); +extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); +extern void ext4_truncate (struct inode *); +extern void ext4_set_inode_flags(struct inode *); +extern void ext4_get_inode_flags(struct ext4_inode_info *); +extern void ext4_set_aops(struct inode *inode); +extern int ext4_writepage_trans_blocks(struct inode *); +extern int ext4_block_truncate_page(handle_t *handle, struct page *page, + struct address_space *mapping, loff_t from); + +/* ioctl.c */ +extern long ext4_ioctl(struct file *, unsigned int, unsigned long); +extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); + +/* migrate.c */ +extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, + unsigned long); +/* namei.c */ +extern int ext4_orphan_add(handle_t *, struct inode *); +extern int ext4_orphan_del(handle_t *, struct inode *); +extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, + __u32 start_minor_hash, __u32 *next_hash); + +/* resize.c */ +extern int ext4_group_add(struct super_block *sb, + struct ext4_new_group_data *input); +extern int ext4_group_extend(struct super_block *sb, + struct ext4_super_block *es, + ext4_fsblk_t n_blocks_count); + +/* super.c */ +extern void ext4_error (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void __ext4_std_error (struct super_block *, const char *, int); +extern void ext4_abort (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext4_warning (struct super_block *, const char *, const char *, ...) 
+ __attribute__ ((format (printf, 3, 4))); +extern void ext4_update_dynamic_rev (struct super_block *sb); +extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, + __u32 compat); +extern int ext4_update_rocompat_feature(handle_t *handle, + struct super_block *sb, __u32 rocompat); +extern int ext4_update_incompat_feature(handle_t *handle, + struct super_block *sb, __u32 incompat); +extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, + struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, + struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, + struct ext4_group_desc *bg); +extern void ext4_block_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_table_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); + +static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | + le32_to_cpu(es->s_blocks_count_lo); +} + +static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) | + le32_to_cpu(es->s_r_blocks_count_lo); +} + +static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) | + le32_to_cpu(es->s_free_blocks_count_lo); +} + +static inline void ext4_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_blocks_count_lo = cpu_to_le32((u32)blk); + es->s_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline void ext4_free_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_free_blocks_count_lo = cpu_to_le32((u32)blk); + es->s_free_blocks_count_hi = cpu_to_le32(blk 
>> 32); +} + +static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_r_blocks_count_lo = cpu_to_le32((u32)blk); + es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline loff_t ext4_isize(struct ext4_inode *raw_inode) +{ + return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | + le32_to_cpu(raw_inode->i_size_lo); +} + +static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) +{ + raw_inode->i_size_lo = cpu_to_le32(i_size); + raw_inode->i_size_high = cpu_to_le32(i_size >> 32); +} + +static inline +struct ext4_group_info *ext4_get_group_info(struct super_block *sb, + ext4_group_t group) +{ + struct ext4_group_info ***grp_info; + long indexv, indexh; + grp_info = EXT4_SB(sb)->s_group_info; + indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); + indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); + return grp_info[indexv][indexh]; +} + + +#define ext4_std_error(sb, errno) \ +do { \ + if ((errno)) \ + __ext4_std_error((sb), __FUNCTION__, (errno)); \ +} while (0) + +/* + * Inodes and files operations + */ + +/* dir.c */ +extern const struct file_operations ext4_dir_operations; + +/* file.c */ +extern const struct inode_operations ext4_file_inode_operations; +extern const struct file_operations ext4_file_operations; + +/* namei.c */ +extern const struct inode_operations ext4_dir_inode_operations; +extern const struct inode_operations ext4_special_inode_operations; + +/* symlink.c */ +extern const struct inode_operations ext4_symlink_inode_operations; +extern const struct inode_operations ext4_fast_symlink_inode_operations; + +/* extents.c */ +extern int ext4_ext_tree_init(handle_t *handle, struct inode *); +extern int ext4_ext_writepage_trans_blocks(struct inode *, int); +extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create, int extend_disksize); +extern void 
ext4_ext_truncate(struct inode *, struct page *); +extern void ext4_ext_init(struct super_block *); +extern void ext4_ext_release(struct super_block *); +extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, + loff_t len); +extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, + sector_t block, unsigned long max_blocks, + struct buffer_head *bh, int create, + int extend_disksize); +#endif /* __KERNEL__ */ + +#endif /* _EXT4_H */ diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h new file mode 100644 index 000000000000..75333b595fab --- /dev/null +++ b/fs/ext4/ext4_extents.h @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _EXT4_EXTENTS +#define _EXT4_EXTENTS + +#include "ext4.h" + +/* + * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks + * becomes very small, so index split, in-depth growing and + * other hard changes happen much more often. + * This is for debug purposes only. + */ +#define AGGRESSIVE_TEST_ + +/* + * With EXTENTS_STATS defined, the number of blocks and extents + * are collected in the truncate path. They'll be shown at + * umount time.
+ */ +#define EXTENTS_STATS__ + +/* + * If CHECK_BINSEARCH is defined, then the results of the binary search + * will also be checked by linear search. + */ +#define CHECK_BINSEARCH__ + +/* + * If EXT_DEBUG is defined you can use the 'extdebug' mount option + * to get lots of info about what's going on. + */ +#define EXT_DEBUG__ +#ifdef EXT_DEBUG +#define ext_debug(a...) printk(a) +#else +#define ext_debug(a...) +#endif + +/* + * If EXT_STATS is defined then stats numbers are collected. + * These numbers will be displayed at umount time. + */ +#define EXT_STATS_ + + +/* + * ext4_inode has i_block array (60 bytes total). + * The first 12 bytes store ext4_extent_header; + * the remainder stores an array of ext4_extent. + */ + +/* + * This is the extent on-disk structure. + * It's used at the bottom of the tree. + */ +struct ext4_extent { + __le32 ee_block; /* first logical block extent covers */ + __le16 ee_len; /* number of blocks covered by extent */ + __le16 ee_start_hi; /* high 16 bits of physical block */ + __le32 ee_start_lo; /* low 32 bits of physical block */ +}; + +/* + * This is the index on-disk structure. + * It's used at all the levels except the bottom. + */ +struct ext4_extent_idx { + __le32 ei_block; /* index covers logical blocks from 'block' */ + __le32 ei_leaf_lo; /* pointer to the physical block of the next * + * level. leaf or next index could be there */ + __le16 ei_leaf_hi; /* high 16 bits of physical block */ + __u16 ei_unused; +}; + +/* + * Each block (leaves and indexes), even an inode-stored one, has a header. + */ +struct ext4_extent_header { + __le16 eh_magic; /* probably will support different formats */ + __le16 eh_entries; /* number of valid entries */ + __le16 eh_max; /* capacity of store in entries */ + __le16 eh_depth; /* has tree real underlying blocks? */ + __le32 eh_generation; /* generation of the tree */ +}; + +#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) + +/* + * Array of ext4_ext_path contains path to some extent.
+ * Creation/lookup routines use it for traversal/splitting/etc. + * Truncate uses it to simulate recursive walking. + */ +struct ext4_ext_path { + ext4_fsblk_t p_block; + __u16 p_depth; + struct ext4_extent *p_ext; + struct ext4_extent_idx *p_idx; + struct ext4_extent_header *p_hdr; + struct buffer_head *p_bh; +}; + +/* + * structure for external API + */ + +#define EXT4_EXT_CACHE_NO 0 +#define EXT4_EXT_CACHE_GAP 1 +#define EXT4_EXT_CACHE_EXTENT 2 + + +#define EXT_MAX_BLOCK 0xffffffff + +/* + * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an + * initialized extent. This is 2^15 and not (2^16 - 1), since we use the + * MSB of ee_len field in the extent datastructure to signify if this + * particular extent is an initialized extent or an uninitialized (i.e. + * preallocated). + * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an + * uninitialized extent. + * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an + * uninitialized one. In other words, if MSB of ee_len is set, it is an + * uninitialized extent with only one special scenario when ee_len = 0x8000. + * In this case we can not have an uninitialized extent of zero length and + * thus we make it as a special case of initialized extent with 0x8000 length. + * This way we get better extent-to-group alignment for initialized extents. + * Hence, the maximum number of blocks we can have in an *initialized* + * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767). 
+ */ +#define EXT_INIT_MAX_LEN (1UL << 15) +#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1) + + +#define EXT_FIRST_EXTENT(__hdr__) \ + ((struct ext4_extent *) (((char *) (__hdr__)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_FIRST_INDEX(__hdr__) \ + ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_HAS_FREE_INDEX(__path__) \ + (le16_to_cpu((__path__)->p_hdr->eh_entries) \ + < le16_to_cpu((__path__)->p_hdr->eh_max)) +#define EXT_LAST_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) +#define EXT_LAST_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) +#define EXT_MAX_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) + +static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) +{ + return (struct ext4_extent_header *) EXT4_I(inode)->i_data; +} + +static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh) +{ + return (struct ext4_extent_header *) bh->b_data; +} + +static inline unsigned short ext_depth(struct inode *inode) +{ + return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); +} + +static inline void ext4_ext_tree_changed(struct inode *inode) +{ + EXT4_I(inode)->i_ext_generation++; +} + +static inline void +ext4_ext_invalidate_cache(struct inode *inode) +{ + EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; +} + +static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) +{ + /* We can not have an uninitialized extent of zero length! 
*/ + BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); + ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); +} + +static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) +{ + /* Extent with ee_len of 0x8000 is treated as an initialized extent */ + return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); +} + +static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) +{ + return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ? + le16_to_cpu(ext->ee_len) : + (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); +} + +extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); +extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); +extern int ext4_extent_tree_init(handle_t *, struct inode *); +extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); +extern int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *); +extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); +extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); +extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, + struct ext4_ext_path *); +extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, + ext4_lblk_t *, ext4_fsblk_t *); +extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, + ext4_lblk_t *, ext4_fsblk_t *); +extern void ext4_ext_drop_refs(struct ext4_ext_path *); +#endif /* _EXT4_EXTENTS */ + diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h new file mode 100644 index 000000000000..26a4ae255d79 --- /dev/null +++ b/fs/ext4/ext4_i.h @@ -0,0 +1,167 @@ +/* + * ext4_i.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_i.h + * + * Copyright (C) 1991, 1992 Linus 
Torvalds + */ + +#ifndef _EXT4_I +#define _EXT4_I + +#include +#include +#include +#include + +/* data type for block offset of block group */ +typedef int ext4_grpblk_t; + +/* data type for filesystem-wide blocks number */ +typedef unsigned long long ext4_fsblk_t; + +/* data type for file logical block number */ +typedef __u32 ext4_lblk_t; + +/* data type for block group number */ +typedef unsigned long ext4_group_t; + +struct ext4_reserve_window { + ext4_fsblk_t _rsv_start; /* First byte reserved */ + ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ +}; + +struct ext4_reserve_window_node { + struct rb_node rsv_node; + __u32 rsv_goal_size; + __u32 rsv_alloc_hit; + struct ext4_reserve_window rsv_window; +}; + +struct ext4_block_alloc_info { + /* information about reservation window */ + struct ext4_reserve_window_node rsv_window_node; + /* + * was i_next_alloc_block in ext4_inode_info + * is the logical (file-relative) number of the + * most-recently-allocated block in this file. + * We use this for detecting linearly ascending allocation requests. + */ + ext4_lblk_t last_alloc_logical_block; + /* + * Was i_next_alloc_goal in ext4_inode_info + * is the *physical* companion to i_next_alloc_block. + * it is the physical block number of the block which was most-recently + * allocated to this file. This gives us the goal (target) for the next + * allocation when we detect linearly ascending requests.
+ */ + ext4_fsblk_t last_alloc_physical_block; +}; + +#define rsv_start rsv_window._rsv_start +#define rsv_end rsv_window._rsv_end + +/* + * storage for cached extent + */ +struct ext4_ext_cache { + ext4_fsblk_t ec_start; + ext4_lblk_t ec_block; + __u32 ec_len; /* must be 32bit to return holes */ + __u32 ec_type; +}; + +/* + * third extended file system inode data in memory + */ +struct ext4_inode_info { + __le32 i_data[15]; /* unconverted */ + __u32 i_flags; + ext4_fsblk_t i_file_acl; + __u32 i_dtime; + + /* + * i_block_group is the number of the block group which contains + * this file's inode. Constant across the lifetime of the inode, + * it is used for making block allocation decisions - we try to + * place a file's data blocks near its inode block, and new inodes + * near to their parent directory's inode. + */ + ext4_group_t i_block_group; + __u32 i_state; /* Dynamic state flags for ext4 */ + + /* block reservation info */ + struct ext4_block_alloc_info *i_block_alloc_info; + + ext4_lblk_t i_dir_start_lookup; +#ifdef CONFIG_EXT4DEV_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_mutex even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + + struct list_head i_orphan; /* unlinked but open inodes */ + + /* + * i_disksize keeps track of what the inode size is ON DISK, not + * in memory. During truncate, i_size is set to the new size by + * the VFS prior to calling ext4_truncate(), but the filesystem won't + * set i_disksize to 0 until the truncate is actually under way. + * + * The intent is that i_disksize always represents the blocks which + * are used by this file.
This allows recovery to restart truncate + * on orphans if we crash during truncate. We actually write i_disksize + * into the on-disk inode when writing inodes out, instead of i_size. + * + * The only time when i_disksize and i_size may be different is when + * a truncate is in progress. The only things which change i_disksize + * are ext4_get_block (growth) and ext4_truncate (shrinkth). + */ + loff_t i_disksize; + + /* on-disk additional length */ + __u16 i_extra_isize; + + /* + * i_data_sem is for serialising ext4_truncate() against + * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's + * data tree are chopped off during truncate. We can't do that in + * ext4 because whenever we perform intermediate commits during + * truncate, the inode and all the metadata blocks *must* be in a + * consistent state which allows truncation of the orphans to restart + * during recovery. Hence we must fix the get_block-vs-truncate race + * by other means, so we have i_data_sem. + */ + struct rw_semaphore i_data_sem; + struct inode vfs_inode; + + unsigned long i_ext_generation; + struct ext4_ext_cache i_cached_extent; + /* + * File creation time. Its function is same as that of + * struct timespec i_{a,c,m}time in the generic inode. + */ + struct timespec i_crtime; + + /* mballoc */ + struct list_head i_prealloc_list; + spinlock_t i_prealloc_lock; +}; + +#endif /* _EXT4_I */ diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 2e6007d418dc..c75384b34f2c 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -2,7 +2,7 @@ * Interface between ext4 and JBD */ -#include +#include "ext4_jbd2.h" int __ext4_journal_get_undo_access(const char *where, handle_t *handle, struct buffer_head *bh) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h new file mode 100644 index 000000000000..9255a7d28b24 --- /dev/null +++ b/fs/ext4/ext4_jbd2.h @@ -0,0 +1,231 @@ +/* + * ext4_jbd2.h + * + * Written by Stephen C. 
Tweedie , 1999 + * + * Copyright 1998--1999 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Ext4-specific journaling extensions. + */ + +#ifndef _EXT4_JBD2_H +#define _EXT4_JBD2_H + +#include +#include +#include "ext4.h" + +#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) + +/* Define the number of blocks we need to account to a transaction to + * modify one block of data. + * + * We may have to touch one inode, one bitmap buffer, up to three + * indirection blocks, the group and superblock summaries, and the data + * block to complete the transaction. + * + * For extents-enabled fs we may have to allocate and modify up to + * 5 levels of tree + root which are stored in the inode. */ + +#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ + (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ + || test_opt(sb, EXTENTS) ? 27U : 8U) + +/* Extended attribute operations touch at most two data buffers, + * two bitmap buffers, and two group summaries, in addition to the inode + * and the superblock, which are already accounted for. */ + +#define EXT4_XATTR_TRANS_BLOCKS 6U + +/* Define the minimum size for a transaction which modifies data. This + * needs to take into account the fact that we may end up modifying two + * quota files too (one for the group, one for the user quota). The + * superblock only gets updated once, of course, so don't bother + * counting that again for the quota updates. */ + +#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ + EXT4_XATTR_TRANS_BLOCKS - 2 + \ + 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + +/* Delete operations potentially hit one directory's namespace plus an + * entire inode, plus arbitrary amounts of bitmap/indirection data. Be + * generous. We can grow the delete transaction later if necessary. 
*/ + +#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64) + +/* Define an arbitrary limit for the amount of data we will anticipate + * writing to any given transaction. For unbounded transactions such as + * write(2) and truncate(2) we can write more than this, but we always + * start off at the maximum transaction size and grow the transaction + * optimistically as we go. */ + +#define EXT4_MAX_TRANS_DATA 64U + +/* We break up a large truncate or write transaction once the handle's + * buffer credits gets this low, we need either to extend the + * transaction or to start a new one. Reserve enough space here for + * inode, bitmap, superblock, group and indirection updates for at least + * one block, plus two quota updates. Quota allocations are not + * needed. */ + +#define EXT4_RESERVE_TRANS_BLOCKS 12U + +#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8 + +#ifdef CONFIG_QUOTA +/* Amount of blocks needed for quota update - we know that the structure was + * allocated so we need to update only inode+data */ +#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) +/* Amount of blocks needed for quota insert/delete - we do some block writes + * but inode, sb and group updates are done only once */ +#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) +#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) +#else +#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 +#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 +#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 +#endif + +int +ext4_mark_iloc_dirty(handle_t *handle, + struct inode *inode, + struct ext4_iloc *iloc); + +/* + * On success, We end up with an outstanding reference count against + * iloc->bh. This _must_ be cleaned up later. 
+ */ + +int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext4_iloc *iloc); + +int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); + +/* + * Wrapper functions with which ext4 calls into JBD. The intent here is + * to allow these to be turned into appropriate stubs so ext4 can control + * ext2 filesystems, so ext2+ext4 systems only need one fs. This work hasn't + * been done yet. + */ + +static inline void ext4_journal_release_buffer(handle_t *handle, + struct buffer_head *bh) +{ + jbd2_journal_release_buffer(handle, bh); +} + +void ext4_journal_abort_handle(const char *caller, const char *err_fn, + struct buffer_head *bh, handle_t *handle, int err); + +int __ext4_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh); + +int __ext4_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh); + +int __ext4_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh); + +int __ext4_journal_revoke(const char *where, handle_t *handle, + ext4_fsblk_t blocknr, struct buffer_head *bh); + +int __ext4_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh); + +int __ext4_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh); + +#define ext4_journal_get_undo_access(handle, bh) \ + __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_get_write_access(handle, bh) \ + __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_revoke(handle, blocknr, bh) \ + __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) +#define ext4_journal_get_create_access(handle, bh) \ + __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_dirty_metadata(handle, bh) \ + __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) +#define ext4_journal_forget(handle, bh) \ + __ext4_journal_forget(__FUNCTION__, (handle), (bh)) +
+int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); + +handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); +int __ext4_journal_stop(const char *where, handle_t *handle); + +static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) +{ + return ext4_journal_start_sb(inode->i_sb, nblocks); +} + +#define ext4_journal_stop(handle) \ + __ext4_journal_stop(__FUNCTION__, (handle)) + +static inline handle_t *ext4_journal_current_handle(void) +{ + return journal_current_handle(); +} + +static inline int ext4_journal_extend(handle_t *handle, int nblocks) +{ + return jbd2_journal_extend(handle, nblocks); +} + +static inline int ext4_journal_restart(handle_t *handle, int nblocks) +{ + return jbd2_journal_restart(handle, nblocks); +} + +static inline int ext4_journal_blocks_per_page(struct inode *inode) +{ + return jbd2_journal_blocks_per_page(inode); +} + +static inline int ext4_journal_force_commit(journal_t *journal) +{ + return jbd2_journal_force_commit(journal); +} + +/* super.c */ +int ext4_force_commit(struct super_block *sb); + +static inline int ext4_should_journal_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 1; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + return 1; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 1; + return 0; +} + +static inline int ext4_should_order_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 0; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + return 1; + return 0; +} + +static inline int ext4_should_writeback_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + return 0; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) + return 1; + return 0; +} + +#endif /* _EXT4_JBD2_H */ diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h new 
file mode 100644 index 000000000000..5802e69f2191 --- /dev/null +++ b/fs/ext4/ext4_sb.h @@ -0,0 +1,148 @@ +/* + * ext4_sb.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_sb.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _EXT4_SB +#define _EXT4_SB + +#ifdef __KERNEL__ +#include +#include +#include +#include +#endif +#include + +/* + * third extended-fs super-block data in memory + */ +struct ext4_sb_info { + unsigned long s_desc_size; /* Size of a group descriptor in bytes */ + unsigned long s_inodes_per_block;/* Number of inodes per block */ + unsigned long s_blocks_per_group;/* Number of blocks in a group */ + unsigned long s_inodes_per_group;/* Number of inodes in a group */ + unsigned long s_itb_per_group; /* Number of inode table blocks per group */ + unsigned long s_gdb_count; /* Number of group descriptor blocks */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + ext4_group_t s_groups_count; /* Number of groups in the fs */ + unsigned long s_overhead_last; /* Last calculated overhead */ + unsigned long s_blocks_last; /* Last seen block count */ + loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; + unsigned long s_mount_opt; + ext4_fsblk_t s_sb_block; + uid_t s_resuid; + gid_t s_resgid; + unsigned short s_mount_state; + unsigned short s_pad; + int s_addr_per_block_bits; + int s_desc_per_block_bits; + int s_inode_size; + int s_first_ino; + spinlock_t s_next_gen_lock; + u32 s_next_generation; + u32 s_hash_seed[4]; + int s_def_hash_version; + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct 
percpu_counter s_dirs_counter; + struct blockgroup_lock s_blockgroup_lock; + + /* root of the per fs reservation window tree */ + spinlock_t s_rsv_window_lock; + struct rb_root s_rsv_window_root; + struct ext4_reserve_window_node s_rsv_window_head; + + /* Journaling */ + struct inode * s_journal_inode; + struct journal_s * s_journal; + struct list_head s_orphan; + unsigned long s_commit_interval; + struct block_device *journal_bdev; +#ifdef CONFIG_JBD2_DEBUG + struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ + wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ +#endif +#ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ +#endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ + +#ifdef EXTENTS_STATS + /* ext4 extents stats */ + unsigned long s_ext_min; + unsigned long s_ext_max; + unsigned long s_depth_max; + spinlock_t s_ext_stats_lock; + unsigned long s_ext_blocks; + unsigned long s_ext_extents; +#endif + + /* for buddy allocator */ + struct ext4_group_info ***s_group_info; + struct inode *s_buddy_cache; + long s_blocks_reserved; + spinlock_t s_reserve_lock; + struct list_head s_active_transaction; + struct list_head s_closed_transaction; + struct list_head s_committed_transaction; + spinlock_t s_md_lock; + tid_t s_last_transaction; + unsigned short *s_mb_offsets, *s_mb_maxs; + + /* tunables */ + unsigned long s_stripe; + unsigned long s_mb_stream_request; + unsigned long s_mb_max_to_scan; + unsigned long s_mb_min_to_scan; + unsigned long s_mb_stats; + unsigned long s_mb_order2_reqs; + unsigned long s_mb_group_prealloc; + /* where last allocation was done - for stream allocation */ + unsigned long s_mb_last_group; + unsigned long s_mb_last_start; + + /* history to debug policy */ + struct ext4_mb_history *s_mb_history; + int s_mb_history_cur; + int s_mb_history_max; + int 
s_mb_history_num; + struct proc_dir_entry *s_mb_proc; + spinlock_t s_mb_history_lock; + int s_mb_history_filter; + + /* stats for buddy allocator */ + spinlock_t s_mb_pa_lock; + atomic_t s_bal_reqs; /* number of reqs with len > 1 */ + atomic_t s_bal_success; /* we found long enough chunks */ + atomic_t s_bal_allocated; /* in blocks */ + atomic_t s_bal_ex_scanned; /* total extents scanned */ + atomic_t s_bal_goals; /* goal hits */ + atomic_t s_bal_breaks; /* too long searches */ + atomic_t s_bal_2orders; /* 2^order hits */ + spinlock_t s_bal_lock; + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; + atomic_t s_mb_lost_chunks; + atomic_t s_mb_preallocated; + atomic_t s_mb_discarded; + + /* locality groups */ + struct ext4_locality_group *s_locality_groups; +}; + +#endif /* _EXT4_SB */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4e6afc812fda..a472bc046363 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -40,8 +39,9 @@ #include #include #include -#include #include +#include "ext4_jbd2.h" +#include "ext4_extents.h" /* diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 20507a24506a..4159be6366ab 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -21,8 +21,8 @@ #include #include #include -#include -#include +#include "ext4.h" +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a04a1ac4e0cf..1c8ba48d4f8d 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -27,8 +27,8 @@ #include #include #include -#include -#include +#include "ext4.h" +#include "ext4_jbd2.h" /* * akpm: A new design for ext4_sync_file(). 
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 1555024e3b36..1d6329dbe390 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -11,8 +11,8 @@ #include #include -#include #include +#include "ext4.h" #define DELTA 0x9E3779B9 diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index a86377401ff0..d59bdf7233b5 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include #include @@ -25,7 +23,8 @@ #include #include #include - +#include "ext4.h" +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include "group.h" diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bd1a391725c0..0c94db462c2f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -36,6 +35,7 @@ #include #include #include +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index ce937fe432a0..7a6c2f1faba6 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -10,13 +10,13 @@ #include #include #include -#include -#include #include #include #include #include #include +#include "ext4_jbd2.h" +#include "ext4.h" long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0b46fc0ca196..f87471de3af7 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -24,8 +24,6 @@ #include #include #include -#include -#include #include #include #include @@ -34,6 +32,8 @@ #include #include #include +#include "ext4_jbd2.h" +#include "ext4.h" #include "group.h" /* diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 9b4fb07d192c..b9e077ba07e9 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -13,8 +13,8 @@ */ #include -#include -#include +#include "ext4_jbd2.h" +#include "ext4_extents.h" /* * The contiguous blocks details which can be diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 7fc1bc1c16d1..ab16beaa830d 100644 --- 
a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -28,14 +28,14 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include "ext4.h" +#include "ext4_jbd2.h" #include "namei.h" #include "xattr.h" diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0ca63dcbdf88..9f086a6a472b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -11,11 +11,10 @@ #define EXT4FS_DEBUG -#include - #include #include +#include "ext4_jbd2.h" #include "group.h" #define outside(b, first, last) ((b) < (first) || (b) >= (last)) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e3b3483b600d..3435184114c4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -21,8 +21,6 @@ #include #include #include -#include -#include #include #include #include @@ -38,9 +36,10 @@ #include #include #include - #include +#include "ext4.h" +#include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include "namei.h" diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index e6f9da4287c4..e9178643dc01 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -19,8 +19,8 @@ #include #include -#include #include +#include "ext4.h" #include "xattr.h" static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index df4810d5a387..3fbc2c6c3d0e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -53,11 +53,11 @@ #include #include #include -#include -#include #include #include #include +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index f17eaf2321b9..ca5f89fc6cae 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -6,9 +6,9 @@ #include #include #include -#include -#include #include +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" static size_t diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index e0f05acdafec..fff33382cadc 100644 --- a/fs/ext4/xattr_trusted.c +++ 
b/fs/ext4/xattr_trusted.c @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #define XATTR_TRUSTED_PREFIX "trusted." diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 7ed3d8ebf096..67be723fcc4e 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -8,8 +8,8 @@ #include #include #include -#include -#include +#include "ext4_jbd2.h" +#include "ext4.h" #include "xattr.h" #define XATTR_USER_PREFIX "user." diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h deleted file mode 100644 index 1ae0f965f386..000000000000 --- a/include/linux/ext4_fs.h +++ /dev/null @@ -1,1205 +0,0 @@ -/* - * linux/include/linux/ext4_fs.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _LINUX_EXT4_FS_H -#define _LINUX_EXT4_FS_H - -#include -#include -#include -#include - -/* - * The second extended filesystem constants/structures - */ - -/* - * Define EXT4FS_DEBUG to produce debug messages - */ -#undef EXT4FS_DEBUG - -/* - * Define EXT4_RESERVATION to reserve data blocks for expanding files - */ -#define EXT4_DEFAULT_RESERVE_BLOCKS 8 -/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ -#define EXT4_MAX_RESERVE_BLOCKS 1027 -#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0 - -/* - * Debug code - */ -#ifdef EXT4FS_DEBUG -#define ext4_debug(f, a...) \ - do { \ - printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ - __FILE__, __LINE__, __FUNCTION__); \ - printk (KERN_DEBUG f, ## a); \ - } while (0) -#else -#define ext4_debug(f, a...) do {} while (0) -#endif - -#define EXT4_MULTIBLOCK_ALLOCATOR 1 - -/* prefer goal again. 
length */ -#define EXT4_MB_HINT_MERGE 1 -/* blocks already reserved */ -#define EXT4_MB_HINT_RESERVED 2 -/* metadata is being allocated */ -#define EXT4_MB_HINT_METADATA 4 -/* first blocks in the file */ -#define EXT4_MB_HINT_FIRST 8 -/* search for the best chunk */ -#define EXT4_MB_HINT_BEST 16 -/* data is being allocated */ -#define EXT4_MB_HINT_DATA 32 -/* don't preallocate (for tails) */ -#define EXT4_MB_HINT_NOPREALLOC 64 -/* allocate for locality group */ -#define EXT4_MB_HINT_GROUP_ALLOC 128 -/* allocate goal blocks or none */ -#define EXT4_MB_HINT_GOAL_ONLY 256 -/* goal is meaningful */ -#define EXT4_MB_HINT_TRY_GOAL 512 - -struct ext4_allocation_request { - /* target inode for block we're allocating */ - struct inode *inode; - /* logical block in target inode */ - ext4_lblk_t logical; - /* phys. target (a hint) */ - ext4_fsblk_t goal; - /* the closest logical allocated block to the left */ - ext4_lblk_t lleft; - /* phys. block for ^^^ */ - ext4_fsblk_t pleft; - /* the closest logical allocated block to the right */ - ext4_lblk_t lright; - /* phys. block for ^^^ */ - ext4_fsblk_t pright; - /* how many blocks we want to allocate */ - unsigned long len; - /* flags. 
see above EXT4_MB_HINT_* */ - unsigned long flags; -}; - -/* - * Special inodes numbers - */ -#define EXT4_BAD_INO 1 /* Bad blocks inode */ -#define EXT4_ROOT_INO 2 /* Root inode */ -#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ -#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ -#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ -#define EXT4_JOURNAL_INO 8 /* Journal inode */ - -/* First non-reserved inode for old ext4 filesystems */ -#define EXT4_GOOD_OLD_FIRST_INO 11 - -/* - * Maximal count of links to a file - */ -#define EXT4_LINK_MAX 65000 - -/* - * Macro-instructions used to manage several block sizes - */ -#define EXT4_MIN_BLOCK_SIZE 1024 -#define EXT4_MAX_BLOCK_SIZE 65536 -#define EXT4_MIN_BLOCK_LOG_SIZE 10 -#ifdef __KERNEL__ -# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) -#else -# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) -#endif -#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) -#ifdef __KERNEL__ -# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -#else -# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) -#endif -#ifdef __KERNEL__ -#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits) -#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size) -#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino) -#else -#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ - EXT4_GOOD_OLD_INODE_SIZE : \ - (s)->s_inode_size) -#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? 
\ - EXT4_GOOD_OLD_FIRST_INO : \ - (s)->s_first_ino) -#endif -#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) - -/* - * Structure of a blocks group descriptor - */ -struct ext4_group_desc -{ - __le32 bg_block_bitmap_lo; /* Blocks bitmap block */ - __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */ - __le32 bg_inode_table_lo; /* Inodes table block */ - __le16 bg_free_blocks_count; /* Free blocks count */ - __le16 bg_free_inodes_count; /* Free inodes count */ - __le16 bg_used_dirs_count; /* Directories count */ - __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ - __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ - __le16 bg_itable_unused; /* Unused inodes count */ - __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ - __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ - __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ - __le32 bg_inode_table_hi; /* Inodes table block MSB */ - __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ - __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ - __le16 bg_used_dirs_count_hi; /* Directories count MSB */ - __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ - __u32 bg_reserved2[3]; -}; - -#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ -#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ -#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ - -#ifdef __KERNEL__ -#include -#endif -/* - * Macro-instructions used to manage group descriptors - */ -#define EXT4_MIN_DESC_SIZE 32 -#define EXT4_MIN_DESC_SIZE_64BIT 64 -#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE -#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) -#ifdef __KERNEL__ -# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) -# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) -# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) -# define EXT4_DESC_PER_BLOCK_BITS(s) 
(EXT4_SB(s)->s_desc_per_block_bits) -#else -# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) -# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s)) -# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) -#endif - -/* - * Constants relative to the data blocks - */ -#define EXT4_NDIR_BLOCKS 12 -#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS -#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) -#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) -#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) - -/* - * Inode flags - */ -#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */ -#define EXT4_UNRM_FL 0x00000002 /* Undelete */ -#define EXT4_COMPR_FL 0x00000004 /* Compress file */ -#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */ -#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */ -#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */ -#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */ -#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */ -/* Reserved for compression usage... 
*/ -#define EXT4_DIRTY_FL 0x00000100 -#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */ -#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */ -/* End compression flags --- maybe not all used */ -#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */ -#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ -#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ -#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ -#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ -#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ -#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ - -#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - -/* - * Inode dynamic state flags - */ -#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ -#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ -#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ -#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ - -/* Used to pass group descriptor data when online resize is done */ -struct ext4_new_group_input { - __u32 group; /* Group number for this data */ - __u64 block_bitmap; /* Absolute block number of block bitmap */ - __u64 inode_bitmap; /* Absolute block number of inode bitmap */ - __u64 inode_table; /* Absolute block number of inode table start */ - __u32 blocks_count; /* Total number of blocks in this group */ - __u16 reserved_blocks; /* Number of reserved blocks in this group */ - __u16 unused; -}; - -/* The struct ext4_new_group_input in kernel space, with free_blocks_count */ -struct 
ext4_new_group_data { - __u32 group; - __u64 block_bitmap; - __u64 inode_bitmap; - __u64 inode_table; - __u32 blocks_count; - __u16 reserved_blocks; - __u16 unused; - __u32 free_blocks_count; -}; - -/* - * Following is used by preallocation code to tell get_blocks() that we - * want uninitialzed extents. - */ -#define EXT4_CREATE_UNINITIALIZED_EXT 2 - -/* - * ioctl commands - */ -#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS -#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS -#define EXT4_IOC_GETVERSION _IOR('f', 3, long) -#define EXT4_IOC_SETVERSION _IOW('f', 4, long) -#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) -#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) -#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION -#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION -#ifdef CONFIG_JBD2_DEBUG -#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) -#endif -#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) -#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) -#define EXT4_IOC_MIGRATE _IO('f', 7) - -/* - * ioctl commands in 32 bit emulation - */ -#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS -#define EXT4_IOC32_GETVERSION _IOR('f', 3, int) -#define EXT4_IOC32_SETVERSION _IOW('f', 4, int) -#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) -#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) -#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) -#ifdef CONFIG_JBD2_DEBUG -#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) -#endif -#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION -#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION - - -/* - * Mount options - */ -struct ext4_mount_options { - unsigned long s_mount_opt; - uid_t s_resuid; - gid_t s_resgid; - unsigned long s_commit_interval; -#ifdef CONFIG_QUOTA - int s_jquota_fmt; - char *s_qf_names[MAXQUOTAS]; -#endif -}; - -/* - * Structure of an inode on the disk - */ -struct ext4_inode { - __le16 i_mode; /* File mode */ - __le16 i_uid; /* Low 16 bits of Owner 
Uid */ - __le32 i_size_lo; /* Size in bytes */ - __le32 i_atime; /* Access time */ - __le32 i_ctime; /* Inode Change time */ - __le32 i_mtime; /* Modification time */ - __le32 i_dtime; /* Deletion Time */ - __le16 i_gid; /* Low 16 bits of Group Id */ - __le16 i_links_count; /* Links count */ - __le32 i_blocks_lo; /* Blocks count */ - __le32 i_flags; /* File flags */ - union { - struct { - __le32 l_i_version; - } linux1; - struct { - __u32 h_i_translator; - } hurd1; - struct { - __u32 m_i_reserved1; - } masix1; - } osd1; /* OS dependent 1 */ - __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ - __le32 i_generation; /* File version (for NFS) */ - __le32 i_file_acl_lo; /* File ACL */ - __le32 i_size_high; - __le32 i_obso_faddr; /* Obsoleted fragment address */ - union { - struct { - __le16 l_i_blocks_high; /* were l_i_reserved1 */ - __le16 l_i_file_acl_high; - __le16 l_i_uid_high; /* these 2 fields */ - __le16 l_i_gid_high; /* were reserved2[0] */ - __u32 l_i_reserved2; - } linux2; - struct { - __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ - __u16 h_i_mode_high; - __u16 h_i_uid_high; - __u16 h_i_gid_high; - __u32 h_i_author; - } hurd2; - struct { - __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ - __le16 m_i_file_acl_high; - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ - __le16 i_extra_isize; - __le16 i_pad1; - __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ - __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ - __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ - __le32 i_crtime; /* File Creation time */ - __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ - __le32 i_version_hi; /* high 32 bits for 64-bit version */ -}; - - -#define EXT4_EPOCH_BITS 2 -#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) -#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) - -/* - * Extended fields will fit into 
an inode if the filesystem was formatted - * with large inodes (-I 256 or larger) and there are not currently any EAs - * consuming all of the available space. For new inodes we always reserve - * enough space for the kernel's known extended fields, but for inodes - * created with an old kernel this might not have been the case. None of - * the extended inode fields is critical for correct filesystem operation. - * This macro checks if a certain field fits in the inode. Note that - * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize - */ -#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \ - ((offsetof(typeof(*ext4_inode), field) + \ - sizeof((ext4_inode)->field)) \ - <= (EXT4_GOOD_OLD_INODE_SIZE + \ - (einode)->i_extra_isize)) \ - -static inline __le32 ext4_encode_extra_time(struct timespec *time) -{ - return cpu_to_le32((sizeof(time->tv_sec) > 4 ? - time->tv_sec >> 32 : 0) | - ((time->tv_nsec << 2) & EXT4_NSEC_MASK)); -} - -static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) -{ - if (sizeof(time->tv_sec) > 4) - time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) - << 32; - time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2; -} - -#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ -do { \ - (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ - if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ - (raw_inode)->xtime ## _extra = \ - ext4_encode_extra_time(&(inode)->xtime); \ -} while (0) - -#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ -do { \ - if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ - (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ - if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ - (raw_inode)->xtime ## _extra = \ - ext4_encode_extra_time(&(einode)->xtime); \ -} while (0) - -#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ -do { \ - (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ - if 
(EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ - ext4_decode_extra_time(&(inode)->xtime, \ - raw_inode->xtime ## _extra); \ -} while (0) - -#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ -do { \ - if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ - (einode)->xtime.tv_sec = \ - (signed)le32_to_cpu((raw_inode)->xtime); \ - if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ - ext4_decode_extra_time(&(einode)->xtime, \ - raw_inode->xtime ## _extra); \ -} while (0) - -#define i_disk_version osd1.linux1.l_i_version - -#if defined(__KERNEL__) || defined(__linux__) -#define i_reserved1 osd1.linux1.l_i_reserved1 -#define i_file_acl_high osd2.linux2.l_i_file_acl_high -#define i_blocks_high osd2.linux2.l_i_blocks_high -#define i_uid_low i_uid -#define i_gid_low i_gid -#define i_uid_high osd2.linux2.l_i_uid_high -#define i_gid_high osd2.linux2.l_i_gid_high -#define i_reserved2 osd2.linux2.l_i_reserved2 - -#elif defined(__GNU__) - -#define i_translator osd1.hurd1.h_i_translator -#define i_uid_high osd2.hurd2.h_i_uid_high -#define i_gid_high osd2.hurd2.h_i_gid_high -#define i_author osd2.hurd2.h_i_author - -#elif defined(__masix__) - -#define i_reserved1 osd1.masix1.m_i_reserved1 -#define i_file_acl_high osd2.masix2.m_i_file_acl_high -#define i_reserved2 osd2.masix2.m_i_reserved2 - -#endif /* defined(__KERNEL__) || defined(__linux__) */ - -/* - * File system states - */ -#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ -#define EXT4_ERROR_FS 0x0002 /* Errors detected */ -#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ - -/* - * Misc. 
filesystem flags - */ -#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ -#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ -#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ - -/* - * Mount flags - */ -#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ -#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ -#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ -#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ -#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ -#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ -#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ -#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ -#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ -#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ -#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ -#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ -#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ -#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ -#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ -#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ -#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ -#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ -#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */ -#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ -#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ -#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ -#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ -#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ -#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ -#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ -#define 
EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ -#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ -#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ -/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ -#ifndef _LINUX_EXT2_FS_H -#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt -#define set_opt(o, opt) o |= EXT4_MOUNT_##opt -#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ - EXT4_MOUNT_##opt) -#else -#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD -#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT -#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS -#endif - -#define ext4_set_bit ext2_set_bit -#define ext4_set_bit_atomic ext2_set_bit_atomic -#define ext4_clear_bit ext2_clear_bit -#define ext4_clear_bit_atomic ext2_clear_bit_atomic -#define ext4_test_bit ext2_test_bit -#define ext4_find_first_zero_bit ext2_find_first_zero_bit -#define ext4_find_next_zero_bit ext2_find_next_zero_bit -#define ext4_find_next_bit ext2_find_next_bit - -/* - * Maximal mount counts between two filesystem checks - */ -#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ -#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ - -/* - * Behaviour when detecting errors - */ -#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */ -#define EXT4_ERRORS_RO 2 /* Remount fs read-only */ -#define EXT4_ERRORS_PANIC 3 /* Panic */ -#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE - -/* - * Structure of the super block - */ -struct ext4_super_block { -/*00*/ __le32 s_inodes_count; /* Inodes count */ - __le32 s_blocks_count_lo; /* Blocks count */ - __le32 s_r_blocks_count_lo; /* Reserved blocks count */ - __le32 s_free_blocks_count_lo; /* Free blocks count */ -/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ - __le32 s_first_data_block; /* First Data Block */ - __le32 s_log_block_size; /* Block size */ - __le32 s_obso_log_frag_size; /* Obsoleted fragment size */ -/*20*/ __le32 s_blocks_per_group; /* # Blocks 
per group */ - __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */ - __le32 s_inodes_per_group; /* # Inodes per group */ - __le32 s_mtime; /* Mount time */ -/*30*/ __le32 s_wtime; /* Write time */ - __le16 s_mnt_count; /* Mount count */ - __le16 s_max_mnt_count; /* Maximal mount count */ - __le16 s_magic; /* Magic signature */ - __le16 s_state; /* File system state */ - __le16 s_errors; /* Behaviour when detecting errors */ - __le16 s_minor_rev_level; /* minor revision level */ -/*40*/ __le32 s_lastcheck; /* time of last check */ - __le32 s_checkinterval; /* max. time between checks */ - __le32 s_creator_os; /* OS */ - __le32 s_rev_level; /* Revision level */ -/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ - __le16 s_def_resgid; /* Default gid for reserved blocks */ - /* - * These fields are for EXT4_DYNAMIC_REV superblocks only. - * - * Note: the difference between the compatible feature set and - * the incompatible feature set is that if there is a bit set - * in the incompatible feature set that the kernel doesn't - * know about, it should refuse to mount the filesystem. - * - * e2fsck's requirements are more strict; if it doesn't know - * about a feature in either the compatible or incompatible - * feature set, it must abort and not try to meddle with - * things it doesn't understand... - */ - __le32 s_first_ino; /* First non-reserved inode */ - __le16 s_inode_size; /* size of inode structure */ - __le16 s_block_group_nr; /* block group # of this superblock */ - __le32 s_feature_compat; /* compatible feature set */ -/*60*/ __le32 s_feature_incompat; /* incompatible feature set */ - __le32 s_feature_ro_compat; /* readonly-compatible feature set */ -/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ -/*78*/ char s_volume_name[16]; /* volume name */ -/*88*/ char s_last_mounted[64]; /* directory where last mounted */ -/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ - /* - * Performance hints. 
Directory preallocation should only - * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. - */ - __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ - __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ - __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ - /* - * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. - */ -/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ -/*E0*/ __le32 s_journal_inum; /* inode number of journal file */ - __le32 s_journal_dev; /* device number of journal file */ - __le32 s_last_orphan; /* start of list of inodes to delete */ - __le32 s_hash_seed[4]; /* HTREE hash seed */ - __u8 s_def_hash_version; /* Default hash version to use */ - __u8 s_reserved_char_pad; - __le16 s_desc_size; /* size of group descriptor */ -/*100*/ __le32 s_default_mount_opts; - __le32 s_first_meta_bg; /* First metablock block group */ - __le32 s_mkfs_time; /* When the filesystem was created */ - __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ - /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ -/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ - __le32 s_r_blocks_count_hi; /* Reserved blocks count */ - __le32 s_free_blocks_count_hi; /* Free blocks count */ - __le16 s_min_extra_isize; /* All inodes have at least # bytes */ - __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ - __le32 s_flags; /* Miscellaneous flags */ - __le16 s_raid_stride; /* RAID stride */ - __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ - __le64 s_mmp_block; /* Block for multi-mount protection */ - __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ - __u32 s_reserved[163]; /* Padding to the end of the block */ -}; - -#ifdef __KERNEL__ -static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} -static inline struct ext4_inode_info *EXT4_I(struct inode *inode) -{ - return container_of(inode, struct 
ext4_inode_info, vfs_inode); -} - -static inline struct timespec ext4_current_time(struct inode *inode) -{ - return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? - current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; -} - - -static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) -{ - return ino == EXT4_ROOT_INO || - ino == EXT4_JOURNAL_INO || - ino == EXT4_RESIZE_INO || - (ino >= EXT4_FIRST_INO(sb) && - ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); -} -#else -/* Assume that user mode programs are passing in an ext4fs superblock, not - * a kernel struct super_block. This will allow us to call the feature-test - * macros from user land. */ -#define EXT4_SB(sb) (sb) -#endif - -#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime - -/* - * Codes for operating systems - */ -#define EXT4_OS_LINUX 0 -#define EXT4_OS_HURD 1 -#define EXT4_OS_MASIX 2 -#define EXT4_OS_FREEBSD 3 -#define EXT4_OS_LITES 4 - -/* - * Revision levels - */ -#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ -#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ - -#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV -#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV - -#define EXT4_GOOD_OLD_INODE_SIZE 128 - -/* - * Feature set definitions - */ - -#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ - ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) -#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ - ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) -#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ - ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) -#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) -#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) -#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) -#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_compat &= 
~cpu_to_le32(mask) -#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) -#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ - EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) - -#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 -#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 -#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 -#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 -#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 -#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 - -#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 -#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 -#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 -#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 -#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 -#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 - -#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 -#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 -#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ -#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ -#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 -#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ -#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 -#define EXT4_FEATURE_INCOMPAT_MMP 0x0100 -#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 - -#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR -#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ - EXT4_FEATURE_INCOMPAT_RECOVER| \ - EXT4_FEATURE_INCOMPAT_META_BG| \ - EXT4_FEATURE_INCOMPAT_EXTENTS| \ - EXT4_FEATURE_INCOMPAT_64BIT| \ - EXT4_FEATURE_INCOMPAT_FLEX_BG) -#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ - EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ - EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ - EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ - EXT4_FEATURE_RO_COMPAT_HUGE_FILE) - -/* - * Default 
values for user and/or group using reserved blocks - */ -#define EXT4_DEF_RESUID 0 -#define EXT4_DEF_RESGID 0 - -/* - * Default mount options - */ -#define EXT4_DEFM_DEBUG 0x0001 -#define EXT4_DEFM_BSDGROUPS 0x0002 -#define EXT4_DEFM_XATTR_USER 0x0004 -#define EXT4_DEFM_ACL 0x0008 -#define EXT4_DEFM_UID16 0x0010 -#define EXT4_DEFM_JMODE 0x0060 -#define EXT4_DEFM_JMODE_DATA 0x0020 -#define EXT4_DEFM_JMODE_ORDERED 0x0040 -#define EXT4_DEFM_JMODE_WBACK 0x0060 - -/* - * Structure of a directory entry - */ -#define EXT4_NAME_LEN 255 - -struct ext4_dir_entry { - __le32 inode; /* Inode number */ - __le16 rec_len; /* Directory entry length */ - __le16 name_len; /* Name length */ - char name[EXT4_NAME_LEN]; /* File name */ -}; - -/* - * The new version of the directory entry. Since EXT4 structures are - * stored in intel byte order, and the name_len field could never be - * bigger than 255 chars, it's safe to reclaim the extra byte for the - * file_type field. - */ -struct ext4_dir_entry_2 { - __le32 inode; /* Inode number */ - __le16 rec_len; /* Directory entry length */ - __u8 name_len; /* Name length */ - __u8 file_type; - char name[EXT4_NAME_LEN]; /* File name */ -}; - -/* - * Ext4 directory file types. Only the low 3 bits are used. The - * other bits are reserved for now. 
- */ -#define EXT4_FT_UNKNOWN 0 -#define EXT4_FT_REG_FILE 1 -#define EXT4_FT_DIR 2 -#define EXT4_FT_CHRDEV 3 -#define EXT4_FT_BLKDEV 4 -#define EXT4_FT_FIFO 5 -#define EXT4_FT_SOCK 6 -#define EXT4_FT_SYMLINK 7 - -#define EXT4_FT_MAX 8 - -/* - * EXT4_DIR_PAD defines the directory entries boundaries - * - * NOTE: It must be a multiple of 4 - */ -#define EXT4_DIR_PAD 4 -#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) -#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ - ~EXT4_DIR_ROUND) -#define EXT4_MAX_REC_LEN ((1<<16)-1) - -static inline unsigned ext4_rec_len_from_disk(__le16 dlen) -{ - unsigned len = le16_to_cpu(dlen); - - if (len == EXT4_MAX_REC_LEN) - return 1 << 16; - return len; -} - -static inline __le16 ext4_rec_len_to_disk(unsigned len) -{ - if (len == (1 << 16)) - return cpu_to_le16(EXT4_MAX_REC_LEN); - else if (len > (1 << 16)) - BUG(); - return cpu_to_le16(len); -} - -/* - * Hash Tree Directory indexing - * (c) Daniel Phillips, 2001 - */ - -#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ - EXT4_FEATURE_COMPAT_DIR_INDEX) && \ - (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) -#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) -#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) - -/* Legal values for the dx_root hash_version field: */ - -#define DX_HASH_LEGACY 0 -#define DX_HASH_HALF_MD4 1 -#define DX_HASH_TEA 2 - -#ifdef __KERNEL__ - -/* hash info structure used by the directory hash */ -struct dx_hash_info -{ - u32 hash; - u32 minor_hash; - int hash_version; - u32 *seed; -}; - -#define EXT4_HTREE_EOF 0x7fffffff - -/* - * Control parameters used by ext4_htree_next_block - */ -#define HASH_NB_ALWAYS 1 - - -/* - * Describe an inode's exact location on disk and in memory - */ -struct ext4_iloc -{ - struct buffer_head *bh; - unsigned long offset; - ext4_group_t block_group; -}; - -static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) -{ - return (struct ext4_inode *) 
(iloc->bh->b_data + iloc->offset); -} - -/* - * This structure is stuffed into the struct file's private_data field - * for directories. It is where we put information so that we can do - * readdir operations in hash tree order. - */ -struct dir_private_info { - struct rb_root root; - struct rb_node *curr_node; - struct fname *extra_fname; - loff_t last_pos; - __u32 curr_hash; - __u32 curr_minor_hash; - __u32 next_hash; -}; - -/* calculate the first block number of the group */ -static inline ext4_fsblk_t -ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) -{ - return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); -} - -/* - * Special error return code only used by dx_probe() and its callers. - */ -#define ERR_BAD_DX_DIR -75000 - -void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, - unsigned long *blockgrpp, ext4_grpblk_t *offsetp); - -/* - * Function prototypes - */ - -/* - * Ok, these declarations are also in but none of the - * ext4 source programs needs to include it so they are duplicated here. 
- */ -# define NORET_TYPE /**/ -# define ATTRIB_NORET __attribute__((noreturn)) -# define NORET_AND noreturn, - -/* balloc.c */ -extern unsigned int ext4_block_group(struct super_block *sb, - ext4_fsblk_t blocknr); -extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, - ext4_fsblk_t blocknr); -extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); -extern unsigned long ext4_bg_num_gdb(struct super_block *sb, - ext4_group_t group); -extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, int *errp); -extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp); -extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp); -extern void ext4_free_blocks (handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, int metadata); -extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, - ext4_fsblk_t block, unsigned long count, - unsigned long *pdquot_freed_blocks); -extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); -extern void ext4_check_blocks_bitmap (struct super_block *); -extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, - ext4_group_t block_group, - struct buffer_head ** bh); -extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); -extern void ext4_init_block_alloc_info(struct inode *); -extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); - -/* dir.c */ -extern int ext4_check_dir_entry(const char *, struct inode *, - struct ext4_dir_entry_2 *, - struct buffer_head *, unsigned long); -extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, - __u32 minor_hash, - struct ext4_dir_entry_2 *dirent); -extern void ext4_htree_free_dir_info(struct dir_private_info *p); - -/* fsync.c */ -extern int 
ext4_sync_file (struct file *, struct dentry *, int); - -/* hash.c */ -extern int ext4fs_dirhash(const char *name, int len, struct - dx_hash_info *hinfo); - -/* ialloc.c */ -extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); -extern void ext4_free_inode (handle_t *, struct inode *); -extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); -extern unsigned long ext4_count_free_inodes (struct super_block *); -extern unsigned long ext4_count_dirs (struct super_block *); -extern void ext4_check_inodes_bitmap (struct super_block *); -extern unsigned long ext4_count_free (struct buffer_head *, unsigned); - -/* mballoc.c */ -extern long ext4_mb_stats; -extern long ext4_mb_max_to_scan; -extern int ext4_mb_init(struct super_block *, int); -extern int ext4_mb_release(struct super_block *); -extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, - struct ext4_allocation_request *, int *); -extern int ext4_mb_reserve_blocks(struct super_block *, int); -extern void ext4_mb_discard_inode_preallocations(struct inode *); -extern int __init init_ext4_mballoc(void); -extern void exit_ext4_mballoc(void); -extern void ext4_mb_free_blocks(handle_t *, struct inode *, - unsigned long, unsigned long, int, unsigned long *); - - -/* inode.c */ -int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t blocknr); -struct buffer_head *ext4_getblk(handle_t *, struct inode *, - ext4_lblk_t, int, int *); -struct buffer_head *ext4_bread(handle_t *, struct inode *, - ext4_lblk_t, int, int *); -int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, unsigned long maxblocks, - struct buffer_head *bh_result, - int create, int extend_disksize); - -extern struct inode *ext4_iget(struct super_block *, unsigned long); -extern int ext4_write_inode (struct inode *, int); -extern int ext4_setattr (struct dentry *, struct iattr *); -extern void ext4_delete_inode (struct inode *); -extern 
int ext4_sync_inode (handle_t *, struct inode *); -extern void ext4_discard_reservation (struct inode *); -extern void ext4_dirty_inode(struct inode *); -extern int ext4_change_inode_journal_flag(struct inode *, int); -extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); -extern void ext4_truncate (struct inode *); -extern void ext4_set_inode_flags(struct inode *); -extern void ext4_get_inode_flags(struct ext4_inode_info *); -extern void ext4_set_aops(struct inode *inode); -extern int ext4_writepage_trans_blocks(struct inode *); -extern int ext4_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from); - -/* ioctl.c */ -extern long ext4_ioctl(struct file *, unsigned int, unsigned long); -extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); - -/* migrate.c */ -extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, - unsigned long); -/* namei.c */ -extern int ext4_orphan_add(handle_t *, struct inode *); -extern int ext4_orphan_del(handle_t *, struct inode *); -extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, - __u32 start_minor_hash, __u32 *next_hash); - -/* resize.c */ -extern int ext4_group_add(struct super_block *sb, - struct ext4_new_group_data *input); -extern int ext4_group_extend(struct super_block *sb, - struct ext4_super_block *es, - ext4_fsblk_t n_blocks_count); - -/* super.c */ -extern void ext4_error (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void __ext4_std_error (struct super_block *, const char *, int); -extern void ext4_abort (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void ext4_warning (struct super_block *, const char *, const char *, ...) 
- __attribute__ ((format (printf, 3, 4))); -extern void ext4_update_dynamic_rev (struct super_block *sb); -extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, - __u32 compat); -extern int ext4_update_rocompat_feature(handle_t *handle, - struct super_block *sb, __u32 rocompat); -extern int ext4_update_incompat_feature(handle_t *handle, - struct super_block *sb, __u32 incompat); -extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, - struct ext4_group_desc *bg); -extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, - struct ext4_group_desc *bg); -extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, - struct ext4_group_desc *bg); -extern void ext4_block_bitmap_set(struct super_block *sb, - struct ext4_group_desc *bg, ext4_fsblk_t blk); -extern void ext4_inode_bitmap_set(struct super_block *sb, - struct ext4_group_desc *bg, ext4_fsblk_t blk); -extern void ext4_inode_table_set(struct super_block *sb, - struct ext4_group_desc *bg, ext4_fsblk_t blk); - -static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) -{ - return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | - le32_to_cpu(es->s_blocks_count_lo); -} - -static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es) -{ - return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) | - le32_to_cpu(es->s_r_blocks_count_lo); -} - -static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es) -{ - return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) | - le32_to_cpu(es->s_free_blocks_count_lo); -} - -static inline void ext4_blocks_count_set(struct ext4_super_block *es, - ext4_fsblk_t blk) -{ - es->s_blocks_count_lo = cpu_to_le32((u32)blk); - es->s_blocks_count_hi = cpu_to_le32(blk >> 32); -} - -static inline void ext4_free_blocks_count_set(struct ext4_super_block *es, - ext4_fsblk_t blk) -{ - es->s_free_blocks_count_lo = cpu_to_le32((u32)blk); - es->s_free_blocks_count_hi = cpu_to_le32(blk 
>> 32); -} - -static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, - ext4_fsblk_t blk) -{ - es->s_r_blocks_count_lo = cpu_to_le32((u32)blk); - es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); -} - -static inline loff_t ext4_isize(struct ext4_inode *raw_inode) -{ - return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | - le32_to_cpu(raw_inode->i_size_lo); -} - -static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) -{ - raw_inode->i_size_lo = cpu_to_le32(i_size); - raw_inode->i_size_high = cpu_to_le32(i_size >> 32); -} - -static inline -struct ext4_group_info *ext4_get_group_info(struct super_block *sb, - ext4_group_t group) -{ - struct ext4_group_info ***grp_info; - long indexv, indexh; - grp_info = EXT4_SB(sb)->s_group_info; - indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); - indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - return grp_info[indexv][indexh]; -} - - -#define ext4_std_error(sb, errno) \ -do { \ - if ((errno)) \ - __ext4_std_error((sb), __FUNCTION__, (errno)); \ -} while (0) - -/* - * Inodes and files operations - */ - -/* dir.c */ -extern const struct file_operations ext4_dir_operations; - -/* file.c */ -extern const struct inode_operations ext4_file_inode_operations; -extern const struct file_operations ext4_file_operations; - -/* namei.c */ -extern const struct inode_operations ext4_dir_inode_operations; -extern const struct inode_operations ext4_special_inode_operations; - -/* symlink.c */ -extern const struct inode_operations ext4_symlink_inode_operations; -extern const struct inode_operations ext4_fast_symlink_inode_operations; - -/* extents.c */ -extern int ext4_ext_tree_init(handle_t *handle, struct inode *); -extern int ext4_ext_writepage_trans_blocks(struct inode *, int); -extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, - unsigned long max_blocks, struct buffer_head *bh_result, - int create, int extend_disksize); -extern void 
ext4_ext_truncate(struct inode *, struct page *); -extern void ext4_ext_init(struct super_block *); -extern void ext4_ext_release(struct super_block *); -extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, - loff_t len); -extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, - sector_t block, unsigned long max_blocks, - struct buffer_head *bh, int create, - int extend_disksize); -#endif /* __KERNEL__ */ - -#endif /* _LINUX_EXT4_FS_H */ diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h deleted file mode 100644 index 1285c583b2d8..000000000000 --- a/include/linux/ext4_fs_extents.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com - * Written by Alex Tomas - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- - */ - -#ifndef _LINUX_EXT4_EXTENTS -#define _LINUX_EXT4_EXTENTS - -#include - -/* - * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks - * becomes very small, so index split, in-depth growing and - * other hard changes happen much more often. - * This is for debug purposes only. - */ -#define AGGRESSIVE_TEST_ - -/* - * With EXTENTS_STATS defined, the number of blocks and extents - * are collected in the truncate path. They'll be shown at - * umount time. 
- */ -#define EXTENTS_STATS__ - -/* - * If CHECK_BINSEARCH is defined, then the results of the binary search - * will also be checked by linear search. - */ -#define CHECK_BINSEARCH__ - -/* - * If EXT_DEBUG is defined you can use the 'extdebug' mount option - * to get lots of info about what's going on. - */ -#define EXT_DEBUG__ -#ifdef EXT_DEBUG -#define ext_debug(a...) printk(a) -#else -#define ext_debug(a...) -#endif - -/* - * If EXT_STATS is defined then stats numbers are collected. - * These number will be displayed at umount time. - */ -#define EXT_STATS_ - - -/* - * ext4_inode has i_block array (60 bytes total). - * The first 12 bytes store ext4_extent_header; - * the remainder stores an array of ext4_extent. - */ - -/* - * This is the extent on-disk structure. - * It's used at the bottom of the tree. - */ -struct ext4_extent { - __le32 ee_block; /* first logical block extent covers */ - __le16 ee_len; /* number of blocks covered by extent */ - __le16 ee_start_hi; /* high 16 bits of physical block */ - __le32 ee_start_lo; /* low 32 bits of physical block */ -}; - -/* - * This is index on-disk structure. - * It's used at all the levels except the bottom. - */ -struct ext4_extent_idx { - __le32 ei_block; /* index covers logical blocks from 'block' */ - __le32 ei_leaf_lo; /* pointer to the physical block of the next * - * level. leaf or next index could be there */ - __le16 ei_leaf_hi; /* high 16 bits of physical block */ - __u16 ei_unused; -}; - -/* - * Each block (leaves and indexes), even inode-stored has header. - */ -struct ext4_extent_header { - __le16 eh_magic; /* probably will support different formats */ - __le16 eh_entries; /* number of valid entries */ - __le16 eh_max; /* capacity of store in entries */ - __le16 eh_depth; /* has tree real underlying blocks? */ - __le32 eh_generation; /* generation of the tree */ -}; - -#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) - -/* - * Array of ext4_ext_path contains path to some extent. 
- * Creation/lookup routines use it for traversal/splitting/etc. - * Truncate uses it to simulate recursive walking. - */ -struct ext4_ext_path { - ext4_fsblk_t p_block; - __u16 p_depth; - struct ext4_extent *p_ext; - struct ext4_extent_idx *p_idx; - struct ext4_extent_header *p_hdr; - struct buffer_head *p_bh; -}; - -/* - * structure for external API - */ - -#define EXT4_EXT_CACHE_NO 0 -#define EXT4_EXT_CACHE_GAP 1 -#define EXT4_EXT_CACHE_EXTENT 2 - - -#define EXT_MAX_BLOCK 0xffffffff - -/* - * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an - * initialized extent. This is 2^15 and not (2^16 - 1), since we use the - * MSB of ee_len field in the extent datastructure to signify if this - * particular extent is an initialized extent or an uninitialized (i.e. - * preallocated). - * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an - * uninitialized extent. - * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an - * uninitialized one. In other words, if MSB of ee_len is set, it is an - * uninitialized extent with only one special scenario when ee_len = 0x8000. - * In this case we can not have an uninitialized extent of zero length and - * thus we make it as a special case of initialized extent with 0x8000 length. - * This way we get better extent-to-group alignment for initialized extents. - * Hence, the maximum number of blocks we can have in an *initialized* - * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767). 
- */ -#define EXT_INIT_MAX_LEN (1UL << 15) -#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1) - - -#define EXT_FIRST_EXTENT(__hdr__) \ - ((struct ext4_extent *) (((char *) (__hdr__)) + \ - sizeof(struct ext4_extent_header))) -#define EXT_FIRST_INDEX(__hdr__) \ - ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \ - sizeof(struct ext4_extent_header))) -#define EXT_HAS_FREE_INDEX(__path__) \ - (le16_to_cpu((__path__)->p_hdr->eh_entries) \ - < le16_to_cpu((__path__)->p_hdr->eh_max)) -#define EXT_LAST_EXTENT(__hdr__) \ - (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) -#define EXT_LAST_INDEX(__hdr__) \ - (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) -#define EXT_MAX_EXTENT(__hdr__) \ - (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) -#define EXT_MAX_INDEX(__hdr__) \ - (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) - -static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) -{ - return (struct ext4_extent_header *) EXT4_I(inode)->i_data; -} - -static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh) -{ - return (struct ext4_extent_header *) bh->b_data; -} - -static inline unsigned short ext_depth(struct inode *inode) -{ - return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); -} - -static inline void ext4_ext_tree_changed(struct inode *inode) -{ - EXT4_I(inode)->i_ext_generation++; -} - -static inline void -ext4_ext_invalidate_cache(struct inode *inode) -{ - EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; -} - -static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) -{ - /* We can not have an uninitialized extent of zero length! 
*/ - BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); - ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); -} - -static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) -{ - /* Extent with ee_len of 0x8000 is treated as an initialized extent */ - return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); -} - -static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) -{ - return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ? - le16_to_cpu(ext->ee_len) : - (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); -} - -extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); -extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); -extern int ext4_extent_tree_init(handle_t *, struct inode *); -extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); -extern int ext4_ext_try_to_merge(struct inode *inode, - struct ext4_ext_path *path, - struct ext4_extent *); -extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); -extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); -extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, - struct ext4_ext_path *); -extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, - ext4_lblk_t *, ext4_fsblk_t *); -extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, - ext4_lblk_t *, ext4_fsblk_t *); -extern void ext4_ext_drop_refs(struct ext4_ext_path *); -#endif /* _LINUX_EXT4_EXTENTS */ - diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h deleted file mode 100644 index d5508d3cf290..000000000000 --- a/include/linux/ext4_fs_i.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * linux/include/linux/ext4_fs_i.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * 
linux/include/linux/minix_fs_i.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _LINUX_EXT4_FS_I -#define _LINUX_EXT4_FS_I - -#include -#include -#include -#include - -/* data type for block offset of block group */ -typedef int ext4_grpblk_t; - -/* data type for filesystem-wide blocks number */ -typedef unsigned long long ext4_fsblk_t; - -/* data type for file logical block number */ -typedef __u32 ext4_lblk_t; - -/* data type for block group number */ -typedef unsigned long ext4_group_t; - -struct ext4_reserve_window { - ext4_fsblk_t _rsv_start; /* First byte reserved */ - ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ -}; - -struct ext4_reserve_window_node { - struct rb_node rsv_node; - __u32 rsv_goal_size; - __u32 rsv_alloc_hit; - struct ext4_reserve_window rsv_window; -}; - -struct ext4_block_alloc_info { - /* information about reservation window */ - struct ext4_reserve_window_node rsv_window_node; - /* - * was i_next_alloc_block in ext4_inode_info - * is the logical (file-relative) number of the - * most-recently-allocated block in this file. - * We use this for detecting linearly ascending allocation requests. - */ - ext4_lblk_t last_alloc_logical_block; - /* - * Was i_next_alloc_goal in ext4_inode_info - * is the *physical* companion to i_next_alloc_block. - * it the physical block number of the block which was most-recentl - * allocated to this file. This give us the goal (target) for the next - * allocation when we detect linearly ascending requests. 
- */ - ext4_fsblk_t last_alloc_physical_block; -}; - -#define rsv_start rsv_window._rsv_start -#define rsv_end rsv_window._rsv_end - -/* - * storage for cached extent - */ -struct ext4_ext_cache { - ext4_fsblk_t ec_start; - ext4_lblk_t ec_block; - __u32 ec_len; /* must be 32bit to return holes */ - __u32 ec_type; -}; - -/* - * third extended file system inode data in memory - */ -struct ext4_inode_info { - __le32 i_data[15]; /* unconverted */ - __u32 i_flags; - ext4_fsblk_t i_file_acl; - __u32 i_dtime; - - /* - * i_block_group is the number of the block group which contains - * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to - * place a file's data blocks near its inode block, and new inodes - * near to their parent directory's inode. - */ - ext4_group_t i_block_group; - __u32 i_state; /* Dynamic state flags for ext4 */ - - /* block reservation info */ - struct ext4_block_alloc_info *i_block_alloc_info; - - ext4_lblk_t i_dir_start_lookup; -#ifdef CONFIG_EXT4DEV_FS_XATTR - /* - * Extended attributes can be read independently of the main file - * data. Taking i_mutex even when reading would cause contention - * between readers of EAs and writers of regular file data, so - * instead we synchronize on xattr_sem when reading or changing - * EAs. - */ - struct rw_semaphore xattr_sem; -#endif -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif - - struct list_head i_orphan; /* unlinked but open inodes */ - - /* - * i_disksize keeps track of what the inode size is ON DISK, not - * in memory. During truncate, i_size is set to the new size by - * the VFS prior to calling ext4_truncate(), but the filesystem won't - * set i_disksize to 0 until the truncate is actually under way. - * - * The intent is that i_disksize always represents the blocks which - * are used by this file. 
This allows recovery to restart truncate - * on orphans if we crash during truncate. We actually write i_disksize - * into the on-disk inode when writing inodes out, instead of i_size. - * - * The only time when i_disksize and i_size may be different is when - * a truncate is in progress. The only things which change i_disksize - * are ext4_get_block (growth) and ext4_truncate (shrinkth). - */ - loff_t i_disksize; - - /* on-disk additional length */ - __u16 i_extra_isize; - - /* - * i_data_sem is for serialising ext4_truncate() against - * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's - * data tree are chopped off during truncate. We can't do that in - * ext4 because whenever we perform intermediate commits during - * truncate, the inode and all the metadata blocks *must* be in a - * consistent state which allows truncation of the orphans to restart - * during recovery. Hence we must fix the get_block-vs-truncate race - * by other means, so we have i_data_sem. - */ - struct rw_semaphore i_data_sem; - struct inode vfs_inode; - - unsigned long i_ext_generation; - struct ext4_ext_cache i_cached_extent; - /* - * File creation time. Its function is same as that of - * struct timespec i_{a,c,m}time in the generic inode. 
- */ - struct timespec i_crtime; - - /* mballoc */ - struct list_head i_prealloc_list; - spinlock_t i_prealloc_lock; -}; - -#endif /* _LINUX_EXT4_FS_I */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h deleted file mode 100644 index abaae2c8cccf..000000000000 --- a/include/linux/ext4_fs_sb.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * linux/include/linux/ext4_fs_sb.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs_sb.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _LINUX_EXT4_FS_SB -#define _LINUX_EXT4_FS_SB - -#ifdef __KERNEL__ -#include -#include -#include -#include -#endif -#include - -/* - * third extended-fs super-block data in memory - */ -struct ext4_sb_info { - unsigned long s_desc_size; /* Size of a group descriptor in bytes */ - unsigned long s_inodes_per_block;/* Number of inodes per block */ - unsigned long s_blocks_per_group;/* Number of blocks in a group */ - unsigned long s_inodes_per_group;/* Number of inodes in a group */ - unsigned long s_itb_per_group; /* Number of inode table blocks per group */ - unsigned long s_gdb_count; /* Number of group descriptor blocks */ - unsigned long s_desc_per_block; /* Number of group descriptors per block */ - ext4_group_t s_groups_count; /* Number of groups in the fs */ - unsigned long s_overhead_last; /* Last calculated overhead */ - unsigned long s_blocks_last; /* Last seen block count */ - loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ - struct buffer_head * s_sbh; /* Buffer containing the super block */ - struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ - struct buffer_head ** s_group_desc; - unsigned long s_mount_opt; - ext4_fsblk_t s_sb_block; - uid_t s_resuid; - gid_t s_resgid; - unsigned short s_mount_state; - unsigned short s_pad; - int 
s_addr_per_block_bits; - int s_desc_per_block_bits; - int s_inode_size; - int s_first_ino; - spinlock_t s_next_gen_lock; - u32 s_next_generation; - u32 s_hash_seed[4]; - int s_def_hash_version; - struct percpu_counter s_freeblocks_counter; - struct percpu_counter s_freeinodes_counter; - struct percpu_counter s_dirs_counter; - struct blockgroup_lock s_blockgroup_lock; - - /* root of the per fs reservation window tree */ - spinlock_t s_rsv_window_lock; - struct rb_root s_rsv_window_root; - struct ext4_reserve_window_node s_rsv_window_head; - - /* Journaling */ - struct inode * s_journal_inode; - struct journal_s * s_journal; - struct list_head s_orphan; - unsigned long s_commit_interval; - struct block_device *journal_bdev; -#ifdef CONFIG_JBD2_DEBUG - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ -#endif -#ifdef CONFIG_QUOTA - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ -#endif - unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ - -#ifdef EXTENTS_STATS - /* ext4 extents stats */ - unsigned long s_ext_min; - unsigned long s_ext_max; - unsigned long s_depth_max; - spinlock_t s_ext_stats_lock; - unsigned long s_ext_blocks; - unsigned long s_ext_extents; -#endif - - /* for buddy allocator */ - struct ext4_group_info ***s_group_info; - struct inode *s_buddy_cache; - long s_blocks_reserved; - spinlock_t s_reserve_lock; - struct list_head s_active_transaction; - struct list_head s_closed_transaction; - struct list_head s_committed_transaction; - spinlock_t s_md_lock; - tid_t s_last_transaction; - unsigned short *s_mb_offsets, *s_mb_maxs; - - /* tunables */ - unsigned long s_stripe; - unsigned long s_mb_stream_request; - unsigned long s_mb_max_to_scan; - unsigned long s_mb_min_to_scan; - unsigned long s_mb_stats; - unsigned long s_mb_order2_reqs; - 
unsigned long s_mb_group_prealloc; - /* where last allocation was done - for stream allocation */ - unsigned long s_mb_last_group; - unsigned long s_mb_last_start; - - /* history to debug policy */ - struct ext4_mb_history *s_mb_history; - int s_mb_history_cur; - int s_mb_history_max; - int s_mb_history_num; - struct proc_dir_entry *s_mb_proc; - spinlock_t s_mb_history_lock; - int s_mb_history_filter; - - /* stats for buddy allocator */ - spinlock_t s_mb_pa_lock; - atomic_t s_bal_reqs; /* number of reqs with len > 1 */ - atomic_t s_bal_success; /* we found long enough chunks */ - atomic_t s_bal_allocated; /* in blocks */ - atomic_t s_bal_ex_scanned; /* total extents scanned */ - atomic_t s_bal_goals; /* goal hits */ - atomic_t s_bal_breaks; /* too long searches */ - atomic_t s_bal_2orders; /* 2^order hits */ - spinlock_t s_bal_lock; - unsigned long s_mb_buddies_generated; - unsigned long long s_mb_generation_time; - atomic_t s_mb_lost_chunks; - atomic_t s_mb_preallocated; - atomic_t s_mb_discarded; - - /* locality groups */ - struct ext4_locality_group *s_locality_groups; -}; - -#endif /* _LINUX_EXT4_FS_SB */ diff --git a/include/linux/ext4_jbd2.h b/include/linux/ext4_jbd2.h deleted file mode 100644 index 38c71d3c8dbf..000000000000 --- a/include/linux/ext4_jbd2.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * linux/include/linux/ext4_jbd2.h - * - * Written by Stephen C. Tweedie , 1999 - * - * Copyright 1998--1999 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Ext4-specific journaling extensions. - */ - -#ifndef _LINUX_EXT4_JBD2_H -#define _LINUX_EXT4_JBD2_H - -#include -#include -#include - -#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) - -/* Define the number of blocks we need to account to a transaction to - * modify one block of data. 
- * - * We may have to touch one inode, one bitmap buffer, up to three - * indirection blocks, the group and superblock summaries, and the data - * block to complete the transaction. - * - * For extents-enabled fs we may have to allocate and modify up to - * 5 levels of tree + root which are stored in the inode. */ - -#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ - (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ - || test_opt(sb, EXTENTS) ? 27U : 8U) - -/* Extended attribute operations touch at most two data buffers, - * two bitmap buffers, and two group summaries, in addition to the inode - * and the superblock, which are already accounted for. */ - -#define EXT4_XATTR_TRANS_BLOCKS 6U - -/* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - -#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ - EXT4_XATTR_TRANS_BLOCKS - 2 + \ - 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) - -/* Delete operations potentially hit one directory's namespace plus an - * entire inode, plus arbitrary amounts of bitmap/indirection data. Be - * generous. We can grow the delete transaction later if necessary. */ - -#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64) - -/* Define an arbitrary limit for the amount of data we will anticipate - * writing to any given transaction. For unbounded transactions such as - * write(2) and truncate(2) we can write more than this, but we always - * start off at the maximum transaction size and grow the transaction - * optimistically as we go. 
*/ - -#define EXT4_MAX_TRANS_DATA 64U - -/* We break up a large truncate or write transaction once the handle's - * buffer credits gets this low, we need either to extend the - * transaction or to start a new one. Reserve enough space here for - * inode, bitmap, superblock, group and indirection updates for at least - * one block, plus two quota updates. Quota allocations are not - * needed. */ - -#define EXT4_RESERVE_TRANS_BLOCKS 12U - -#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8 - -#ifdef CONFIG_QUOTA -/* Amount of blocks needed for quota update - we know that the structure was - * allocated so we need to update only inode+data */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) -/* Amount of blocks needed for quota insert/delete - we do some block writes - * but inode, sb and group updates are done only once */ -#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) -#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) -#else -#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 -#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 -#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 -#endif - -int -ext4_mark_iloc_dirty(handle_t *handle, - struct inode *inode, - struct ext4_iloc *iloc); - -/* - * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. - */ - -int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, - struct ext4_iloc *iloc); - -int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); - -/* - * Wrapper functions with which ext4 calls into JBD. The intent here is - * to allow these to be turned into appropriate stubs so ext4 can control - * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't - * been done yet. 
- */ - -static inline void ext4_journal_release_buffer(handle_t *handle, - struct buffer_head *bh) -{ - jbd2_journal_release_buffer(handle, bh); -} - -void ext4_journal_abort_handle(const char *caller, const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err); - -int __ext4_journal_get_undo_access(const char *where, handle_t *handle, - struct buffer_head *bh); - -int __ext4_journal_get_write_access(const char *where, handle_t *handle, - struct buffer_head *bh); - -int __ext4_journal_forget(const char *where, handle_t *handle, - struct buffer_head *bh); - -int __ext4_journal_revoke(const char *where, handle_t *handle, - ext4_fsblk_t blocknr, struct buffer_head *bh); - -int __ext4_journal_get_create_access(const char *where, - handle_t *handle, struct buffer_head *bh); - -int __ext4_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh); - -#define ext4_journal_get_undo_access(handle, bh) \ - __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) -#define ext4_journal_get_write_access(handle, bh) \ - __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) -#define ext4_journal_revoke(handle, blocknr, bh) \ - __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) -#define ext4_journal_get_create_access(handle, bh) \ - __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) -#define ext4_journal_dirty_metadata(handle, bh) \ - __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) -#define ext4_journal_forget(handle, bh) \ - __ext4_journal_forget(__FUNCTION__, (handle), (bh)) - -int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); - -handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); -int __ext4_journal_stop(const char *where, handle_t *handle); - -static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) -{ - return ext4_journal_start_sb(inode->i_sb, nblocks); -} - -#define ext4_journal_stop(handle) \ - __ext4_journal_stop(__FUNCTION__, 
(handle)) - -static inline handle_t *ext4_journal_current_handle(void) -{ - return journal_current_handle(); -} - -static inline int ext4_journal_extend(handle_t *handle, int nblocks) -{ - return jbd2_journal_extend(handle, nblocks); -} - -static inline int ext4_journal_restart(handle_t *handle, int nblocks) -{ - return jbd2_journal_restart(handle, nblocks); -} - -static inline int ext4_journal_blocks_per_page(struct inode *inode) -{ - return jbd2_journal_blocks_per_page(inode); -} - -static inline int ext4_journal_force_commit(journal_t *journal) -{ - return jbd2_journal_force_commit(journal); -} - -/* super.c */ -int ext4_force_commit(struct super_block *sb); - -static inline int ext4_should_journal_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - return 1; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) - return 1; - return 0; -} - -static inline int ext4_should_order_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 0; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) - return 1; - return 0; -} - -static inline int ext4_should_writeback_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 0; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) - return 1; - return 0; -} - -#endif /* _LINUX_EXT4_JBD2_H */ -- cgit v1.2.3-71-gd317 From 70b9f7dc1435412ca2b89b13a8353bd9915a7189 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 28 Apr 2008 16:27:23 -0700 Subject: x86/pci: remove flag in pci_cfg_space_size_ext so let pci_cfg_space_size call it directly without flag. 
Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/fixup.c | 2 +- drivers/pci/probe.c | 33 +++++++++++++++++---------------- include/linux/pci.h | 2 +- 3 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index b60b2abd480c..ff3a6a336342 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -502,7 +502,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, */ static void fam10h_pci_cfg_space_size(struct pci_dev *dev) { - dev->cfg_size = pci_cfg_space_size_ext(dev, 0); + dev->cfg_size = pci_cfg_space_size_ext(dev); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4a55bf380957..3706ce7972dd 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -842,13 +842,25 @@ static void set_pcie_port_type(struct pci_dev *pdev) * reading the dword at 0x100 which must either be 0 or a valid extended * capability header. 
*/ -int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) +int pci_cfg_space_size_ext(struct pci_dev *dev) { - int pos; u32 status; - if (!check_exp_pcix) - goto skip; + if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) + goto fail; + if (status == 0xffffffff) + goto fail; + + return PCI_CFG_SPACE_EXP_SIZE; + + fail: + return PCI_CFG_SPACE_SIZE; +} + +int pci_cfg_space_size(struct pci_dev *dev) +{ + int pos; + u32 status; pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { @@ -861,23 +873,12 @@ int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) goto fail; } - skip: - if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) - goto fail; - if (status == 0xffffffff) - goto fail; - - return PCI_CFG_SPACE_EXP_SIZE; + return pci_cfg_space_size_ext(dev); fail: return PCI_CFG_SPACE_SIZE; } -int pci_cfg_space_size(struct pci_dev *dev) -{ - return pci_cfg_space_size_ext(dev, 1); -} - static void pci_release_bus_bridge_dev(struct device *dev) { kfree(dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index a59517b4930f..509159bcd4e7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -667,7 +667,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), void *userdata); -int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix); +int pci_cfg_space_size_ext(struct pci_dev *dev); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); -- cgit v1.2.3-71-gd317 From 2f3517418dc0684a32318f2c5b53257416448b1e Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Wed, 30 Apr 2008 00:52:12 -0700 Subject: Blackfin serial driver: this driver enable SPORTs on Blackfin emulate UART Signed-off-by: Bryan Wu Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/Kconfig | 43 +++ drivers/serial/Makefile | 1 + 
drivers/serial/bfin_sport_uart.c | 614 +++++++++++++++++++++++++++++++++++++++ drivers/serial/bfin_sport_uart.h | 63 ++++ include/linux/serial_core.h | 6 +- 5 files changed, 725 insertions(+), 2 deletions(-) create mode 100644 drivers/serial/bfin_sport_uart.c create mode 100644 drivers/serial/bfin_sport_uart.h (limited to 'include/linux') diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 34b809e3b596..36acbcca2d48 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -1355,4 +1355,47 @@ config SERIAL_SC26XX_CONSOLE help Support for Console on SC2681/SC2692 serial ports. +config SERIAL_BFIN_SPORT + tristate "Blackfin SPORT emulate UART (EXPERIMENTAL)" + depends on BFIN && EXPERIMENTAL + select SERIAL_CORE + help + Enble support SPORT emulate UART on Blackfin series. + + To compile this driver as a module, choose M here: the + module will be called bfin_sport_uart. + +choice + prompt "Baud rate for Blackfin SPORT UART" + depends on SERIAL_BFIN_SPORT + default SERIAL_SPORT_BAUD_RATE_57600 + help + Choose a baud rate for the SPORT UART, other uart settings are + 8 bit, 1 stop bit, no parity, no flow control. 
+ +config SERIAL_SPORT_BAUD_RATE_115200 + bool "115200" + +config SERIAL_SPORT_BAUD_RATE_57600 + bool "57600" + +config SERIAL_SPORT_BAUD_RATE_38400 + bool "38400" + +config SERIAL_SPORT_BAUD_RATE_19200 + bool "19200" + +config SERIAL_SPORT_BAUD_RATE_9600 + bool "9600" +endchoice + +config SPORT_BAUD_RATE + int + depends on SERIAL_BFIN_SPORT + default 115200 if (SERIAL_SPORT_BAUD_RATE_115200) + default 57600 if (SERIAL_SPORT_BAUD_RATE_57600) + default 38400 if (SERIAL_SPORT_BAUD_RATE_38400) + default 19200 if (SERIAL_SPORT_BAUD_RATE_19200) + default 9600 if (SERIAL_SPORT_BAUD_RATE_9600) + endmenu diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index f02ff9fad017..0d9c09b1e836 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_SERIAL_PXA) += pxa.o obj-$(CONFIG_SERIAL_PNX8XXX) += pnx8xxx_uart.o obj-$(CONFIG_SERIAL_SA1100) += sa1100.o obj-$(CONFIG_SERIAL_BFIN) += bfin_5xx.o +obj-$(CONFIG_SERIAL_BFIN_SPORT) += bfin_sport_uart.o obj-$(CONFIG_SERIAL_S3C2410) += s3c2410.o obj-$(CONFIG_SERIAL_SUNCORE) += suncore.o obj-$(CONFIG_SERIAL_SUNHV) += sunhv.o diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c new file mode 100644 index 000000000000..aca1240ad808 --- /dev/null +++ b/drivers/serial/bfin_sport_uart.c @@ -0,0 +1,614 @@ +/* + * File: linux/drivers/serial/bfin_sport_uart.c + * + * Based on: drivers/serial/bfin_5xx.c by Aubrey Li. + * Author: Roy Huang + * + * Created: Nov 22, 2006 + * Copyright: (c) 2006-2007 Analog Devices Inc. + * Description: this driver enable SPORTs on Blackfin emulate UART. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * This driver and the hardware supported are in term of EE-191 of ADI. + * http://www.analog.com/UploadedFiles/Application_Notes/399447663EE191.pdf + * This application note describe how to implement a UART on a Sharc DSP, + * but this driver is implemented on Blackfin Processor. + */ + +/* After reset, there is a prelude of low level pulse when transmit data first + * time. No addtional pulse in following transmit. + * According to document: + * The SPORTs are ready to start transmitting or receiving data no later than + * three serial clock cycles after they are enabled in the SPORTx_TCR1 or + * SPORTx_RCR1 register. No serial clock cycles are lost from this point on. + * The first internal frame sync will occur one frame sync delay after the + * SPORTs are ready. External frame syncs can occur as soon as the SPORT is + * ready. + */ + +/* Thanks to Axel Alatalo for fixing sport rx bug. Sometimes + * sport receives data incorrectly. The following is Axel's words. + * As EE-191, sport rx samples 3 times of the UART baudrate and takes the + * middle smaple of every 3 samples as the data bit. For a 8-N-1 UART setting, + * 30 samples will be required for a byte. If transmitter sends a 1/3 bit short + * byte due to buadrate drift, then the 30th sample of a byte, this sample is + * also the third sample of the stop bit, will happens on the immediately + * following start bit which will be thrown away and missed. 
Thus since parts + * of the startbit will be missed and the receiver will begin to drift, the + * effect accumulates over time until synchronization is lost. + * If only require 2 samples of the stopbit (by sampling in total 29 samples), + * then a to short byte as in the case above will be tolerated. Then the 1/3 + * early startbit will trigger a framesync since the last read is complete + * after only 2/3 stopbit and framesync is active during the last 1/3 looking + * for a possible early startbit. */ + +//#define DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bfin_sport_uart.h" + +unsigned short bfin_uart_pin_req_sport0[] = + {P_SPORT0_TFS, P_SPORT0_DTPRI, P_SPORT0_TSCLK, P_SPORT0_RFS, \ + P_SPORT0_DRPRI, P_SPORT0_RSCLK, P_SPORT0_DRSEC, P_SPORT0_DTSEC, 0}; + +unsigned short bfin_uart_pin_req_sport1[] = + {P_SPORT1_TFS, P_SPORT1_DTPRI, P_SPORT1_TSCLK, P_SPORT1_RFS, \ + P_SPORT1_DRPRI, P_SPORT1_RSCLK, P_SPORT1_DRSEC, P_SPORT1_DTSEC, 0}; + +#define DRV_NAME "bfin-sport-uart" + +struct sport_uart_port { + struct uart_port port; + char *name; + + int tx_irq; + int rx_irq; + int err_irq; +}; + +static void sport_uart_tx_chars(struct sport_uart_port *up); +static void sport_stop_tx(struct uart_port *port); + +static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value) +{ + pr_debug("%s value:%x\n", __FUNCTION__, value); + /* Place a Start and Stop bit */ + __asm__ volatile ( + "R2 = b#01111111100;\n\t" + "R3 = b#10000000001;\n\t" + "%0 <<= 2;\n\t" + "%0 = %0 & R2;\n\t" + "%0 = %0 | R3;\n\t" + :"=r"(value) + :"0"(value) + :"R2", "R3"); + pr_debug("%s value:%x\n", __FUNCTION__, value); + + SPORT_PUT_TX(up, value); +} + +static inline unsigned int rx_one_byte(struct sport_uart_port *up) +{ + unsigned int value, extract; + + value = SPORT_GET_RX32(up); + pr_debug("%s value:%x\n", __FUNCTION__, value); + + /* Extract 8 bits data */ + __asm__ volatile ( + "R5 = 0;\n\t" + 
"P0 = 8;\n\t" + "R1 = 0x1801(Z);\n\t" + "R3 = 0x0300(Z);\n\t" + "R4 = 0;\n\t" + "LSETUP(loop_s, loop_e) LC0 = P0;\nloop_s:\t" + "R2 = extract(%1, R1.L)(Z);\n\t" + "R2 <<= R4;\n\t" + "R5 = R5 | R2;\n\t" + "R1 = R1 - R3;\nloop_e:\t" + "R4 += 1;\n\t" + "%0 = R5;\n\t" + :"=r"(extract) + :"r"(value) + :"P0", "R1", "R2","R3","R4", "R5"); + + pr_debug(" extract:%x\n", extract); + return extract; +} + +static int sport_uart_setup(struct sport_uart_port *up, int sclk, int baud_rate) +{ + int tclkdiv, tfsdiv, rclkdiv; + + /* Set TCR1 and TCR2 */ + SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK)); + SPORT_PUT_TCR2(up, 10); + pr_debug("%s TCR1:%x, TCR2:%x\n", __FUNCTION__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up)); + + /* Set RCR1 and RCR2 */ + SPORT_PUT_RCR1(up, (RCKFE | LARFS | LRFS | RFSR | IRCLK)); + SPORT_PUT_RCR2(up, 28); + pr_debug("%s RCR1:%x, RCR2:%x\n", __FUNCTION__, SPORT_GET_RCR1(up), SPORT_GET_RCR2(up)); + + tclkdiv = sclk/(2 * baud_rate) - 1; + tfsdiv = 12; + rclkdiv = sclk/(2 * baud_rate * 3) - 1; + SPORT_PUT_TCLKDIV(up, tclkdiv); + SPORT_PUT_TFSDIV(up, tfsdiv); + SPORT_PUT_RCLKDIV(up, rclkdiv); + SSYNC(); + pr_debug("%s sclk:%d, baud_rate:%d, tclkdiv:%d, tfsdiv:%d, rclkdiv:%d\n", + __FUNCTION__, sclk, baud_rate, tclkdiv, tfsdiv, rclkdiv); + + return 0; +} + +static irqreturn_t sport_uart_rx_irq(int irq, void *dev_id) +{ + struct sport_uart_port *up = dev_id; + struct tty_struct *tty = up->port.info->tty; + unsigned int ch; + + do { + ch = rx_one_byte(up); + up->port.icount.rx++; + + if (uart_handle_sysrq_char(&up->port, ch)) + ; + else + tty_insert_flip_char(tty, ch, TTY_NORMAL); + } while (SPORT_GET_STAT(up) & RXNE); + tty_flip_buffer_push(tty); + + return IRQ_HANDLED; +} + +static irqreturn_t sport_uart_tx_irq(int irq, void *dev_id) +{ + sport_uart_tx_chars(dev_id); + + return IRQ_HANDLED; +} + +static irqreturn_t sport_uart_err_irq(int irq, void *dev_id) +{ + struct sport_uart_port *up = dev_id; + struct tty_struct *tty = up->port.info->tty; + 
unsigned int stat = SPORT_GET_STAT(up); + + /* Overflow in RX FIFO */ + if (stat & ROVF) { + up->port.icount.overrun++; + tty_insert_flip_char(tty, 0, TTY_OVERRUN); + SPORT_PUT_STAT(up, ROVF); /* Clear ROVF bit */ + } + /* These should not happen */ + if (stat & (TOVF | TUVF | RUVF)) { + printk(KERN_ERR "SPORT Error:%s %s %s\n", + (stat & TOVF)?"TX overflow":"", + (stat & TUVF)?"TX underflow":"", + (stat & RUVF)?"RX underflow":""); + SPORT_PUT_TCR1(up, SPORT_GET_TCR1(up) & ~TSPEN); + SPORT_PUT_RCR1(up, SPORT_GET_RCR1(up) & ~RSPEN); + } + SSYNC(); + + return IRQ_HANDLED; +} + +/* Reqeust IRQ, Setup clock */ +static int sport_startup(struct uart_port *port) +{ + struct sport_uart_port *up = (struct sport_uart_port *)port; + char buffer[20]; + int retval; + + pr_debug("%s enter\n", __FUNCTION__); + memset(buffer, 20, '\0'); + snprintf(buffer, 20, "%s rx", up->name); + retval = request_irq(up->rx_irq, sport_uart_rx_irq, IRQF_SAMPLE_RANDOM, buffer, up); + if (retval) { + printk(KERN_ERR "Unable to request interrupt %s\n", buffer); + return retval; + } + + snprintf(buffer, 20, "%s tx", up->name); + retval = request_irq(up->tx_irq, sport_uart_tx_irq, IRQF_SAMPLE_RANDOM, buffer, up); + if (retval) { + printk(KERN_ERR "Unable to request interrupt %s\n", buffer); + goto fail1; + } + + snprintf(buffer, 20, "%s err", up->name); + retval = request_irq(up->err_irq, sport_uart_err_irq, IRQF_SAMPLE_RANDOM, buffer, up); + if (retval) { + printk(KERN_ERR "Unable to request interrupt %s\n", buffer); + goto fail2; + } + + if (port->line) { + if (peripheral_request_list(bfin_uart_pin_req_sport1, DRV_NAME)) + goto fail3; + } else { + if (peripheral_request_list(bfin_uart_pin_req_sport0, DRV_NAME)) + goto fail3; + } + + sport_uart_setup(up, get_sclk(), port->uartclk); + + /* Enable receive interrupt */ + SPORT_PUT_RCR1(up, (SPORT_GET_RCR1(up) | RSPEN)); + SSYNC(); + + return 0; + + +fail3: + printk(KERN_ERR DRV_NAME + ": Requesting Peripherals failed\n"); + + free_irq(up->err_irq, up); 
+fail2: + free_irq(up->tx_irq, up); +fail1: + free_irq(up->rx_irq, up); + + return retval; + +} + +static void sport_uart_tx_chars(struct sport_uart_port *up) +{ + struct circ_buf *xmit = &up->port.info->xmit; + + if (SPORT_GET_STAT(up) & TXF) + return; + + if (up->port.x_char) { + tx_one_byte(up, up->port.x_char); + up->port.icount.tx++; + up->port.x_char = 0; + return; + } + + if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) { + sport_stop_tx(&up->port); + return; + } + + while(!(SPORT_GET_STAT(up) & TXF) && !uart_circ_empty(xmit)) { + tx_one_byte(up, xmit->buf[xmit->tail]); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE -1); + up->port.icount.tx++; + } + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&up->port); +} + +static unsigned int sport_tx_empty(struct uart_port *port) +{ + struct sport_uart_port *up = (struct sport_uart_port *)port; + unsigned int stat; + + stat = SPORT_GET_STAT(up); + pr_debug("%s stat:%04x\n", __FUNCTION__, stat); + if (stat & TXHRE) { + return TIOCSER_TEMT; + } else + return 0; +} + +static unsigned int sport_get_mctrl(struct uart_port *port) +{ + pr_debug("%s enter\n", __FUNCTION__); + return (TIOCM_CTS | TIOCM_CD | TIOCM_DSR); +} + +static void sport_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + pr_debug("%s enter\n", __FUNCTION__); +} + +static void sport_stop_tx(struct uart_port *port) +{ + struct sport_uart_port *up = (struct sport_uart_port *)port; + unsigned int stat; + + pr_debug("%s enter\n", __FUNCTION__); + + stat = SPORT_GET_STAT(up); + while(!(stat & TXHRE)) { + udelay(1); + stat = SPORT_GET_STAT(up); + } + /* Although the hold register is empty, last byte is still in shift + * register and not sent out yet. If baud rate is lower than default, + * delay should be longer. 
For example, if the baud rate is 9600, + * the delay must be at least 2ms by experience */ + udelay(500); + + SPORT_PUT_TCR1(up, (SPORT_GET_TCR1(up) & ~TSPEN)); + SSYNC(); + + return; +} + +static void sport_start_tx(struct uart_port *port) +{ + struct sport_uart_port *up = (struct sport_uart_port *)port; + + pr_debug("%s enter\n", __FUNCTION__); + /* Write data into SPORT FIFO before enable SPROT to transmit */ + sport_uart_tx_chars(up); + + /* Enable transmit, then an interrupt will generated */ + SPORT_PUT_TCR1(up, (SPORT_GET_TCR1(up) | TSPEN)); + SSYNC(); + pr_debug("%s exit\n", __FUNCTION__); +} + +static void sport_stop_rx(struct uart_port *port) +{ + struct sport_uart_port *up = (struct sport_uart_port *)port; + + pr_debug("%s enter\n", __FUNCTION__); + /* Disable sport to stop rx */ + SPORT_PUT_RCR1(up, (SPORT_GET_RCR1(up) & ~RSPEN)); + SSYNC(); +} + +static void sport_enable_ms(struct uart_port *port) +{ + pr_debug("%s enter\n", __FUNCTION__); +} + +static void sport_break_ctl(struct uart_port *port, int break_state) +{ + pr_debug("%s enter\n", __FUNCTION__); +} + +static void sport_shutdown(struct uart_port *port) +{ + struct sport_uart_port *up = (struct sport_uart_port *)port; + + pr_debug("%s enter\n", __FUNCTION__); + + /* Disable sport */ + SPORT_PUT_TCR1(up, (SPORT_GET_TCR1(up) & ~TSPEN)); + SPORT_PUT_RCR1(up, (SPORT_GET_RCR1(up) & ~RSPEN)); + SSYNC(); + + if (port->line) { + peripheral_free_list(bfin_uart_pin_req_sport1); + } else { + peripheral_free_list(bfin_uart_pin_req_sport0); + } + + free_irq(up->rx_irq, up); + free_irq(up->tx_irq, up); + free_irq(up->err_irq, up); +} + +static void sport_set_termios(struct uart_port *port, + struct termios *termios, struct termios *old) +{ + pr_debug("%s enter, c_cflag:%08x\n", __FUNCTION__, termios->c_cflag); + uart_update_timeout(port, CS8 ,port->uartclk); +} + +static const char *sport_type(struct uart_port *port) +{ + struct sport_uart_port *up = (struct sport_uart_port *)port; + + pr_debug("%s enter\n", 
__FUNCTION__); + return up->name; +} + +static void sport_release_port(struct uart_port *port) +{ + pr_debug("%s enter\n", __FUNCTION__); +} + +static int sport_request_port(struct uart_port *port) +{ + pr_debug("%s enter\n", __FUNCTION__); + return 0; +} + +static void sport_config_port(struct uart_port *port, int flags) +{ + struct sport_uart_port *up = (struct sport_uart_port *)port; + + pr_debug("%s enter\n", __FUNCTION__); + up->port.type = PORT_BFIN_SPORT; +} + +static int sport_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + pr_debug("%s enter\n", __FUNCTION__); + return 0; +} + +struct uart_ops sport_uart_ops = { + .tx_empty = sport_tx_empty, + .set_mctrl = sport_set_mctrl, + .get_mctrl = sport_get_mctrl, + .stop_tx = sport_stop_tx, + .start_tx = sport_start_tx, + .stop_rx = sport_stop_rx, + .enable_ms = sport_enable_ms, + .break_ctl = sport_break_ctl, + .startup = sport_startup, + .shutdown = sport_shutdown, + .set_termios = sport_set_termios, + .type = sport_type, + .release_port = sport_release_port, + .request_port = sport_request_port, + .config_port = sport_config_port, + .verify_port = sport_verify_port, +}; + +static struct sport_uart_port sport_uart_ports[] = { + { /* SPORT 0 */ + .name = "SPORT0", + .tx_irq = IRQ_SPORT0_TX, + .rx_irq = IRQ_SPORT0_RX, + .err_irq= IRQ_SPORT0_ERROR, + .port = { + .type = PORT_BFIN_SPORT, + .iotype = UPIO_MEM, + .membase = (void __iomem *)SPORT0_TCR1, + .mapbase = SPORT0_TCR1, + .irq = IRQ_SPORT0_RX, + .uartclk = CONFIG_SPORT_BAUD_RATE, + .fifosize = 8, + .ops = &sport_uart_ops, + .line = 0, + }, + }, { /* SPORT 1 */ + .name = "SPORT1", + .tx_irq = IRQ_SPORT1_TX, + .rx_irq = IRQ_SPORT1_RX, + .err_irq= IRQ_SPORT1_ERROR, + .port = { + .type = PORT_BFIN_SPORT, + .iotype = UPIO_MEM, + .membase = (void __iomem *)SPORT1_TCR1, + .mapbase = SPORT1_TCR1, + .irq = IRQ_SPORT1_RX, + .uartclk = CONFIG_SPORT_BAUD_RATE, + .fifosize = 8, + .ops = &sport_uart_ops, + .line = 1, + }, + } +}; + +static struct 
uart_driver sport_uart_reg = { + .owner = THIS_MODULE, + .driver_name = "SPORT-UART", + .dev_name = "ttySS", + .major = 204, + .minor = 84, + .nr = ARRAY_SIZE(sport_uart_ports), + .cons = NULL, +}; + +static int sport_uart_suspend(struct platform_device *dev, pm_message_t state) +{ + struct sport_uart_port *sport = platform_get_drvdata(dev); + + pr_debug("%s enter\n", __FUNCTION__); + if (sport) + uart_suspend_port(&sport_uart_reg, &sport->port); + + return 0; +} + +static int sport_uart_resume(struct platform_device *dev) +{ + struct sport_uart_port *sport = platform_get_drvdata(dev); + + pr_debug("%s enter\n", __FUNCTION__); + if (sport) + uart_resume_port(&sport_uart_reg, &sport->port); + + return 0; +} + +static int sport_uart_probe(struct platform_device *dev) +{ + pr_debug("%s enter\n", __FUNCTION__); + sport_uart_ports[dev->id].port.dev = &dev->dev; + uart_add_one_port(&sport_uart_reg, &sport_uart_ports[dev->id].port); + platform_set_drvdata(dev, &sport_uart_ports[dev->id]); + + return 0; +} + +static int sport_uart_remove(struct platform_device *dev) +{ + struct sport_uart_port *sport = platform_get_drvdata(dev); + + pr_debug("%s enter\n", __FUNCTION__); + platform_set_drvdata(dev, NULL); + + if (sport) + uart_remove_one_port(&sport_uart_reg, &sport->port); + + return 0; +} + +static struct platform_driver sport_uart_driver = { + .probe = sport_uart_probe, + .remove = sport_uart_remove, + .suspend = sport_uart_suspend, + .resume = sport_uart_resume, + .driver = { + .name = DRV_NAME, + }, +}; + +static int __init sport_uart_init(void) +{ + int ret; + + pr_debug("%s enter\n", __FUNCTION__); + ret = uart_register_driver(&sport_uart_reg); + if (ret != 0) { + printk(KERN_ERR "Failed to register %s:%d\n", + sport_uart_reg.driver_name, ret); + return ret; + } + + ret = platform_driver_register(&sport_uart_driver); + if (ret != 0) { + printk(KERN_ERR "Failed to register sport uart driver:%d\n", ret); + uart_unregister_driver(&sport_uart_reg); + } + + + pr_debug("%s 
exit\n", __FUNCTION__); + return ret; +} + +static void __exit sport_uart_exit(void) +{ + pr_debug("%s enter\n", __FUNCTION__); + platform_driver_unregister(&sport_uart_driver); + uart_unregister_driver(&sport_uart_reg); +} + +module_init(sport_uart_init); +module_exit(sport_uart_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/bfin_sport_uart.h b/drivers/serial/bfin_sport_uart.h new file mode 100644 index 000000000000..671d41cc1a3f --- /dev/null +++ b/drivers/serial/bfin_sport_uart.h @@ -0,0 +1,63 @@ +/* + * File: linux/drivers/serial/bfin_sport_uart.h + * + * Based on: include/asm-blackfin/mach-533/bfin_serial_5xx.h + * Author: Roy Huang analog.com> + * + * Created: Nov 22, 2006 + * Copyright: (C) Analog Device Inc. + * Description: this driver enable SPORTs on Blackfin emulate UART. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#define OFFSET_TCR1 0x00 /* Transmit Configuration 1 Register */ +#define OFFSET_TCR2 0x04 /* Transmit Configuration 2 Register */ +#define OFFSET_TCLKDIV 0x08 /* Transmit Serial Clock Divider Register */ +#define OFFSET_TFSDIV 0x0C /* Transmit Frame Sync Divider Register */ +#define OFFSET_TX 0x10 /* Transmit Data Register */ +#define OFFSET_RX 0x18 /* Receive Data Register */ +#define OFFSET_RCR1 0x20 /* Receive Configuration 1 Register */ +#define OFFSET_RCR2 0x24 /* Receive Configuration 2 Register */ +#define OFFSET_RCLKDIV 0x28 /* Receive Serial Clock Divider Register */ +#define OFFSET_RFSDIV 0x2c /* Receive Frame Sync Divider Register */ +#define OFFSET_STAT 0x30 /* Status Register */ + +#define SPORT_GET_TCR1(sport) bfin_read16(((sport)->port.membase + OFFSET_TCR1)) +#define SPORT_GET_TCR2(sport) bfin_read16(((sport)->port.membase + OFFSET_TCR2)) +#define SPORT_GET_TCLKDIV(sport) bfin_read16(((sport)->port.membase + OFFSET_TCLKDIV)) +#define SPORT_GET_TFSDIV(sport) bfin_read16(((sport)->port.membase + OFFSET_TFSDIV)) +#define SPORT_GET_TX(sport) bfin_read16(((sport)->port.membase + OFFSET_TX)) +#define SPORT_GET_RX(sport) bfin_read16(((sport)->port.membase + OFFSET_RX)) +#define SPORT_GET_RX32(sport) bfin_read32(((sport)->port.membase + OFFSET_RX)) +#define SPORT_GET_RCR1(sport) bfin_read16(((sport)->port.membase + OFFSET_RCR1)) +#define SPORT_GET_RCR2(sport) bfin_read16(((sport)->port.membase + OFFSET_RCR2)) +#define SPORT_GET_RCLKDIV(sport) bfin_read16(((sport)->port.membase + OFFSET_RCLKDIV)) +#define SPORT_GET_RFSDIV(sport) bfin_read16(((sport)->port.membase + OFFSET_RFSDIV)) +#define SPORT_GET_STAT(sport) bfin_read16(((sport)->port.membase + OFFSET_STAT)) + +#define SPORT_PUT_TCR1(sport, v) 
bfin_write16(((sport)->port.membase + OFFSET_TCR1), v) +#define SPORT_PUT_TCR2(sport, v) bfin_write16(((sport)->port.membase + OFFSET_TCR2), v) +#define SPORT_PUT_TCLKDIV(sport, v) bfin_write16(((sport)->port.membase + OFFSET_TCLKDIV), v) +#define SPORT_PUT_TFSDIV(sport, v) bfin_write16(((sport)->port.membase + OFFSET_TFSDIV), v) +#define SPORT_PUT_TX(sport, v) bfin_write16(((sport)->port.membase + OFFSET_TX), v) +#define SPORT_PUT_RX(sport, v) bfin_write16(((sport)->port.membase + OFFSET_RX), v) +#define SPORT_PUT_RCR1(sport, v) bfin_write16(((sport)->port.membase + OFFSET_RCR1), v) +#define SPORT_PUT_RCR2(sport, v) bfin_write16(((sport)->port.membase + OFFSET_RCR2), v) +#define SPORT_PUT_RCLKDIV(sport, v) bfin_write16(((sport)->port.membase + OFFSET_RCLKDIV), v) +#define SPORT_PUT_RFSDIV(sport, v) bfin_write16(((sport)->port.membase + OFFSET_RFSDIV), v) +#define SPORT_PUT_STAT(sport, v) bfin_write16(((sport)->port.membase + OFFSET_STAT), v) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 7cb094a82456..d32123ae08ad 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -149,13 +149,15 @@ /* Freescale ColdFire */ #define PORT_MCF 78 -#define PORT_SC26XX 79 - +/* Blackfin SPORT */ +#define PORT_BFIN_SPORT 79 /* MN10300 on-chip UART numbers */ #define PORT_MN10300 80 #define PORT_MN10300_CTS 81 +#define PORT_SC26XX 82 + #ifdef __KERNEL__ #include -- cgit v1.2.3-71-gd317 From 6bfe0b499082fd3950429017cd8ebf2a6c458aa5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 30 Apr 2008 00:52:32 -0700 Subject: md: support blocking writes to an array on device failure Allows a userspace metadata handler to take action upon detecting a device failure. Based on an original patch by Neil Brown. 
Changes: -added blocked_wait waitqueue to rdev -don't qualify Blocked with Faulty always let userspace block writes -added md_wait_for_blocked_rdev to wait for the block device to be clear, if userspace misses the notification another one is sent every 5 seconds -set MD_RECOVERY_NEEDED after clearing "blocked" -kill DoBlock flag, just test mddev->external Signed-off-by: Dan Williams Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 33 ++++++++++++++++++++++++++++++++- drivers/md/raid1.c | 27 ++++++++++++++++++++++++--- drivers/md/raid10.c | 29 ++++++++++++++++++++++++++--- drivers/md/raid5.c | 33 +++++++++++++++++++++++++++++++++ include/linux/raid/md.h | 1 + include/linux/raid/md_k.h | 4 ++++ 6 files changed, 120 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/md.c b/drivers/md/md.c index bec00b201a73..83eb78b00137 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1828,6 +1828,10 @@ state_show(mdk_rdev_t *rdev, char *page) len += sprintf(page+len, "%swrite_mostly",sep); sep = ","; } + if (test_bit(Blocked, &rdev->flags)) { + len += sprintf(page+len, "%sblocked", sep); + sep = ","; + } if (!test_bit(Faulty, &rdev->flags) && !test_bit(In_sync, &rdev->flags)) { len += sprintf(page+len, "%sspare", sep); @@ -1844,6 +1848,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) * remove - disconnects the device * writemostly - sets write_mostly * -writemostly - clears write_mostly + * blocked - sets the Blocked flag + * -blocked - clears the Blocked flag */ int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { @@ -1865,6 +1871,16 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) err = 0; } else if (cmd_match(buf, "-writemostly")) { clear_bit(WriteMostly, &rdev->flags); + err = 0; + } else if (cmd_match(buf, "blocked")) { + set_bit(Blocked, &rdev->flags); + err = 0; + } else if (cmd_match(buf, "-blocked")) { + clear_bit(Blocked, 
&rdev->flags); + wake_up(&rdev->blocked_wait); + set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); + md_wakeup_thread(rdev->mddev->thread); + err = 0; } return err ? err : len; @@ -2194,7 +2210,9 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi goto abort_free; } } + INIT_LIST_HEAD(&rdev->same_set); + init_waitqueue_head(&rdev->blocked_wait); return rdev; @@ -4958,6 +4976,9 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) if (!rdev || test_bit(Faulty, &rdev->flags)) return; + + if (mddev->external) + set_bit(Blocked, &rdev->flags); /* dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", mdname(mddev), @@ -5760,7 +5781,7 @@ static int remove_and_add_spares(mddev_t *mddev) rdev_for_each(rdev, rtmp, mddev) if (rdev->raid_disk >= 0 && - !mddev->external && + !test_bit(Blocked, &rdev->flags) && (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) && atomic_read(&rdev->nr_pending)==0) { @@ -5959,6 +5980,16 @@ void md_check_recovery(mddev_t *mddev) } } +void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) +{ + sysfs_notify(&rdev->kobj, NULL, "state"); + wait_event_timeout(rdev->blocked_wait, + !test_bit(Blocked, &rdev->flags), + msecs_to_jiffies(5000)); + rdev_dec_pending(rdev, mddev); +} +EXPORT_SYMBOL(md_wait_for_blocked_rdev); + static int md_notify_reboot(struct notifier_block *this, unsigned long code, void *x) { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9fd473a6dbf5..6778b7cb39bd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -773,7 +773,6 @@ static int make_request(struct request_queue *q, struct bio * bio) r1bio_t *r1_bio; struct bio *read_bio; int i, targets = 0, disks; - mdk_rdev_t *rdev; struct bitmap *bitmap = mddev->bitmap; unsigned long flags; struct bio_list bl; @@ -781,6 +780,7 @@ static int make_request(struct request_queue *q, struct bio * bio) const int rw = bio_data_dir(bio); const int do_sync = bio_sync(bio); int do_barriers; + mdk_rdev_t 
*blocked_rdev; /* * Register the new request and wait if the reconstruction @@ -862,10 +862,17 @@ static int make_request(struct request_queue *q, struct bio * bio) first = 0; } #endif + retry_write: + blocked_rdev = NULL; rcu_read_lock(); for (i = 0; i < disks; i++) { - if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL && - !test_bit(Faulty, &rdev->flags)) { + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); + if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { + atomic_inc(&rdev->nr_pending); + blocked_rdev = rdev; + break; + } + if (rdev && !test_bit(Faulty, &rdev->flags)) { atomic_inc(&rdev->nr_pending); if (test_bit(Faulty, &rdev->flags)) { rdev_dec_pending(rdev, mddev); @@ -878,6 +885,20 @@ static int make_request(struct request_queue *q, struct bio * bio) } rcu_read_unlock(); + if (unlikely(blocked_rdev)) { + /* Wait for this device to become unblocked */ + int j; + + for (j = 0; j < i; j++) + if (r1_bio->bios[j]) + rdev_dec_pending(conf->mirrors[j].rdev, mddev); + + allow_barrier(conf); + md_wait_for_blocked_rdev(blocked_rdev, mddev); + wait_barrier(conf); + goto retry_write; + } + BUG_ON(targets == 0); /* we never fail the last device */ if (targets < conf->raid_disks) { diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1e96aa3ff513..5938fa962922 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -790,6 +790,7 @@ static int make_request(struct request_queue *q, struct bio * bio) const int do_sync = bio_sync(bio); struct bio_list bl; unsigned long flags; + mdk_rdev_t *blocked_rdev; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); @@ -879,17 +880,23 @@ static int make_request(struct request_queue *q, struct bio * bio) /* * WRITE: */ - /* first select target devices under spinlock and + /* first select target devices under rcu_lock and * inc refcount on their rdev. 
Record them by setting * bios[x] to bio */ raid10_find_phys(conf, r10_bio); + retry_write: + blocked_rdev = 0; rcu_read_lock(); for (i = 0; i < conf->copies; i++) { int d = r10_bio->devs[i].devnum; mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev); - if (rdev && - !test_bit(Faulty, &rdev->flags)) { + if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { + atomic_inc(&rdev->nr_pending); + blocked_rdev = rdev; + break; + } + if (rdev && !test_bit(Faulty, &rdev->flags)) { atomic_inc(&rdev->nr_pending); r10_bio->devs[i].bio = bio; } else { @@ -899,6 +906,22 @@ static int make_request(struct request_queue *q, struct bio * bio) } rcu_read_unlock(); + if (unlikely(blocked_rdev)) { + /* Have to wait for this device to get unblocked, then retry */ + int j; + int d; + + for (j = 0; j < i; j++) + if (r10_bio->devs[j].bio) { + d = r10_bio->devs[j].devnum; + rdev_dec_pending(conf->mirrors[d].rdev, mddev); + } + allow_barrier(conf); + md_wait_for_blocked_rdev(blocked_rdev, mddev); + wait_barrier(conf); + goto retry_write; + } + atomic_set(&r10_bio->remaining, 0); bio_list_init(&bl); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 968dacaced6d..087eee0cb809 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2607,6 +2607,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, } } + /* * handle_stripe - do things to a stripe. 
* @@ -2632,6 +2633,7 @@ static void handle_stripe5(struct stripe_head *sh) struct stripe_head_state s; struct r5dev *dev; unsigned long pending = 0; + mdk_rdev_t *blocked_rdev = NULL; memset(&s, 0, sizeof(s)); pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " @@ -2691,6 +2693,11 @@ static void handle_stripe5(struct stripe_head *sh) if (dev->written) s.written++; rdev = rcu_dereference(conf->disks[i].rdev); + if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { + blocked_rdev = rdev; + atomic_inc(&rdev->nr_pending); + break; + } if (!rdev || !test_bit(In_sync, &rdev->flags)) { /* The ReadError flag will just be confusing now */ clear_bit(R5_ReadError, &dev->flags); @@ -2705,6 +2712,11 @@ static void handle_stripe5(struct stripe_head *sh) } rcu_read_unlock(); + if (unlikely(blocked_rdev)) { + set_bit(STRIPE_HANDLE, &sh->state); + goto unlock; + } + if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) sh->ops.count++; @@ -2894,8 +2906,13 @@ static void handle_stripe5(struct stripe_head *sh) if (sh->ops.count) pending = get_stripe_work(sh); + unlock: spin_unlock(&sh->lock); + /* wait for this device to become unblocked */ + if (unlikely(blocked_rdev)) + md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); + if (pending) raid5_run_ops(sh, pending); @@ -2912,6 +2929,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) struct stripe_head_state s; struct r6_state r6s; struct r5dev *dev, *pdev, *qdev; + mdk_rdev_t *blocked_rdev = NULL; r6s.qd_idx = raid6_next_disk(pd_idx, disks); pr_debug("handling stripe %llu, state=%#lx cnt=%d, " @@ -2975,6 +2993,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) if (dev->written) s.written++; rdev = rcu_dereference(conf->disks[i].rdev); + if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { + blocked_rdev = rdev; + atomic_inc(&rdev->nr_pending); + break; + } if (!rdev || !test_bit(In_sync, &rdev->flags)) { /* The ReadError flag will just be 
confusing now */ clear_bit(R5_ReadError, &dev->flags); @@ -2989,6 +3012,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) set_bit(R5_Insync, &dev->flags); } rcu_read_unlock(); + + if (unlikely(blocked_rdev)) { + set_bit(STRIPE_HANDLE, &sh->state); + goto unlock; + } pr_debug("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d,%d\n", s.locked, s.uptodate, s.to_read, s.to_write, s.failed, @@ -3094,8 +3122,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) handle_stripe_expansion(conf, sh, &r6s); + unlock: spin_unlock(&sh->lock); + /* wait for this device to become unblocked */ + if (unlikely(blocked_rdev)) + md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); + return_io(return_bi); for (i=disks; i-- ;) { diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index 8ab630b67fcc..81a1a02d4566 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -94,6 +94,7 @@ extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, extern void md_do_sync(mddev_t *mddev); extern void md_new_event(mddev_t *mddev); extern void md_allow_write(mddev_t *mddev); +extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); #endif /* CONFIG_MD */ #endif diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 7bb6d1abf71e..812ffa590cff 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -84,6 +84,10 @@ struct mdk_rdev_s #define AllReserved 6 /* If whole device is reserved for * one array */ #define AutoDetected 7 /* added by auto-detect */ +#define Blocked 8 /* An error occured on an externally + * managed array, don't allow writes + * until it is cleared */ + wait_queue_head_t blocked_wait; int desc_nr; /* descriptor index in the superblock */ int raid_disk; /* role of device in array */ -- cgit v1.2.3-71-gd317 From e442055193e4584218006e616c9bdce0c5e9ae5c Mon Sep 17 
00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:52:44 -0700 Subject: signals: re-assign CLD_CONTINUED notification from the sender to reciever Based on discussion with Jiri and Roland. In short: currently handle_stop_signal(SIGCONT, p) sends the notification to p->parent, with this patch p itself notifies its parent when it becomes running. handle_stop_signal(SIGCONT) has to drop ->siglock temporarily in order to notify the parent with do_notify_parent_cldstop(). This leads to multiple problems: - as Jiri Kosina pointed out, the stopped task can resume without actually seeing SIGCONT which may have a handler. - we race with another sig_kernel_stop() signal which may come in that window. - we race with sig_fatal() signals which may set SIGNAL_GROUP_EXIT in that window. - we can't avoid taking tasklist_lock() while sending SIGCONT. With this patch handle_stop_signal() just sets the new SIGNAL_CLD_CONTINUED flag in p->signal->flags and returns. The notification is sent by the first task which returns from finish_stop() (there should be at least one) or any other signalled thread from get_signal_to_deliver(). This is a user-visible change. Say, currently kill(SIGCONT, stopped_child) can't return without seeing SIGCHLD, with this patch SIGCHLD can be delayed unpredictably. Another difference is that if the child is ptraced by another process, CLD_CONTINUED may be delivered to ->real_parent after ptrace_detach() while currently it always goes to the tracer which doesn't actually need this notification. Hopefully not a problem. The patch asks for the further obvious cleanups, I'll send them separately.
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Jiri Kosina Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 6 ++++++ kernel/signal.c | 29 +++++++++++++++++++---------- 2 files changed, 25 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1d02babdb2c7..ef5615270342 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -554,6 +554,12 @@ struct signal_struct { #define SIGNAL_STOP_DEQUEUED 0x00000002 /* stop signal dequeued */ #define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */ #define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */ +/* + * Pending notifications to parent. + */ +#define SIGNAL_CLD_STOPPED 0x00000010 +#define SIGNAL_CLD_CONTINUED 0x00000020 +#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) /* If true, all threads except ->group_exit_task have pending SIGKILL */ static inline int signal_group_exit(const struct signal_struct *sig) diff --git a/kernel/signal.c b/kernel/signal.c index 91d57f89f5a5..115c04f3f143 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -603,10 +603,8 @@ static void handle_stop_signal(int sig, struct task_struct *p) * the SIGCHLD was pending on entry to this kill. */ p->signal->group_stop_count = 0; - p->signal->flags = SIGNAL_STOP_CONTINUED; - spin_unlock(&p->sighand->siglock); - do_notify_parent_cldstop(p, CLD_STOPPED); - spin_lock(&p->sighand->siglock); + p->signal->flags = SIGNAL_STOP_CONTINUED | + SIGNAL_CLD_STOPPED; } rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); t = p; @@ -643,25 +641,23 @@ static void handle_stop_signal(int sig, struct task_struct *p) * We were in fact stopped, and are now continued. * Notify the parent with CLD_CONTINUED. 
*/ - p->signal->flags = SIGNAL_STOP_CONTINUED; + p->signal->flags = SIGNAL_STOP_CONTINUED | + SIGNAL_CLD_CONTINUED; p->signal->group_exit_code = 0; - spin_unlock(&p->sighand->siglock); - do_notify_parent_cldstop(p, CLD_CONTINUED); - spin_lock(&p->sighand->siglock); } else { /* * We are not stopped, but there could be a stop * signal in the middle of being processed after * being removed from the queue. Clear that too. */ - p->signal->flags = 0; + p->signal->flags &= ~SIGNAL_STOP_DEQUEUED; } } else if (sig == SIGKILL) { /* * Make sure that any pending stop signal already dequeued * is undone by the wakeup for SIGKILL. */ - p->signal->flags = 0; + p->signal->flags &= ~SIGNAL_STOP_DEQUEUED; } } @@ -1784,6 +1780,19 @@ relock: try_to_freeze(); spin_lock_irq(&current->sighand->siglock); + + if (unlikely(current->signal->flags & SIGNAL_CLD_MASK)) { + int why = (current->signal->flags & SIGNAL_STOP_CONTINUED) + ? CLD_CONTINUED : CLD_STOPPED; + current->signal->flags &= ~SIGNAL_CLD_MASK; + spin_unlock_irq(&current->sighand->siglock); + + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current->group_leader, why); + read_unlock(&tasklist_lock); + goto relock; + } + for (;;) { struct k_sigaction *ka; -- cgit v1.2.3-71-gd317 From 6ca25b551309eb1b1b41f83414a92f7472e0b23d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:52:45 -0700 Subject: kill_pid_info: don't take now unneeded tasklist_lock Previously handle_stop_signal(SIGCONT) could drop ->siglock. That is why kill_pid_info(SIGCONT) takes tasklist_lock to make sure the target task can't go away after unlock. Not needed now.
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Jiri Kosina Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 2 -- kernel/signal.c | 7 +------ 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 42d2e0a948f4..84f997f8aa53 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -362,8 +362,6 @@ int unhandled_signal(struct task_struct *tsk, int sig); #define sig_kernel_stop(sig) \ (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_STOP_MASK)) -#define sig_needs_tasklist(sig) ((sig) == SIGCONT) - #define sig_user_defined(t, signr) \ (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN)) diff --git a/kernel/signal.c b/kernel/signal.c index 115c04f3f143..ce53ab19c21d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1039,9 +1039,6 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) struct task_struct *p; rcu_read_lock(); - if (unlikely(sig_needs_tasklist(sig))) - read_lock(&tasklist_lock); - retry: p = pid_task(pid, PIDTYPE_PID); if (p) { @@ -1055,10 +1052,8 @@ retry: */ goto retry; } - - if (unlikely(sig_needs_tasklist(sig))) - read_unlock(&tasklist_lock); rcu_read_unlock(); + return error; } -- cgit v1.2.3-71-gd317 From ac5c215383f43a106ba4ef298126bf78c126f5e9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:52:57 -0700 Subject: signals: join send_sigqueue() with send_group_sigqueue() We export send_sigqueue() and send_group_sigqueue() for the only user, posix_timer_event(). This is a bit silly, because both are just trivial helpers on top of do_send_sigqueue() and because we pass the unused .si_signo parameter. Kill them both, rename do_send_sigqueue() to send_sigqueue(), and export it.
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 +-- kernel/posix-timers.c | 6 ++---- kernel/signal.c | 15 +-------------- 3 files changed, 4 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ef5615270342..0917b3df12d5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1751,8 +1751,7 @@ extern void zap_other_threads(struct task_struct *p); extern int kill_proc(pid_t, int, int); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(int, struct sigqueue *, struct task_struct *); -extern int send_group_sigqueue(int, struct sigqueue *, struct task_struct *); +extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 8476956ffd92..dbd8398ddb0b 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -310,8 +310,7 @@ int posix_timer_event(struct k_itimer *timr,int si_private) if (timr->it_sigev_notify & SIGEV_THREAD_ID) { struct task_struct *leader; - int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, - timr->it_process); + int ret = send_sigqueue(timr->sigq, timr->it_process, 0); if (likely(ret >= 0)) return ret; @@ -322,8 +321,7 @@ int posix_timer_event(struct k_itimer *timr,int si_private) timr->it_process = leader; } - return send_group_sigqueue(timr->it_sigev_signo, timr->sigq, - timr->it_process); + return send_sigqueue(timr->sigq, timr->it_process, 1); } EXPORT_SYMBOL_GPL(posix_timer_event); diff --git a/kernel/signal.c b/kernel/signal.c index 367c6662b12f..d52a1fe921fa 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1240,8 +1240,7 @@ void sigqueue_free(struct 
sigqueue *q) __sigqueue_free(q); } -static int do_send_sigqueue(struct sigqueue *q, struct task_struct *t, - int group) +int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) { int sig = q->info.si_signo; struct sigpending *pending; @@ -1266,7 +1265,6 @@ static int do_send_sigqueue(struct sigqueue *q, struct task_struct *t, * If an SI_TIMER entry is already queue just increment * the overrun count. */ - BUG_ON(q->info.si_code != SI_TIMER); q->info.si_overrun++; goto out; @@ -1283,17 +1281,6 @@ ret: return ret; } -int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) -{ - return do_send_sigqueue(q, p, 0); -} - -int -send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) -{ - return do_send_sigqueue(q, p, 1); -} - /* * Wake up any threads in the parent blocked in wait* syscalls. */ -- cgit v1.2.3-71-gd317 From fae5fa44f1fd079ffbed8e0add929dd7bbd1347f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:03 -0700 Subject: signals: fix /sbin/init protection from unwanted signals The global init has a lot of long standing problems with the unhandled fatal signals. - The "is_global_init(current)" check in get_signal_to_deliver() protects only the main thread. Sub-thread can dequeue the fatal signal and shutdown the whole thread group except the main thread. If it dequeues SIGSTOP /sbin/init will be stopped, this is not right too. Note that we can't use is_global_init(->group_leader), this breaks exec and this can't solve other problems we have. - Even if afterwards ignored, the fatal signals sets SIGNAL_GROUP_EXIT on delivery. This breaks exec, has other bad implications, and this is just wrong. Introduce the new SIGNAL_UNKILLABLE flag to fix these problems. It also helps to solve some other problems addressed by the subsequent patches. Currently we use this flag for the global init only, but it could also be used by kthreads and (perhaps) by the sub-namespace inits.
Signed-off-by: Oleg Nesterov Acked-by: "Eric W. Biederman" Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ init/main.c | 2 ++ kernel/signal.c | 9 ++++++--- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0917b3df12d5..fe970cdca83c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -561,6 +561,8 @@ struct signal_struct { #define SIGNAL_CLD_CONTINUED 0x00000020 #define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) +#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ + /* If true, all threads except ->group_exit_task have pending SIGKILL */ static inline int signal_group_exit(const struct signal_struct *sig) { diff --git a/init/main.c b/init/main.c index 624266b524d4..1f4406477f83 100644 --- a/init/main.c +++ b/init/main.c @@ -802,6 +802,8 @@ static int noinline init_post(void) (void) sys_dup(0); (void) sys_dup(0); + current->signal->flags |= SIGNAL_UNKILLABLE; + if (ramdisk_execute_command) { run_init_process(ramdisk_execute_command); printk(KERN_WARNING "Failed to execute %s\n", diff --git a/kernel/signal.c b/kernel/signal.c index 02ef3548aeb0..646a8765696a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -728,7 +728,8 @@ static void complete_signal(int sig, struct task_struct *p, int group) * Found a killable thread. If the signal will be fatal, * then start taking the whole group down immediately. 
*/ - if (sig_fatal(p, sig) && !(signal->flags & SIGNAL_GROUP_EXIT) && + if (sig_fatal(p, sig) && + !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && !sigismember(&t->real_blocked, sig) && (sig == SIGKILL || !(t->ptrace & PT_PTRACED))) { /* @@ -1615,7 +1616,8 @@ static int do_signal_stop(int signr) } else { struct task_struct *t; - if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || + if (unlikely((sig->flags & (SIGNAL_STOP_DEQUEUED | SIGNAL_UNKILLABLE)) + != SIGNAL_STOP_DEQUEUED) || unlikely(signal_group_exit(sig))) return 0; /* @@ -1761,7 +1763,8 @@ relock: /* * Global init gets no signals it doesn't want. */ - if (is_global_init(current)) + if (unlikely(signal->flags & SIGNAL_UNKILLABLE) && + !signal_group_exit(signal)) continue; if (sig_kernel_stop(signr)) { -- cgit v1.2.3-71-gd317 From 4e4c22c71144c1b2e22c257ec6cf08ccb5be1165 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Apr 2008 00:53:06 -0700 Subject: signals: add set_restore_sigmask This adds the set_restore_sigmask() inline in <linux/thread_info.h> and replaces every set_thread_flag(TIF_RESTORE_SIGMASK) with a call to it. No change, but abstracts the details of the flag protocol from all the calls.
Signed-off-by: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 6 +++--- fs/eventpoll.c | 3 +-- fs/select.c | 4 ++-- include/linux/thread_info.h | 15 ++++++++++++++- kernel/compat.c | 3 +-- kernel/signal.c | 2 +- 6 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/compat.c b/fs/compat.c index 2ce4456aad30..9964d542ae9e 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1720,7 +1720,7 @@ sticky: if (sigmask) { memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -1791,7 +1791,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, if (sigmask) { memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } ret = -ERESTARTNOHAND; } else if (sigmask) @@ -2117,7 +2117,7 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, if (err == -EINTR) { memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } else sigprocmask(SIG_SETMASK, &sigsaved, NULL); } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0d237182d721..71af2fc0041e 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1279,7 +1279,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, if (error == -EINTR) { memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } else sigprocmask(SIG_SETMASK, &sigsaved, NULL); } @@ -1309,4 +1309,3 @@ static int __init eventpoll_init(void) return 0; } fs_initcall(eventpoll_init); - diff --git a/fs/select.c b/fs/select.c index 00f58c5c7e05..32ce2b32fad1 100644 --- a/fs/select.c +++ b/fs/select.c @@ -498,7 +498,7 @@ sticky: if
(sigmask) { memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -805,7 +805,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, if (sigmask) { memcpy(&current->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } ret = -ERESTARTNOHAND; } else if (sigmask) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index accd7bad35b0..43d8162c696e 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -92,6 +92,19 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #define set_need_resched() set_thread_flag(TIF_NEED_RESCHED) #define clear_need_resched() clear_thread_flag(TIF_NEED_RESCHED) -#endif +#ifdef TIF_RESTORE_SIGMASK +/** + * set_restore_sigmask() - make sure saved_sigmask processing gets done + * + * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code + * will run before returning to user mode, to process the flag.
+ */ +static inline void set_restore_sigmask(void) +{ + set_thread_flag(TIF_RESTORE_SIGMASK); +} +#endif /* TIF_RESTORE_SIGMASK */ + +#endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ diff --git a/kernel/compat.c b/kernel/compat.c index e1ef04870c2a..4a856a3643bb 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -898,7 +898,7 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat current->state = TASK_INTERRUPTIBLE; schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); return -ERESTARTNOHAND; } #endif /* __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND */ @@ -1080,4 +1080,3 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info) return 0; } - diff --git a/kernel/signal.c b/kernel/signal.c index 9ac737e53df1..72bb4f51f963 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2541,7 +2541,7 @@ asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) current->state = TASK_INTERRUPTIBLE; schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); return -ERESTARTNOHAND; } #endif /* __ARCH_WANT_SYS_RT_SIGSUSPEND */ -- cgit v1.2.3-71-gd317 From 7648d961fcb454d38e864d2d850bc30e078bf7e6 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Apr 2008 00:53:07 -0700 Subject: signals: set_restore_sigmask TIF_SIGPENDING Set TIF_SIGPENDING in set_restore_sigmask. This lets arch code take TIF_RESTORE_SIGMASK out of the set of bits that will be noticed on return to user mode. On some machines those bits are scarce, and we can free this unneeded one up for other uses. It is probably the case that TIF_SIGPENDING is always set anyway everywhere set_restore_sigmask() is used. But this is some cheap paranoia in case there is an arcane case where it might not be. 
Signed-off-by: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/thread_info.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 43d8162c696e..81c5f82f0663 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -97,11 +97,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) * set_restore_sigmask() - make sure saved_sigmask processing gets done * * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code - * will run before returning to user mode, to process the flag. + * will run before returning to user mode, to process the flag. For + * all callers, TIF_SIGPENDING is already set or it's no harm to set + * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the + * arch code will notice on return to user mode, in case those bits + * are scarce. We set TIF_SIGPENDING here to ensure that the arch + * signal code always gets run when TIF_RESTORE_SIGMASK is set. */ static inline void set_restore_sigmask(void) { set_thread_flag(TIF_RESTORE_SIGMASK); + set_thread_flag(TIF_SIGPENDING); } #endif /* TIF_RESTORE_SIGMASK */ -- cgit v1.2.3-71-gd317 From f3de272b821accbc8387211977c2de4f38468d05 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Apr 2008 00:53:09 -0700 Subject: signals: use HAVE_SET_RESTORE_SIGMASK Change all the #ifdef TIF_RESTORE_SIGMASK conditionals in non-arch code to #ifdef HAVE_SET_RESTORE_SIGMASK. If arch code defines it first, the generic set_restore_sigmask() using TIF_RESTORE_SIGMASK is not defined. 
Signed-off-by: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 8 ++++---- fs/eventpoll.c | 4 ++-- fs/select.c | 8 ++++---- include/linux/sched.h | 2 +- include/linux/thread_info.h | 10 ++++++++-- 5 files changed, 19 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/compat.c b/fs/compat.c index 9964d542ae9e..139dc93c092d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1634,7 +1634,7 @@ sticky: return ret; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, @@ -1825,7 +1825,7 @@ sticky: return ret; } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ #if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) /* Stuff for NFS server syscalls... */ @@ -2080,7 +2080,7 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2) #ifdef CONFIG_EPOLL -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long compat_sys_epoll_pwait(int epfd, struct compat_epoll_event __user *events, int maxevents, int timeout, @@ -2124,7 +2124,7 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, return err; } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ #endif /* CONFIG_EPOLL */ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 71af2fc0041e..221086fef174 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1241,7 +1241,7 @@ error_return: return error; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK /* * Implement the event wait interface for the eventpoll file. 
It is the kernel @@ -1287,7 +1287,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, return error; } -#endif /* #ifdef TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ static int __init eventpoll_init(void) { diff --git a/fs/select.c b/fs/select.c index 32ce2b32fad1..2c292146e246 100644 --- a/fs/select.c +++ b/fs/select.c @@ -425,7 +425,7 @@ sticky: return ret; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) @@ -528,7 +528,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ struct poll_list { struct poll_list *next; @@ -759,7 +759,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, return ret; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) @@ -839,4 +839,4 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, return ret; } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ diff --git a/include/linux/sched.h b/include/linux/sched.h index fe970cdca83c..86e60796db62 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1175,7 +1175,7 @@ struct task_struct { struct sighand_struct *sighand; sigset_t blocked, real_blocked; - sigset_t saved_sigmask; /* To be restored with TIF_RESTORE_SIGMASK */ + sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ struct sigpending pending; unsigned long sas_ss_sp; diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 81c5f82f0663..38a56477f27a 100644 --- 
a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -92,7 +92,13 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #define set_need_resched() set_thread_flag(TIF_NEED_RESCHED) #define clear_need_resched() clear_thread_flag(TIF_NEED_RESCHED) -#ifdef TIF_RESTORE_SIGMASK +#if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK +/* + * An arch can define its own version of set_restore_sigmask() to get the + * job done however works, with or without TIF_RESTORE_SIGMASK. + */ +#define HAVE_SET_RESTORE_SIGMASK 1 + /** * set_restore_sigmask() - make sure saved_sigmask processing gets done * @@ -109,7 +115,7 @@ static inline void set_restore_sigmask(void) set_thread_flag(TIF_RESTORE_SIGMASK); set_thread_flag(TIF_SIGPENDING); } -#endif /* TIF_RESTORE_SIGMASK */ +#endif /* TIF_RESTORE_SIGMASK && !HAVE_SET_RESTORE_SIGMASK */ #endif /* __KERNEL__ */ -- cgit v1.2.3-71-gd317 From 53b6f9fbd3b63af14b4f6268e8b5b80d178d05bc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:13 -0700 Subject: ptrace: introduce ptrace_reparented() helper Add another trivial helper for the sake of grep. It also auto-documents the fact that ->parent != real_parent implies ->ptrace. No functional changes. 
Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 4 ++++ kernel/exit.c | 9 ++++----- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index ebe0c17039cf..f98501ba557e 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -98,6 +98,10 @@ extern void ptrace_untrace(struct task_struct *child); extern int ptrace_may_attach(struct task_struct *task); extern int __ptrace_may_attach(struct task_struct *task); +static inline int ptrace_reparented(struct task_struct *child) +{ + return child->real_parent != child->parent; +} static inline void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { diff --git a/kernel/exit.c b/kernel/exit.c index 879ed6e1c883..0da2921b1e7f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -698,7 +698,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) if (unlikely(traced)) { /* Preserve ptrace links if someone else is tracing this child. */ list_del_init(&p->ptrace_list); - if (p->parent != p->real_parent) + if (ptrace_reparented(p)) list_add(&p->ptrace_list, &p->real_parent->ptrace_children); } else { /* If this child is being traced, then we're the one tracing it @@ -865,8 +865,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead) * only has special meaning to our real parent. */ if (!task_detached(tsk) && thread_group_empty(tsk)) { - int signal = (tsk->parent == tsk->real_parent) - ? tsk->exit_signal : SIGCHLD; + int signal = ptrace_reparented(tsk) ? 
+ SIGCHLD : tsk->exit_signal; do_notify_parent(tsk, signal); } else if (tsk->ptrace) { do_notify_parent(tsk, SIGCHLD); @@ -1269,8 +1269,7 @@ static int wait_task_zombie(struct task_struct *p, int noreap, return 0; } - /* traced means p->ptrace, but not vice versa */ - traced = (p->real_parent != p->parent); + traced = ptrace_reparented(p); if (likely(!traced)) { struct signal_struct *psig; -- cgit v1.2.3-71-gd317 From 04f378b198da233ca0aca341b113dc6579d46123 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 30 Apr 2008 00:53:29 -0700 Subject: tty: BKL pushdown - Push the BKL down into the line disciplines - Switch the tty layer to unlocked_ioctl - Introduce a new ctrl_lock spin lock for the control bits - Eliminate much of the lock_kernel use in n_tty - Prepare to (but don't yet) call the drivers with the lock dropped on the paths that historically held the lock BKL now primarily protects open/close/ldisc change in the tty layer [jirislaby@gmail.com: a couple of fixes] Signed-off-by: Alan Cox Signed-off-by: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/n_hdlc.c | 24 ++++++++--- drivers/char/n_r3964.c | 16 ++++++- drivers/char/n_tty.c | 32 ++++++++++---- drivers/char/pty.c | 3 ++ drivers/char/tty_io.c | 107 ++++++++++++++++++++++++++++++++--------------- drivers/char/tty_ioctl.c | 6 +++ drivers/char/vt.c | 8 +++- fs/compat_ioctl.c | 2 +- include/linux/tty.h | 4 +- 9 files changed, 149 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index 06803ed5568c..a07c0af4819e 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -578,26 +578,36 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, return -EFAULT; } + lock_kernel(); + for (;;) { - if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) + if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) { + unlock_kernel(); return -EIO; + } n_hdlc = tty2n_hdlc (tty); if (!n_hdlc || n_hdlc->magic != 
HDLC_MAGIC || - tty != n_hdlc->tty) + tty != n_hdlc->tty) { + unlock_kernel(); return 0; + } rbuf = n_hdlc_buf_get(&n_hdlc->rx_buf_list); if (rbuf) break; /* no data */ - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK) { + unlock_kernel(); return -EAGAIN; + } interruptible_sleep_on (&tty->read_wait); - if (signal_pending(current)) + if (signal_pending(current)) { + unlock_kernel(); return -EINTR; + } } if (rbuf->count > nr) @@ -618,7 +628,7 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, kfree(rbuf); else n_hdlc_buf_put(&n_hdlc->rx_free_buf_list,rbuf); - + unlock_kernel(); return ret; } /* end of n_hdlc_tty_read() */ @@ -661,6 +671,8 @@ static ssize_t n_hdlc_tty_write(struct tty_struct *tty, struct file *file, count = maxframe; } + lock_kernel(); + add_wait_queue(&tty->write_wait, &wait); set_current_state(TASK_INTERRUPTIBLE); @@ -695,7 +707,7 @@ static ssize_t n_hdlc_tty_write(struct tty_struct *tty, struct file *file, n_hdlc_buf_put(&n_hdlc->tx_buf_list,tbuf); n_hdlc_send_frames(n_hdlc,tty); } - + unlock_kernel(); return error; } /* end of n_hdlc_tty_write() */ diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 6b918b80f73e..3f6486e9f1ec 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -1075,12 +1075,15 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, TRACE_L("read()"); + lock_kernel(); + pClient = findClient(pInfo, task_pid(current)); if (pClient) { pMsg = remove_msg(pInfo, pClient); if (pMsg == NULL) { /* no messages available. 
*/ if (file->f_flags & O_NONBLOCK) { + unlock_kernel(); return -EAGAIN; } /* block until there is a message: */ @@ -1090,8 +1093,10 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, /* If we still haven't got a message, we must have been signalled */ - if (!pMsg) + if (!pMsg) { + unlock_kernel(); return -EINTR; + } /* deliver msg to client process: */ theMsg.msg_id = pMsg->msg_id; @@ -1102,12 +1107,15 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, kfree(pMsg); TRACE_M("r3964_read - msg kfree %p", pMsg); - if (copy_to_user(buf, &theMsg, count)) + if (copy_to_user(buf, &theMsg, count)) { + unlock_kernel(); return -EFAULT; + } TRACE_PS("read - return %d", count); return count; } + unlock_kernel(); return -EPERM; } @@ -1156,6 +1164,8 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file, pHeader->locks = 0; pHeader->owner = NULL; + lock_kernel(); + pClient = findClient(pInfo, task_pid(current)); if (pClient) { pHeader->owner = pClient; @@ -1173,6 +1183,8 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file, add_tx_queue(pInfo, pHeader); trigger_transmit(pInfo); + unlock_kernel(); + return 0; } diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index 0c09409fa45d..001d9d875387 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -183,22 +183,24 @@ static void reset_buffer_flags(struct tty_struct *tty) * at hangup) or when the N_TTY line discipline internally has to * clean the pending queue (for example some signals). * - * FIXME: tty->ctrl_status is not spinlocked and relies on - * lock_kernel() still. 
+ * Locking: ctrl_lock */ static void n_tty_flush_buffer(struct tty_struct *tty) { + unsigned long flags; /* clear everything and unthrottle the driver */ reset_buffer_flags(tty); if (!tty->link) return; + spin_lock_irqsave(&tty->ctrl_lock, flags); if (tty->link->packet) { tty->ctrl_status |= TIOCPKT_FLUSHREAD; wake_up_interruptible(&tty->link->read_wait); } + spin_unlock_irqrestore(&tty->ctrl_lock, flags); } /** @@ -264,7 +266,7 @@ static inline int is_continuation(unsigned char c, struct tty_struct *tty) * relevant in the world today. If you ever need them, add them here. * * Called from both the receive and transmit sides and can be called - * re-entrantly. Relies on lock_kernel() still. + * re-entrantly. Relies on lock_kernel() for tty->column state. */ static int opost(unsigned char c, struct tty_struct *tty) @@ -275,6 +277,7 @@ static int opost(unsigned char c, struct tty_struct *tty) if (!space) return -1; + lock_kernel(); if (O_OPOST(tty)) { switch (c) { case '\n': @@ -323,6 +326,7 @@ static int opost(unsigned char c, struct tty_struct *tty) } } tty->driver->put_char(tty, c); + unlock_kernel(); return 0; } @@ -337,7 +341,8 @@ static int opost(unsigned char c, struct tty_struct *tty) * the simple cases normally found and helps to generate blocks of * symbols for the console driver and thus improve performance. * - * Called from write_chan under the tty layer write lock. + * Called from write_chan under the tty layer write lock. Relies + * on lock_kernel for the tty->column state. 
*/ static ssize_t opost_block(struct tty_struct *tty, @@ -353,6 +358,7 @@ static ssize_t opost_block(struct tty_struct *tty, if (nr > space) nr = space; + lock_kernel(); for (i = 0, cp = buf; i < nr; i++, cp++) { switch (*cp) { case '\n': @@ -387,6 +393,7 @@ break_out: if (tty->driver->flush_chars) tty->driver->flush_chars(tty); i = tty->driver->write(tty, buf, i); + unlock_kernel(); return i; } @@ -1194,6 +1201,11 @@ extern ssize_t redirected_tty_write(struct file *, const char __user *, * Perform job control management checks on this file/tty descriptor * and if appropriate send any needed signals and return a negative * error code if action should be taken. + * + * FIXME: + * Locking: None - redirected write test is safe, testing + * current->signal should possibly lock current->sighand + * pgrp locking ? */ static int job_control(struct tty_struct *tty, struct file *file) @@ -1246,6 +1258,7 @@ static ssize_t read_chan(struct tty_struct *tty, struct file *file, ssize_t size; long timeout; unsigned long flags; + int packet; do_it_again: @@ -1289,16 +1302,19 @@ do_it_again: if (mutex_lock_interruptible(&tty->atomic_read_lock)) return -ERESTARTSYS; } + packet = tty->packet; add_wait_queue(&tty->read_wait, &wait); while (nr) { /* First test for status change. */ - if (tty->packet && tty->link->ctrl_status) { + if (packet && tty->link->ctrl_status) { unsigned char cs; if (b != buf) break; + spin_lock_irqsave(&tty->link->ctrl_lock, flags); cs = tty->link->ctrl_status; tty->link->ctrl_status = 0; + spin_unlock_irqrestore(&tty->link->ctrl_lock, flags); if (tty_put_user(tty, cs, b++)) { retval = -EFAULT; b--; @@ -1333,6 +1349,7 @@ do_it_again: retval = -ERESTARTSYS; break; } + /* FIXME: does n_tty_set_room need locking ? */ n_tty_set_room(tty); timeout = schedule_timeout(timeout); continue; @@ -1340,7 +1357,7 @@ do_it_again: __set_current_state(TASK_RUNNING); /* Deal with packet mode. 
*/ - if (tty->packet && b == buf) { + if (packet && b == buf) { if (tty_put_user(tty, TIOCPKT_DATA, b++)) { retval = -EFAULT; b--; @@ -1388,6 +1405,8 @@ do_it_again: break; } else { int uncopied; + /* The copy function takes the read lock and handles + locking internally for this case */ uncopied = copy_from_read_buf(tty, &b, &nr); uncopied += copy_from_read_buf(tty, &b, &nr); if (uncopied) { @@ -1429,7 +1448,6 @@ do_it_again: goto do_it_again; n_tty_set_room(tty); - return retval; } diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 706ff34728f1..6288356b769d 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -181,6 +181,7 @@ static int pty_set_lock(struct tty_struct *tty, int __user * arg) static void pty_flush_buffer(struct tty_struct *tty) { struct tty_struct *to = tty->link; + unsigned long flags; if (!to) return; @@ -189,8 +190,10 @@ static void pty_flush_buffer(struct tty_struct *tty) to->ldisc.flush_buffer(to); if (to->packet) { + spin_lock_irqsave(&tty->ctrl_lock, flags); tty->ctrl_status |= TIOCPKT_FLUSHWRITE; wake_up_interruptible(&to->read_wait); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); } } diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 2fa6856706ab..0b0354bc28d6 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -152,8 +152,7 @@ ssize_t redirected_tty_write(struct file *, const char __user *, static unsigned int tty_poll(struct file *, poll_table *); static int tty_open(struct inode *, struct file *); static int tty_release(struct inode *, struct file *); -int tty_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg); +long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); @@ -1205,7 +1204,7 @@ EXPORT_SYMBOL_GPL(tty_find_polling_driver); * not in the foreground, send a SIGTTOU. 
If the signal is blocked or * ignored, go ahead and perform the operation. (POSIX 7.2) * - * Locking: none + * Locking: none - FIXME: review this */ int tty_check_change(struct tty_struct *tty) @@ -1247,8 +1246,8 @@ static unsigned int hung_up_tty_poll(struct file *filp, poll_table *wait) return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM; } -static int hung_up_tty_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static long hung_up_tty_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) { return cmd == TIOCSPGRP ? -ENOTTY : -EIO; } @@ -1264,7 +1263,7 @@ static const struct file_operations tty_fops = { .read = tty_read, .write = tty_write, .poll = tty_poll, - .ioctl = tty_ioctl, + .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, .open = tty_open, .release = tty_release, @@ -1277,7 +1276,7 @@ static const struct file_operations ptmx_fops = { .read = tty_read, .write = tty_write, .poll = tty_poll, - .ioctl = tty_ioctl, + .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, .open = ptmx_open, .release = tty_release, @@ -1290,7 +1289,7 @@ static const struct file_operations console_fops = { .read = tty_read, .write = redirected_tty_write, .poll = tty_poll, - .ioctl = tty_ioctl, + .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, .open = tty_open, .release = tty_release, @@ -1302,7 +1301,7 @@ static const struct file_operations hung_up_tty_fops = { .read = hung_up_tty_read, .write = hung_up_tty_write, .poll = hung_up_tty_poll, - .ioctl = hung_up_tty_ioctl, + .unlocked_ioctl = hung_up_tty_ioctl, .compat_ioctl = hung_up_tty_compat_ioctl, .release = tty_release, }; @@ -1626,16 +1625,17 @@ void disassociate_ctty(int on_exit) struct tty_struct *tty; struct pid *tty_pgrp = NULL; - lock_kernel(); mutex_lock(&tty_mutex); tty = get_current_tty(); if (tty) { tty_pgrp = get_pid(tty->pgrp); mutex_unlock(&tty_mutex); + lock_kernel(); /* XXX: here we race, there is nothing 
protecting tty */ if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) tty_vhangup(tty); + unlock_kernel(); } else if (on_exit) { struct pid *old_pgrp; spin_lock_irq(&current->sighand->siglock); @@ -1648,7 +1648,6 @@ void disassociate_ctty(int on_exit) put_pid(old_pgrp); } mutex_unlock(&tty_mutex); - unlock_kernel(); return; } if (tty_pgrp) { @@ -1683,7 +1682,6 @@ void disassociate_ctty(int on_exit) read_lock(&tasklist_lock); session_clear_tty(task_session(current)); read_unlock(&tasklist_lock); - unlock_kernel(); } /** @@ -1693,8 +1691,10 @@ void disassociate_ctty(int on_exit) void no_tty(void) { struct task_struct *tsk = current; + lock_kernel(); if (tsk->signal->leader) disassociate_ctty(0); + unlock_kernel(); proc_clear_tty(tsk); } @@ -1714,19 +1714,24 @@ void no_tty(void) * but not always. * * Locking: - * Broken. Relies on BKL which is unsafe here. + * Uses the tty control lock internally */ void stop_tty(struct tty_struct *tty) { - if (tty->stopped) + unsigned long flags; + spin_lock_irqsave(&tty->ctrl_lock, flags); + if (tty->stopped) { + spin_unlock_irqrestore(&tty->ctrl_lock, flags); return; + } tty->stopped = 1; if (tty->link && tty->link->packet) { tty->ctrl_status &= ~TIOCPKT_START; tty->ctrl_status |= TIOCPKT_STOP; wake_up_interruptible(&tty->link->read_wait); } + spin_unlock_irqrestore(&tty->ctrl_lock, flags); if (tty->driver->stop) (tty->driver->stop)(tty); } @@ -1743,19 +1748,24 @@ EXPORT_SYMBOL(stop_tty); * driver start method is invoked and the line discipline woken. * * Locking: - * Broken. Relies on BKL which is unsafe here.
+ * ctrl_lock */ void start_tty(struct tty_struct *tty) { - if (!tty->stopped || tty->flow_stopped) + unsigned long flags; + spin_lock_irqsave(&tty->ctrl_lock, flags); + if (!tty->stopped || tty->flow_stopped) { + spin_unlock_irqrestore(&tty->ctrl_lock, flags); return; + } tty->stopped = 0; if (tty->link && tty->link->packet) { tty->ctrl_status &= ~TIOCPKT_STOP; tty->ctrl_status |= TIOCPKT_START; wake_up_interruptible(&tty->link->read_wait); } + spin_unlock_irqrestore(&tty->ctrl_lock, flags); if (tty->driver->start) (tty->driver->start)(tty); /* If we have a running line discipline it may need kicking */ @@ -1799,13 +1809,11 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count, /* We want to wait for the line discipline to sort out in this situation */ ld = tty_ldisc_ref_wait(tty); - lock_kernel(); if (ld->read) i = (ld->read)(tty, file, buf, count); else i = -EIO; tty_ldisc_deref(ld); - unlock_kernel(); if (i > 0) inode->i_atime = current_fs_time(inode->i_sb); return i; @@ -1893,9 +1901,7 @@ static inline ssize_t do_tty_write( ret = -EFAULT; if (copy_from_user(tty->write_buf, buf, size)) break; - lock_kernel(); ret = write(tty, file, tty->write_buf, size); - unlock_kernel(); if (ret <= 0) break; written += ret; @@ -3070,10 +3076,13 @@ static int fionbio(struct file *file, int __user *p) if (get_user(nonblock, p)) return -EFAULT; + /* file->f_flags is still BKL protected in the fs layer - vomit */ + lock_kernel(); if (nonblock) file->f_flags |= O_NONBLOCK; else file->f_flags &= ~O_NONBLOCK; + unlock_kernel(); return 0; } @@ -3162,7 +3171,7 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t * Set the process group of the tty to the session passed. Only * permitted where the tty session is our session. 
* - * Locking: None + * Locking: RCU */ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) @@ -3237,10 +3246,16 @@ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t _ static int tiocsetd(struct tty_struct *tty, int __user *p) { int ldisc; + int ret; if (get_user(ldisc, p)) return -EFAULT; - return tty_set_ldisc(tty, ldisc); + + lock_kernel(); + ret = tty_set_ldisc(tty, ldisc); + unlock_kernel(); + + return ret; } /** @@ -3258,16 +3273,21 @@ static int tiocsetd(struct tty_struct *tty, int __user *p) static int send_break(struct tty_struct *tty, unsigned int duration) { + int retval = -EINTR; + + lock_kernel(); if (tty_write_lock(tty, 0) < 0) - return -EINTR; + goto out; tty->driver->break_ctl(tty, -1); if (!signal_pending(current)) msleep_interruptible(duration); tty->driver->break_ctl(tty, 0); tty_write_unlock(tty); - if (signal_pending(current)) - return -EINTR; - return 0; + if (!signal_pending(current)) + retval = 0; +out: + unlock_kernel(); + return retval; } /** @@ -3287,7 +3307,9 @@ static int tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p int retval = -EINVAL; if (tty->driver->tiocmget) { + lock_kernel(); retval = tty->driver->tiocmget(tty, file); + unlock_kernel(); if (retval >= 0) retval = put_user(retval, p); @@ -3337,7 +3359,9 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP; clear &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP; + lock_kernel(); retval = tty->driver->tiocmset(tty, file, set, clear); + unlock_kernel(); } return retval; } @@ -3345,20 +3369,18 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int /* * Split this up, as gcc can choke on it otherwise.. 
*/ -int tty_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct tty_struct *tty, *real_tty; void __user *p = (void __user *)arg; int retval; struct tty_ldisc *ld; + struct inode *inode = file->f_dentry->d_inode; tty = (struct tty_struct *)file->private_data; if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; - /* CHECKME: is this safe as one end closes ? */ - real_tty = tty; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) @@ -3367,13 +3389,19 @@ int tty_ioctl(struct inode *inode, struct file *file, /* * Break handling by driver */ + + retval = -EINVAL; + if (!tty->driver->break_ctl) { switch (cmd) { case TIOCSBRK: case TIOCCBRK: - if (tty->driver->ioctl) - return tty->driver->ioctl(tty, file, cmd, arg); - return -EINVAL; + if (tty->driver->ioctl) { + lock_kernel(); + retval = tty->driver->ioctl(tty, file, cmd, arg); + unlock_kernel(); + } + return retval; /* These two ioctl's always return success; even if */ /* the driver doesn't support them. 
*/ @@ -3381,7 +3409,9 @@ int tty_ioctl(struct inode *inode, struct file *file, case TCSBRKP: if (!tty->driver->ioctl) return 0; + lock_kernel(); retval = tty->driver->ioctl(tty, file, cmd, arg); + unlock_kernel(); if (retval == -ENOIOCTLCMD) retval = 0; return retval; @@ -3401,7 +3431,9 @@ int tty_ioctl(struct inode *inode, struct file *file, if (retval) return retval; if (cmd != TIOCCBRK) { + lock_kernel(); tty_wait_until_sent(tty, 0); + unlock_kernel(); if (signal_pending(current)) return -EINTR; } @@ -3451,11 +3483,15 @@ int tty_ioctl(struct inode *inode, struct file *file, * Break handling */ case TIOCSBRK: /* Turn break on, unconditionally */ + lock_kernel(); tty->driver->break_ctl(tty, -1); + unlock_kernel(); return 0; case TIOCCBRK: /* Turn break off, unconditionally */ + lock_kernel(); tty->driver->break_ctl(tty, 0); + unlock_kernel(); return 0; case TCSBRK: /* SVID version: non-zero arg --> no break */ /* non-zero arg means wait for all output data @@ -3485,14 +3521,18 @@ int tty_ioctl(struct inode *inode, struct file *file, break; } if (tty->driver->ioctl) { + lock_kernel(); retval = (tty->driver->ioctl)(tty, file, cmd, arg); + unlock_kernel(); if (retval != -ENOIOCTLCMD) return retval; } ld = tty_ldisc_ref_wait(tty); retval = -EINVAL; if (ld->ioctl) { + lock_kernel(); retval = ld->ioctl(tty, file, cmd, arg); + unlock_kernel(); if (retval == -ENOIOCTLCMD) retval = -EINVAL; } @@ -3770,6 +3810,7 @@ static void initialize_tty_struct(struct tty_struct *tty) mutex_init(&tty->atomic_read_lock); mutex_init(&tty->atomic_write_lock); spin_lock_init(&tty->read_lock); + spin_lock_init(&tty->ctrl_lock); INIT_LIST_HEAD(&tty->tty_files); INIT_WORK(&tty->SAK_work, do_SAK_work); } diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index f95a80b2265f..d6353d89b451 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -395,6 +395,7 @@ static void change_termios(struct tty_struct *tty, struct ktermios *new_termios) int canon_change; struct 
ktermios old_termios = *tty->termios; struct tty_ldisc *ld; + unsigned long flags; /* * Perform the actual termios internal changes under lock. @@ -429,11 +430,13 @@ static void change_termios(struct tty_struct *tty, struct ktermios *new_termios) STOP_CHAR(tty) == '\023' && START_CHAR(tty) == '\021'); if (old_flow != new_flow) { + spin_lock_irqsave(&tty->ctrl_lock, flags); tty->ctrl_status &= ~(TIOCPKT_DOSTOP | TIOCPKT_NOSTOP); if (new_flow) tty->ctrl_status |= TIOCPKT_DOSTOP; else tty->ctrl_status |= TIOCPKT_NOSTOP; + spin_unlock_irqrestore(&tty->ctrl_lock, flags); wake_up_interruptible(&tty->link->read_wait); } } @@ -905,6 +908,7 @@ int n_tty_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) { struct tty_struct *real_tty; + unsigned long flags; int retval; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && @@ -963,6 +967,7 @@ int n_tty_ioctl(struct tty_struct *tty, struct file *file, return -ENOTTY; if (get_user(pktmode, (int __user *) arg)) return -EFAULT; + spin_lock_irqsave(&tty->ctrl_lock, flags); if (pktmode) { if (!tty->packet) { tty->packet = 1; @@ -970,6 +975,7 @@ int n_tty_ioctl(struct tty_struct *tty, struct file *file, } } else tty->packet = 0; + spin_unlock_irqrestore(&tty->ctrl_lock, flags); return 0; } default: diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 1c2660477135..e64f0bf3624e 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -2541,6 +2541,9 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) if (get_user(type, p)) return -EFAULT; ret = 0; + + lock_kernel(); + switch (type) { case TIOCL_SETSEL: @@ -2560,7 +2563,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) ret = sel_loadlut(p); break; case TIOCL_GETSHIFTSTATE: - + /* * Make it possible to react to Shift+Mousebutton. 
* Note that 'shift_state' is an undocumented @@ -2615,6 +2618,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) ret = -EINVAL; break; } + unlock_kernel(); return ret; } @@ -3829,7 +3833,7 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op) goto out; c = (font.width+7)/8 * 32 * font.charcount; - + if (op->data && font.charcount > op->charcount) rc = -ENOSPC; if (!(op->flags & KD_FONT_FLAG_OLD)) { diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c6e72aebd16b..9663e8776724 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1046,7 +1046,7 @@ static int vt_check(struct file *file) struct inode *inode = file->f_path.dentry->d_inode; struct vc_data *vc; - if (file->f_op->ioctl != tty_ioctl) + if (file->f_op->unlocked_ioctl != tty_ioctl) return -EINVAL; tty = (struct tty_struct *)file->private_data; diff --git a/include/linux/tty.h b/include/linux/tty.h index 265831ccaa88..4d3702bade03 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -183,6 +183,7 @@ struct tty_struct { int index; struct tty_ldisc ldisc; struct mutex termios_mutex; + spinlock_t ctrl_lock; struct ktermios *termios, *termios_locked; char name[64]; struct pid *pgrp; @@ -323,8 +324,7 @@ extern void tty_ldisc_put(int); extern void tty_wakeup(struct tty_struct *tty); extern void tty_ldisc_flush(struct tty_struct *tty); -extern int tty_ioctl(struct inode *inode, struct file *file, unsigned int cmd, - unsigned long arg); +extern long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); -- cgit v1.2.3-71-gd317 From 47f86834bbd4193139d61d659bebf9ab9d691e37 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 30 Apr 2008 00:53:30 -0700 Subject: redo locking of tty->pgrp Historically tty->pgrp and friends were pid_t and the code "knew" they were safe. 
The change to pid structs opened up a few races and the removal of the BKL in places made them quite hittable. We put tty->pgrp under the ctrl_lock for the tty. Signed-off-by: Alan Cox Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 78 +++++++++++++++++++++++++++++++++++++++------------ drivers/char/vt.c | 6 ++++ include/linux/tty.h | 10 ++++--- 3 files changed, 72 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 0b0354bc28d6..c8aa318eaa18 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1204,26 +1204,37 @@ EXPORT_SYMBOL_GPL(tty_find_polling_driver); * not in the foreground, send a SIGTTOU. If the signal is blocked or * ignored, go ahead and perform the operation. (POSIX 7.2) * - * Locking: none - FIXME: review this + * Locking: ctrl_lock - FIXME: review this */ int tty_check_change(struct tty_struct *tty) { + unsigned long flags; + int ret = 0; + if (current->signal->tty != tty) return 0; + + spin_lock_irqsave(&tty->ctrl_lock, flags); + if (!tty->pgrp) { printk(KERN_WARNING "tty_check_change: tty->pgrp == NULL!\n"); - return 0; + goto out; } if (task_pgrp(current) == tty->pgrp) - return 0; + goto out; if (is_ignored(SIGTTOU)) - return 0; - if (is_current_pgrp_orphaned()) - return -EIO; + goto out; + if (is_current_pgrp_orphaned()) { + ret = -EIO; + goto out; + } kill_pgrp(task_pgrp(current), SIGTTOU, 1); set_thread_flag(TIF_SIGPENDING); - return -ERESTARTSYS; + ret = -ERESTARTSYS; +out: + spin_unlock_irqrestore(&tty->ctrl_lock, flags); + return ret; } EXPORT_SYMBOL(tty_check_change); @@ -1403,6 +1414,7 @@ static void do_tty_hangup(struct work_struct *work) struct task_struct *p; struct tty_ldisc *ld; int closecount = 0, n; + unsigned long flags; if (!tty) return; @@ -1479,19 +1491,24 @@ static void do_tty_hangup(struct work_struct *work) __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); __group_send_sig_info(SIGCONT, 
SEND_SIG_PRIV, p); put_pid(p->signal->tty_old_pgrp); /* A noop */ + spin_lock_irqsave(&tty->ctrl_lock, flags); if (tty->pgrp) p->signal->tty_old_pgrp = get_pid(tty->pgrp); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); spin_unlock_irq(&p->sighand->siglock); } while_each_pid_task(tty->session, PIDTYPE_SID, p); } read_unlock(&tasklist_lock); + spin_lock_irqsave(&tty->ctrl_lock, flags); tty->flags = 0; put_pid(tty->session); put_pid(tty->pgrp); tty->session = NULL; tty->pgrp = NULL; tty->ctrl_status = 0; + spin_unlock_irqrestore(&tty->ctrl_lock, flags); + /* * If one of the devices matches a console pointer, we * cannot just call hangup() because that will cause @@ -1666,10 +1683,13 @@ void disassociate_ctty(int on_exit) /* It is possible that do_tty_hangup has free'd this tty */ tty = get_current_tty(); if (tty) { + unsigned long flags; + spin_lock_irqsave(&tty->ctrl_lock, flags); put_pid(tty->session); put_pid(tty->pgrp); tty->session = NULL; tty->pgrp = NULL; + spin_unlock_irqrestore(&tty->ctrl_lock, flags); } else { #ifdef TTY_DEBUG_HANGUP printk(KERN_DEBUG "error attempted to write to tty [0x%p]" @@ -1785,10 +1805,8 @@ EXPORT_SYMBOL(start_tty); * for hung up devices before calling the line discipline method. * * Locking: - * Locks the line discipline internally while needed - * For historical reasons the line discipline read method is - * invoked under the BKL. This will go away in time so do not rely on it - * in new code. Multiple read calls may be outstanding in parallel. + * Locks the line discipline internally while needed. Multiple + * read calls may be outstanding in parallel. 
*/ static ssize_t tty_read(struct file *file, char __user *buf, size_t count, @@ -2888,6 +2906,7 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait) static int tty_fasync(int fd, struct file *filp, int on) { struct tty_struct *tty; + unsigned long flags; int retval; tty = (struct tty_struct *)filp->private_data; @@ -2903,6 +2922,7 @@ static int tty_fasync(int fd, struct file *filp, int on) struct pid *pid; if (!waitqueue_active(&tty->read_wait)) tty->minimum_to_wake = 1; + spin_lock_irqsave(&tty->ctrl_lock, flags); if (tty->pgrp) { pid = tty->pgrp; type = PIDTYPE_PGID; @@ -2910,6 +2930,7 @@ static int tty_fasync(int fd, struct file *filp, int on) pid = task_pid(current); type = PIDTYPE_PID; } + spin_unlock_irqrestore(&tty->ctrl_lock, flags); retval = __f_setown(filp, pid, type, 0); if (retval) return retval; @@ -2995,6 +3016,8 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, struct winsize __user *arg) { struct winsize tmp_ws; + struct pid *pgrp, *rpgrp; + unsigned long flags; if (copy_from_user(&tmp_ws, arg, sizeof(*arg))) return -EFAULT; @@ -3012,10 +3035,21 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, } } #endif - if (tty->pgrp) - kill_pgrp(tty->pgrp, SIGWINCH, 1); - if ((real_tty->pgrp != tty->pgrp) && real_tty->pgrp) - kill_pgrp(real_tty->pgrp, SIGWINCH, 1); + /* Get the PID values and reference them so we can + avoid holding the tty ctrl lock while sending signals */ + spin_lock_irqsave(&tty->ctrl_lock, flags); + pgrp = get_pid(tty->pgrp); + rpgrp = get_pid(real_tty->pgrp); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); + + if (pgrp) + kill_pgrp(pgrp, SIGWINCH, 1); + if (rpgrp != pgrp && rpgrp) + kill_pgrp(rpgrp, SIGWINCH, 1); + + put_pid(pgrp); + put_pid(rpgrp); + tty->winsize = tmp_ws; real_tty->winsize = tmp_ws; done: @@ -3171,7 +3205,7 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t * Set the process group of the tty to the session passed. 
Only * permitted where the tty session is our session. * - * Locking: RCU + * Locking: RCU, ctrl lock */ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) @@ -3179,6 +3213,7 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t struct pid *pgrp; pid_t pgrp_nr; int retval = tty_check_change(real_tty); + unsigned long flags; if (retval == -EIO) return -ENOTTY; @@ -3201,8 +3236,10 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t if (session_of_pgrp(pgrp) != task_session(current)) goto out_unlock; retval = 0; + spin_lock_irqsave(&tty->ctrl_lock, flags); put_pid(real_tty->pgrp); real_tty->pgrp = get_pid(pgrp); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); out_unlock: rcu_read_unlock(); return retval; @@ -4077,14 +4114,19 @@ void proc_clear_tty(struct task_struct *p) } EXPORT_SYMBOL(proc_clear_tty); +/* Called under the sighand lock */ + static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty) { if (tty) { - /* We should not have a session or pgrp to here but.... */ + unsigned long flags; + /* We should not have a session or pgrp to put here but.... 
*/ + spin_lock_irqsave(&tty->ctrl_lock, flags); put_pid(tty->session); put_pid(tty->pgrp); - tty->session = get_pid(task_session(tsk)); tty->pgrp = get_pid(task_pgrp(tsk)); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); + tty->session = get_pid(task_session(tsk)); } put_pid(tsk->signal->tty_old_pgrp); tsk->signal->tty = tty; diff --git a/drivers/char/vt.c b/drivers/char/vt.c index e64f0bf3624e..c71d1d0f13b9 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -909,15 +909,21 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines) if (vc->vc_tty) { struct winsize ws, *cws = &vc->vc_tty->winsize; + unsigned long flags; memset(&ws, 0, sizeof(ws)); ws.ws_row = vc->vc_rows; ws.ws_col = vc->vc_cols; ws.ws_ypixel = vc->vc_scan_lines; + + mutex_lock(&vc->vc_tty->termios_mutex); + spin_lock_irqsave(&vc->vc_tty->ctrl_lock, flags); if ((ws.ws_row != cws->ws_row || ws.ws_col != cws->ws_col) && vc->vc_tty->pgrp) kill_pgrp(vc->vc_tty->pgrp, SIGWINCH, 1); + spin_unlock_irqrestore(&vc->vc_tty->ctrl_lock, flags); *cws = ws; + mutex_unlock(&vc->vc_tty->termios_mutex); } if (CON_IS_VISIBLE(vc)) diff --git a/include/linux/tty.h b/include/linux/tty.h index 4d3702bade03..381085e45cca 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -184,21 +184,22 @@ struct tty_struct { struct tty_ldisc ldisc; struct mutex termios_mutex; spinlock_t ctrl_lock; + /* Termios values are protected by the termios mutex */ struct ktermios *termios, *termios_locked; char name[64]; - struct pid *pgrp; + struct pid *pgrp; /* Protected by ctrl lock */ struct pid *session; unsigned long flags; int count; - struct winsize winsize; + struct winsize winsize; /* termios mutex */ unsigned char stopped:1, hw_stopped:1, flow_stopped:1, packet:1; unsigned char low_latency:1, warned:1; - unsigned char ctrl_status; + unsigned char ctrl_status; /* ctrl_lock */ unsigned int receive_room; /* Bytes free for queue */ struct tty_struct *link; struct fasync_struct *fasync; - struct 
tty_bufhead buf; + struct tty_bufhead buf; /* Locked internally */ int alt_speed; /* For magic substitution of 38400 bps */ wait_queue_head_t write_wait; wait_queue_head_t read_wait; @@ -212,6 +213,7 @@ struct tty_struct { /* * The following is data for the N_TTY line discipline. For * historical reasons, this is included in the tty structure. + * Mostly locked by the BKL. */ unsigned int column; unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1; -- cgit v1.2.3-71-gd317 From 5d0fdf1e01899805b6c2c0b789a707dcb731b1ea Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 30 Apr 2008 00:53:31 -0700 Subject: tty_io: fix remaining pid struct locking This fixes the last couple of pid struct locking failures I know about. [oleg@tv-sign.ru: clean up do_task_stat()] Signed-off-by: Alan Cox Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 28 +++++++++++++++++++++++++++- fs/proc/array.c | 4 +++- include/linux/tty.h | 1 + 3 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index c8aa318eaa18..2460c4c76161 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -3173,6 +3173,27 @@ unlock: return ret; } +/** + * tty_get_pgrp - return a ref counted pgrp pid + * @tty: tty to read + * + * Returns a refcounted instance of the pid struct for the process + * group controlling the tty. 
+ */ + +struct pid *tty_get_pgrp(struct tty_struct *tty) +{ + unsigned long flags; + struct pid *pgrp; + + spin_lock_irqsave(&tty->ctrl_lock, flags); + pgrp = get_pid(tty->pgrp); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); + + return pgrp; +} +EXPORT_SYMBOL_GPL(tty_get_pgrp); + /** * tiocgpgrp - get process group * @tty: tty passed by user @@ -3187,13 +3208,18 @@ unlock: static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) { + struct pid *pid; + int ret; /* * (tty == real_tty) is a cheap way of * testing if the tty is NOT a master pty. */ if (tty == real_tty && current->signal->tty != real_tty) return -ENOTTY; - return put_user(pid_vnr(real_tty->pgrp), p); + pid = tty_get_pgrp(real_tty); + ret = put_user(pid_vnr(pid), p); + put_pid(pid); + return ret; } /** diff --git a/fs/proc/array.c b/fs/proc/array.c index b07a71002f2f..c135cbdd9127 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -429,7 +429,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, struct signal_struct *sig = task->signal; if (sig->tty) { - tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns); + struct pid *pgrp = tty_get_pgrp(sig->tty); + tty_pgrp = pid_nr_ns(pgrp, ns); + put_pid(pgrp); tty_nr = new_encode_dev(tty_devnum(sig->tty)); } diff --git a/include/linux/tty.h b/include/linux/tty.h index 381085e45cca..2699298b00ef 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -297,6 +297,7 @@ extern int tty_read_raw_data(struct tty_struct *tty, unsigned char *bufp, extern void tty_write_message(struct tty_struct *tty, char *msg); extern int is_current_pgrp_orphaned(void); +extern struct pid *tty_get_pgrp(struct tty_struct *tty); extern int is_ignored(int sig); extern int tty_signal(int sig, struct tty_struct *tty); extern void tty_hangup(struct tty_struct * tty); -- cgit v1.2.3-71-gd317 From 76b25a5509bbafdbfc7d7d6b41a3c64947d59360 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 30 Apr 2008 00:54:03 -0700 Subject: char: switch gs, 
cyclades and esp to return int for put_char Signed-off-by: Alan Cox Cc: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/cyclades.c | 9 +++++---- drivers/char/esp.c | 9 ++++++--- drivers/char/generic_serial.c | 11 ++++++----- include/linux/generic_serial.h | 2 +- 4 files changed, 18 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index 571e4fab5bfa..b8fb251f80ee 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -2814,7 +2814,7 @@ static int cy_write(struct tty_struct *tty, const unsigned char *buf, int count) * done stuffing characters into the driver. If there is no room * in the queue, the character is ignored. */ -static void cy_put_char(struct tty_struct *tty, unsigned char ch) +static int cy_put_char(struct tty_struct *tty, unsigned char ch) { struct cyclades_port *info = tty->driver_data; unsigned long flags; @@ -2824,15 +2824,15 @@ static void cy_put_char(struct tty_struct *tty, unsigned char ch) #endif if (serial_paranoia_check(info, tty->name, "cy_put_char")) - return; + return 0; if (!info->xmit_buf) - return; + return 0; spin_lock_irqsave(&info->card->card_lock, flags); if (info->xmit_cnt >= (int)(SERIAL_XMIT_SIZE - 1)) { spin_unlock_irqrestore(&info->card->card_lock, flags); - return; + return 0; } info->xmit_buf[info->xmit_head++] = ch; @@ -2841,6 +2841,7 @@ static void cy_put_char(struct tty_struct *tty, unsigned char ch) info->idle_stats.xmit_bytes++; info->idle_stats.xmit_idle = jiffies; spin_unlock_irqrestore(&info->card->card_lock, flags); + return 1; } /* cy_put_char */ /* diff --git a/drivers/char/esp.c b/drivers/char/esp.c index b1f92db31331..996d3230c929 100644 --- a/drivers/char/esp.c +++ b/drivers/char/esp.c @@ -1156,24 +1156,27 @@ static void change_speed(struct esp_struct *info) spin_unlock_irqrestore(&info->lock, flags); } -static void rs_put_char(struct tty_struct *tty, unsigned char ch) +static int 
rs_put_char(struct tty_struct *tty, unsigned char ch) { struct esp_struct *info = (struct esp_struct *)tty->driver_data; unsigned long flags; + int ret = 0; if (serial_paranoia_check(info, tty->name, "rs_put_char")) - return; + return 0; if (!info->xmit_buf) - return; + return 0; spin_lock_irqsave(&info->lock, flags); if (info->xmit_cnt < ESP_XMIT_SIZE - 1) { info->xmit_buf[info->xmit_head++] = ch; info->xmit_head &= ESP_XMIT_SIZE-1; info->xmit_cnt++; + ret = 1; } spin_unlock_irqrestore(&info->lock, flags); + return ret; } static void rs_flush_chars(struct tty_struct *tty) diff --git a/drivers/char/generic_serial.c b/drivers/char/generic_serial.c index f6610f28d657..149518e22fa6 100644 --- a/drivers/char/generic_serial.c +++ b/drivers/char/generic_serial.c @@ -48,19 +48,19 @@ static int gs_debug; module_param(gs_debug, int, 0644); -void gs_put_char(struct tty_struct * tty, unsigned char ch) +int gs_put_char(struct tty_struct * tty, unsigned char ch) { struct gs_port *port; func_enter (); - if (!tty) return; + if (!tty) return 0; port = tty->driver_data; - if (!port) return; + if (!port) return 0; - if (! (port->flags & ASYNC_INITIALIZED)) return; + if (! (port->flags & ASYNC_INITIALIZED)) return 0; /* Take a lock on the serial tranmit buffer! */ mutex_lock(& port->port_write_mutex); @@ -68,7 +68,7 @@ void gs_put_char(struct tty_struct * tty, unsigned char ch) if (port->xmit_cnt >= SERIAL_XMIT_SIZE - 1) { /* Sorry, buffer is full, drop character. Update statistics???? 
-- REW */ mutex_unlock(&port->port_write_mutex); - return; + return 0; } port->xmit_buf[port->xmit_head++] = ch; @@ -77,6 +77,7 @@ void gs_put_char(struct tty_struct * tty, unsigned char ch) mutex_unlock(&port->port_write_mutex); func_exit (); + return 1; } diff --git a/include/linux/generic_serial.h b/include/linux/generic_serial.h index 5412da28fa47..110833666e37 100644 --- a/include/linux/generic_serial.h +++ b/include/linux/generic_serial.h @@ -78,7 +78,7 @@ struct gs_port { #define GS_DEBUG_WRITE 0x00000040 #ifdef __KERNEL__ -void gs_put_char(struct tty_struct *tty, unsigned char ch); +int gs_put_char(struct tty_struct *tty, unsigned char ch); int gs_write(struct tty_struct *tty, const unsigned char *buf, int count); int gs_write_room(struct tty_struct *tty); -- cgit v1.2.3-71-gd317 From f34d7a5b7010b82fe97da95496b9971435530062 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 30 Apr 2008 00:54:13 -0700 Subject: tty: The big operations rework - Operations are now a shared const function block as with most other Linux objects - Introduce wrappers for some optional functions to get consistent behaviour - Wrap put_char which used to be patched by the tty layer - Document which functions are needed/optional - Make put_char report success/fail - Cache the driver->ops pointer in the tty as tty->ops - Remove various surplus lock calls we no longer need - Remove proc_write method as noted by Alexey Dobriyan - Introduce some missing sanity checks where certain driver/ldisc combinations would oops as they didn't check needed methods were present [akpm@linux-foundation.org: fix fs/compat_ioctl.c build] [akpm@linux-foundation.org: fix isicom] [akpm@linux-foundation.org: fix arch/ia64/hp/sim/simserial.c build] [akpm@linux-foundation.org: fix kgdb] Signed-off-by: Alan Cox Acked-by: Greg Kroah-Hartman Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/hp/sim/simserial.c | 11 +- drivers/bluetooth/hci_ldisc.c | 13 +- 
drivers/char/ip2/ip2main.c | 12 +- drivers/char/isicom.c | 15 +- drivers/char/keyboard.c | 2 +- drivers/char/n_hdlc.c | 11 +- drivers/char/n_r3964.c | 17 +-- drivers/char/n_tty.c | 103 ++++++------- drivers/char/tty_io.c | 174 +++++++++------------- drivers/char/tty_ioctl.c | 62 +++++--- drivers/input/serio/serport.c | 2 +- drivers/isdn/gigaset/ser-gigaset.c | 15 +- drivers/net/hamradio/6pack.c | 36 ++--- drivers/net/hamradio/mkiss.c | 15 +- drivers/net/irda/irtty-sir.c | 95 ++++-------- drivers/net/ppp_async.c | 9 +- drivers/net/ppp_synctty.c | 9 +- drivers/net/slip.c | 13 +- drivers/net/wan/x25_asy.c | 279 +++++++++++++++++------------------ drivers/serial/kgdboc.c | 6 +- drivers/serial/serial_core.c | 38 +++-- drivers/usb/serial/digi_acceleport.c | 3 +- drivers/usb/serial/usb-serial.c | 129 +++------------- drivers/usb/serial/whiteheat.c | 4 +- fs/compat_ioctl.c | 2 +- fs/proc/proc_tty.c | 6 +- include/linux/tty.h | 8 + include/linux/tty_driver.h | 102 +++++++------ kernel/printk.c | 4 +- net/irda/ircomm/ircomm_tty.c | 6 +- 30 files changed, 537 insertions(+), 664 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index eb0c32a85fd7..23cafc80d2a4 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -210,21 +210,23 @@ static void do_softint(struct work_struct *private_) printk(KERN_ERR "simserial: do_softint called\n"); } -static void rs_put_char(struct tty_struct *tty, unsigned char ch) +static int rs_put_char(struct tty_struct *tty, unsigned char ch) { struct async_struct *info = (struct async_struct *)tty->driver_data; unsigned long flags; - if (!tty || !info->xmit.buf) return; + if (!tty || !info->xmit.buf) + return 0; local_irq_save(flags); if (CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) == 0) { local_irq_restore(flags); - return; + return 0; } info->xmit.buf[info->xmit.head] = ch; info->xmit.head = (info->xmit.head + 1) & (SERIAL_XMIT_SIZE-1); 
local_irq_restore(flags); + return 1; } static void transmit_chars(struct async_struct *info, int *intr_done) @@ -621,7 +623,8 @@ static void rs_close(struct tty_struct *tty, struct file * filp) * the line discipline to only process XON/XOFF characters. */ shutdown(info); - if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty); + if (tty->ops->flush_buffer) + tty->ops->flush_buffer(tty); if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty); info->event = 0; info->tty = NULL; diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 7e31d5f1bc8a..a6c2619ec782 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -143,7 +143,7 @@ restart: int len; set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - len = tty->driver->write(tty, skb->data, skb->len); + len = tty->ops->write(tty, skb->data, skb->len); hdev->stat.byte_tx += len; skb_pull(skb, len); @@ -190,8 +190,7 @@ static int hci_uart_flush(struct hci_dev *hdev) /* Flush any pending characters in the driver and discipline. 
*/ tty_ldisc_flush(tty); - if (tty->driver && tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); + tty_driver_flush_buffer(tty); if (test_bit(HCI_UART_PROTO_SET, &hu->flags)) hu->proto->flush(hu); @@ -285,9 +284,7 @@ static int hci_uart_tty_open(struct tty_struct *tty) if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty); - - if (tty->driver && tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); + tty_driver_flush_buffer(tty); return 0; } @@ -374,8 +371,8 @@ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, char *f spin_unlock(&hu->rx_lock); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && - tty->driver->unthrottle) - tty->driver->unthrottle(tty); + tty->ops->unthrottle) + tty->ops->unthrottle(tty); } static int hci_uart_register_dev(struct hci_uart *hu) diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index c4f4ca31f7c0..5ef69dcd2588 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -169,7 +169,7 @@ static int Fip_firmware_size; static int ip2_open(PTTY, struct file *); static void ip2_close(PTTY, struct file *); static int ip2_write(PTTY, const unsigned char *, int); -static void ip2_putchar(PTTY, unsigned char); +static int ip2_putchar(PTTY, unsigned char); static void ip2_flush_chars(PTTY); static int ip2_write_room(PTTY); static int ip2_chars_in_buf(PTTY); @@ -1616,10 +1616,9 @@ ip2_close( PTTY tty, struct file *pFile ) serviceOutgoingFifo ( pCh->pMyBord ); - if ( tty->driver->flush_buffer ) - tty->driver->flush_buffer(tty); - if ( tty->ldisc.flush_buffer ) - tty->ldisc.flush_buffer(tty); + if ( tty->driver->ops->flush_buffer ) + tty->driver->ops->flush_buffer(tty); + tty_ldisc_flush(tty); tty->closing = 0; pCh->pTTY = NULL; @@ -1738,7 +1737,7 @@ ip2_write( PTTY tty, const unsigned char *pData, int count) /* */ /* */ /******************************************************************************/ -static void +static int ip2_putchar( PTTY tty, unsigned char ch ) { 
i2ChanStrPtr pCh = tty->driver_data; @@ -1753,6 +1752,7 @@ ip2_putchar( PTTY tty, unsigned char ch ) ip2_flush_chars( tty ); } else write_unlock_irqrestore(&pCh->Pbuf_spinlock, flags); + return 1; // ip2trace (CHANN, ITRC_PUTC, ITRC_RETURN, 1, ch ); } diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 57b115272aaa..9c6be8da220c 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -1140,28 +1140,29 @@ static int isicom_write(struct tty_struct *tty, const unsigned char *buf, } /* put_char et all */ -static void isicom_put_char(struct tty_struct *tty, unsigned char ch) +static int isicom_put_char(struct tty_struct *tty, unsigned char ch) { struct isi_port *port = tty->driver_data; struct isi_board *card = port->card; unsigned long flags; if (isicom_paranoia_check(port, tty->name, "isicom_put_char")) - return; + return 0; if (!port->xmit_buf) - return; + return 0; spin_lock_irqsave(&card->card_lock, flags); - if (port->xmit_cnt >= SERIAL_XMIT_SIZE - 1) - goto out; + if (port->xmit_cnt >= SERIAL_XMIT_SIZE - 1) { + spin_unlock_irqrestore(&card->card_lock, flags); + return 0; + } port->xmit_buf[port->xmit_head++] = ch; port->xmit_head &= (SERIAL_XMIT_SIZE - 1); port->xmit_cnt++; spin_unlock_irqrestore(&card->card_lock, flags); -out: - return; + return 1; } /* flush_chars et all */ diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index 60b934adea65..d1c50b3302e5 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -1230,7 +1230,7 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw) if (rep && (!vc_kbd_mode(kbd, VC_REPEAT) || - (tty && !L_ECHO(tty) && tty->driver->chars_in_buffer(tty)))) { + (tty && !L_ECHO(tty) && tty_chars_in_buffer(tty)))) { /* * Don't repeat a key if the input buffers are not empty and the * characters get aren't echoed locally. 
This makes key repeat diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index a07c0af4819e..a35bfd7ee80e 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -342,12 +342,10 @@ static int n_hdlc_tty_open (struct tty_struct *tty) #endif /* Flush any pending characters in the driver and discipline. */ - if (tty->ldisc.flush_buffer) - tty->ldisc.flush_buffer (tty); + tty->ldisc.flush_buffer(tty); - if (tty->driver->flush_buffer) - tty->driver->flush_buffer (tty); + tty_driver_flush_buffer(tty); if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)n_hdlc_tty_open() success\n",__FILE__,__LINE__); @@ -399,7 +397,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* Send the next block of data to device */ tty->flags |= (1 << TTY_DO_WRITE_WAKEUP); - actual = tty->driver->write(tty, tbuf->buf, tbuf->count); + actual = tty->ops->write(tty, tbuf->buf, tbuf->count); /* rollback was possible and has been done */ if (actual == -ERESTARTSYS) { @@ -752,8 +750,7 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, case TIOCOUTQ: /* get the pending tx byte count in the driver */ - count = tty->driver->chars_in_buffer ? 
- tty->driver->chars_in_buffer(tty) : 0; + count = tty_chars_in_buffer(tty); /* add size of next output frame in queue */ spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags); if (n_hdlc->tx_buf_list.head) diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 3f6486e9f1ec..902169062332 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -376,8 +376,9 @@ static void put_char(struct r3964_info *pInfo, unsigned char ch) if (tty == NULL) return; - if (tty->driver->put_char) { - tty->driver->put_char(tty, ch); + /* FIXME: put_char should not be called from an IRQ */ + if (tty->ops->put_char) { + tty->ops->put_char(tty, ch); } pInfo->bcc ^= ch; } @@ -386,12 +387,9 @@ static void flush(struct r3964_info *pInfo) { struct tty_struct *tty = pInfo->tty; - if (tty == NULL) + if (tty == NULL || tty->ops->flush_chars == NULL) return; - - if (tty->driver->flush_chars) { - tty->driver->flush_chars(tty); - } + tty->ops->flush_chars(tty); } static void trigger_transmit(struct r3964_info *pInfo) @@ -449,12 +447,11 @@ static void transmit_block(struct r3964_info *pInfo) struct r3964_block_header *pBlock = pInfo->tx_first; int room = 0; - if ((tty == NULL) || (pBlock == NULL)) { + if (tty == NULL || pBlock == NULL) { return; } - if (tty->driver->write_room) - room = tty->driver->write_room(tty); + room = tty_write_room(tty); TRACE_PS("transmit_block %p, room %d, length %d", pBlock, room, pBlock->length); diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index e1518e17e09d..abc93a93dcdd 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -149,8 +149,8 @@ static void check_unthrottle(struct tty_struct *tty) { if (tty->count && test_and_clear_bit(TTY_THROTTLED, &tty->flags) && - tty->driver->unthrottle) - tty->driver->unthrottle(tty); + tty->ops->unthrottle) + tty->ops->unthrottle(tty); } /** @@ -273,7 +273,7 @@ static int opost(unsigned char c, struct tty_struct *tty) { int space, spaces; - space = tty->driver->write_room(tty); + space = 
tty_write_room(tty); if (!space) return -1; @@ -286,7 +286,7 @@ static int opost(unsigned char c, struct tty_struct *tty) if (O_ONLCR(tty)) { if (space < 2) return -1; - tty->driver->put_char(tty, '\r'); + tty_put_char(tty, '\r'); tty->column = 0; } tty->canon_column = tty->column; @@ -308,7 +308,7 @@ static int opost(unsigned char c, struct tty_struct *tty) if (space < spaces) return -1; tty->column += spaces; - tty->driver->write(tty, " ", spaces); + tty->ops->write(tty, " ", spaces); return 0; } tty->column += spaces; @@ -325,7 +325,7 @@ static int opost(unsigned char c, struct tty_struct *tty) break; } } - tty->driver->put_char(tty, c); + tty_put_char(tty, c); unlock_kernel(); return 0; } @@ -352,7 +352,7 @@ static ssize_t opost_block(struct tty_struct *tty, int i; const unsigned char *cp; - space = tty->driver->write_room(tty); + space = tty_write_room(tty); if (!space) return 0; if (nr > space) @@ -390,27 +390,14 @@ static ssize_t opost_block(struct tty_struct *tty, } } break_out: - if (tty->driver->flush_chars) - tty->driver->flush_chars(tty); - i = tty->driver->write(tty, buf, i); + if (tty->ops->flush_chars) + tty->ops->flush_chars(tty); + i = tty->ops->write(tty, buf, i); unlock_kernel(); return i; } -/** - * put_char - write character to driver - * @c: character (or part of unicode symbol) - * @tty: terminal device - * - * Queue a byte to the driver layer for output - */ - -static inline void put_char(unsigned char c, struct tty_struct *tty) -{ - tty->driver->put_char(tty, c); -} - /** * echo_char - echo characters * @c: unicode byte to echo @@ -423,8 +410,8 @@ static inline void put_char(unsigned char c, struct tty_struct *tty) static void echo_char(unsigned char c, struct tty_struct *tty) { if (L_ECHOCTL(tty) && iscntrl(c) && c != '\t') { - put_char('^', tty); - put_char(c ^ 0100, tty); + tty_put_char(tty, '^'); + tty_put_char(tty, c ^ 0100); tty->column += 2; } else opost(c, tty); @@ -433,7 +420,7 @@ static void echo_char(unsigned char c, struct 
tty_struct *tty) static inline void finish_erasing(struct tty_struct *tty) { if (tty->erasing) { - put_char('/', tty); + tty_put_char(tty, '/'); tty->column++; tty->erasing = 0; } @@ -517,7 +504,7 @@ static void eraser(unsigned char c, struct tty_struct *tty) if (L_ECHO(tty)) { if (L_ECHOPRT(tty)) { if (!tty->erasing) { - put_char('\\', tty); + tty_put_char(tty, '\\'); tty->column++; tty->erasing = 1; } @@ -525,7 +512,7 @@ static void eraser(unsigned char c, struct tty_struct *tty) echo_char(c, tty); while (--cnt > 0) { head = (head+1) & (N_TTY_BUF_SIZE-1); - put_char(tty->read_buf[head], tty); + tty_put_char(tty, tty->read_buf[head]); } } else if (kill_type == ERASE && !L_ECHOE(tty)) { echo_char(ERASE_CHAR(tty), tty); @@ -553,22 +540,22 @@ static void eraser(unsigned char c, struct tty_struct *tty) /* Now backup to that column. */ while (tty->column > col) { /* Can't use opost here. */ - put_char('\b', tty); + tty_put_char(tty, '\b'); if (tty->column > 0) tty->column--; } } else { if (iscntrl(c) && L_ECHOCTL(tty)) { - put_char('\b', tty); - put_char(' ', tty); - put_char('\b', tty); + tty_put_char(tty, '\b'); + tty_put_char(tty, ' '); + tty_put_char(tty, '\b'); if (tty->column > 0) tty->column--; } if (!iscntrl(c) || L_ECHOCTL(tty)) { - put_char('\b', tty); - put_char(' ', tty); - put_char('\b', tty); + tty_put_char(tty, '\b'); + tty_put_char(tty, ' '); + tty_put_char(tty, '\b'); if (tty->column > 0) tty->column--; } @@ -599,8 +586,7 @@ static inline void isig(int sig, struct tty_struct *tty, int flush) kill_pgrp(tty->pgrp, sig, 1); if (flush || !L_NOFLSH(tty)) { n_tty_flush_buffer(tty); - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); + tty_driver_flush_buffer(tty); } } @@ -732,7 +718,7 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c) tty->lnext = 0; if (L_ECHO(tty)) { if (tty->read_cnt >= N_TTY_BUF_SIZE-1) { - put_char('\a', tty); /* beep if no space */ + tty_put_char(tty, '\a'); /* beep if no space */ return; } 
/* Record the column of first canon char. */ @@ -776,8 +762,7 @@ send_signal: */ if (!L_NOFLSH(tty)) { n_tty_flush_buffer(tty); - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); + tty_driver_flush_buffer(tty); } if (L_ECHO(tty)) echo_char(c, tty); @@ -806,8 +791,8 @@ send_signal: if (L_ECHO(tty)) { finish_erasing(tty); if (L_ECHOCTL(tty)) { - put_char('^', tty); - put_char('\b', tty); + tty_put_char(tty, '^'); + tty_put_char(tty, '\b'); } } return; @@ -828,7 +813,7 @@ send_signal: if (c == '\n') { if (L_ECHO(tty) || L_ECHONL(tty)) { if (tty->read_cnt >= N_TTY_BUF_SIZE-1) - put_char('\a', tty); + tty_put_char(tty, '\a'); opost('\n', tty); } goto handle_newline; @@ -846,7 +831,7 @@ send_signal: */ if (L_ECHO(tty)) { if (tty->read_cnt >= N_TTY_BUF_SIZE-1) - put_char('\a', tty); + tty_put_char(tty, '\a'); /* Record the column of first canon char. */ if (tty->canon_head == tty->read_head) tty->canon_column = tty->column; @@ -876,7 +861,7 @@ handle_newline: finish_erasing(tty); if (L_ECHO(tty)) { if (tty->read_cnt >= N_TTY_BUF_SIZE-1) { - put_char('\a', tty); /* beep if no space */ + tty_put_char(tty, '\a'); /* beep if no space */ return; } if (c == '\n') @@ -980,8 +965,8 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, break; } } - if (tty->driver->flush_chars) - tty->driver->flush_chars(tty); + if (tty->ops->flush_chars) + tty->ops->flush_chars(tty); } n_tty_set_room(tty); @@ -1000,8 +985,8 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, if (tty->receive_room < TTY_THRESHOLD_THROTTLE) { /* check TTY_THROTTLED first so it indicates our state */ if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) && - tty->driver->throttle) - tty->driver->throttle(tty); + tty->ops->throttle) + tty->ops->throttle(tty); } } @@ -1086,6 +1071,9 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) tty->real_raw = 0; } n_tty_set_room(tty); + /* The termios change make the tty ready for 
I/O */ + wake_up_interruptible(&tty->write_wait); + wake_up_interruptible(&tty->read_wait); } /** @@ -1513,11 +1501,11 @@ static ssize_t write_chan(struct tty_struct *tty, struct file *file, break; b++; nr--; } - if (tty->driver->flush_chars) - tty->driver->flush_chars(tty); + if (tty->ops->flush_chars) + tty->ops->flush_chars(tty); } else { while (nr > 0) { - c = tty->driver->write(tty, b, nr); + c = tty->ops->write(tty, b, nr); if (c < 0) { retval = c; goto break_out; @@ -1554,11 +1542,6 @@ break_out: * * This code must be sure never to sleep through a hangup. * Called without the kernel lock held - fine - * - * FIXME: if someone changes the VMIN or discipline settings for the - * terminal while another process is in poll() the poll does not - * recompute the new limits. Possibly set_termios should issue - * a read wakeup to fix this bug. */ static unsigned int normal_poll(struct tty_struct *tty, struct file *file, @@ -1582,9 +1565,9 @@ static unsigned int normal_poll(struct tty_struct *tty, struct file *file, else tty->minimum_to_wake = 1; } - if (!tty_is_writelocked(tty) && - tty->driver->chars_in_buffer(tty) < WAKEUP_CHARS && - tty->driver->write_room(tty) > 0) + if (tty->ops->write && !tty_is_writelocked(tty) && + tty_chars_in_buffer(tty) < WAKEUP_CHARS && + tty_write_room(tty) > 0) mask |= POLLOUT | POLLWRNORM; return mask; } diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index b1692afd797e..f69fb8d7a680 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1108,8 +1108,8 @@ restart: a reference to the old ldisc. 
If we ended up flipping back to the existing ldisc we have two references to it */ - if (tty->ldisc.num != o_ldisc.num && tty->driver->set_ldisc) - tty->driver->set_ldisc(tty); + if (tty->ldisc.num != o_ldisc.num && tty->ops->set_ldisc) + tty->ops->set_ldisc(tty); tty_ldisc_put(o_ldisc.num); @@ -1181,9 +1181,8 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) if (*str == '\0') str = NULL; - if (tty_line >= 0 && tty_line <= p->num && p->poll_init && - !p->poll_init(p, tty_line, str)) { - + if (tty_line >= 0 && tty_line <= p->num && p->ops && + p->ops->poll_init && !p->ops->poll_init(p, tty_line, str)) { res = p; *line = tty_line; break; @@ -1452,8 +1451,7 @@ static void do_tty_hangup(struct work_struct *work) /* We may have no line discipline at this point */ if (ld->flush_buffer) ld->flush_buffer(tty); - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); + tty_driver_flush_buffer(tty); if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && ld->write_wakeup) ld->write_wakeup(tty); @@ -1516,11 +1514,11 @@ static void do_tty_hangup(struct work_struct *work) * So we just call close() the right number of times. */ if (cons_filp) { - if (tty->driver->close) + if (tty->ops->close) for (n = 0; n < closecount; n++) - tty->driver->close(tty, cons_filp); - } else if (tty->driver->hangup) - (tty->driver->hangup)(tty); + tty->ops->close(tty, cons_filp); + } else if (tty->ops->hangup) + (tty->ops->hangup)(tty); /* * We don't want to have driver/ldisc interactions beyond * the ones we did here. 
The driver layer expects no @@ -1752,8 +1750,8 @@ void stop_tty(struct tty_struct *tty) wake_up_interruptible(&tty->link->read_wait); } spin_unlock_irqrestore(&tty->ctrl_lock, flags); - if (tty->driver->stop) - (tty->driver->stop)(tty); + if (tty->ops->stop) + (tty->ops->stop)(tty); } EXPORT_SYMBOL(stop_tty); @@ -1786,8 +1784,8 @@ void start_tty(struct tty_struct *tty) wake_up_interruptible(&tty->link->read_wait); } spin_unlock_irqrestore(&tty->ctrl_lock, flags); - if (tty->driver->start) - (tty->driver->start)(tty); + if (tty->ops->start) + (tty->ops->start)(tty); /* If we have a running line discipline it may need kicking */ tty_wakeup(tty); } @@ -1972,10 +1970,13 @@ static ssize_t tty_write(struct file *file, const char __user *buf, tty = (struct tty_struct *)file->private_data; if (tty_paranoia_check(tty, inode, "tty_write")) return -EIO; - if (!tty || !tty->driver->write || + if (!tty || !tty->ops->write || (test_bit(TTY_IO_ERROR, &tty->flags))) return -EIO; - + /* Short term debug to catch buggy drivers */ + if (tty->ops->write_room == NULL) + printk(KERN_ERR "tty driver %s lacks a write_room method.\n", + tty->driver->name); ld = tty_ldisc_ref_wait(tty); if (!ld->write) ret = -EIO; @@ -2122,6 +2123,7 @@ static int init_dev(struct tty_driver *driver, int idx, goto fail_no_mem; initialize_tty_struct(tty); tty->driver = driver; + tty->ops = driver->ops; tty->index = idx; tty_line_name(driver, idx, tty->name); @@ -2152,6 +2154,7 @@ static int init_dev(struct tty_driver *driver, int idx, goto free_mem_out; initialize_tty_struct(o_tty); o_tty->driver = driver->other; + o_tty->ops = driver->ops; o_tty->index = idx; tty_line_name(driver->other, idx, o_tty->name); @@ -2456,8 +2459,8 @@ static void release_dev(struct file *filp) } } #endif - if (tty->driver->close) - tty->driver->close(tty, filp); + if (tty->ops->close) + tty->ops->close(tty, filp); /* * Sanity check: if tty->count is going to zero, there shouldn't be @@ -2740,8 +2743,8 @@ got_driver: 
printk(KERN_DEBUG "opening %s...", tty->name); #endif if (!retval) { - if (tty->driver->open) - retval = tty->driver->open(tty, filp); + if (tty->ops->open) + retval = tty->ops->open(tty, filp); else retval = -ENODEV; } @@ -2840,7 +2843,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) goto out1; check_tty_count(tty, "tty_open"); - retval = ptm_driver->open(tty, filp); + retval = ptm_driver->ops->open(tty, filp); if (!retval) return 0; out1: @@ -3336,25 +3339,20 @@ static int tiocsetd(struct tty_struct *tty, int __user *p) static int send_break(struct tty_struct *tty, unsigned int duration) { - int retval = -EINTR; - - lock_kernel(); if (tty_write_lock(tty, 0) < 0) - goto out; - tty->driver->break_ctl(tty, -1); + return -EINTR; + tty->ops->break_ctl(tty, -1); if (!signal_pending(current)) msleep_interruptible(duration); - tty->driver->break_ctl(tty, 0); + tty->ops->break_ctl(tty, 0); tty_write_unlock(tty); if (!signal_pending(current)) - retval = 0; -out: - unlock_kernel(); - return retval; + return -EINTR; + return 0; } /** - * tiocmget - get modem status + * tty_tiocmget - get modem status * @tty: tty device * @file: user file pointer * @p: pointer to result @@ -3369,10 +3367,8 @@ static int tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p { int retval = -EINVAL; - if (tty->driver->tiocmget) { - lock_kernel(); - retval = tty->driver->tiocmget(tty, file); - unlock_kernel(); + if (tty->ops->tiocmget) { + retval = tty->ops->tiocmget(tty, file); if (retval >= 0) retval = put_user(retval, p); @@ -3381,7 +3377,7 @@ static int tty_tiocmget(struct tty_struct *tty, struct file *file, int __user *p } /** - * tiocmset - set modem status + * tty_tiocmset - set modem status * @tty: tty device * @file: user file pointer * @cmd: command - clear bits, set bits or set all @@ -3398,7 +3394,7 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int { int retval = -EINVAL; - if (tty->driver->tiocmset) { + if 
(tty->ops->tiocmset) { unsigned int set, clear, val; retval = get_user(val, p); @@ -3422,9 +3418,7 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP; clear &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP; - lock_kernel(); - retval = tty->driver->tiocmset(tty, file, set, clear); - unlock_kernel(); + retval = tty->ops->tiocmset(tty, file, set, clear); } return retval; } @@ -3455,23 +3449,25 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) retval = -EINVAL; - if (!tty->driver->break_ctl) { + if (!tty->ops->break_ctl) { switch (cmd) { case TIOCSBRK: case TIOCCBRK: - if (tty->driver->ioctl) - retval = tty->driver->ioctl(tty, file, cmd, arg); + if (tty->ops->ioctl) + retval = tty->ops->ioctl(tty, file, cmd, arg); + if (retval != -EINVAL && retval != -ENOIOCTLCMD) + printk(KERN_WARNING "tty: driver %s needs updating to use break_ctl\n", tty->driver->name); return retval; /* These two ioctl's always return success; even if */ /* the driver doesn't support them. 
*/ case TCSBRK: case TCSBRKP: - if (!tty->driver->ioctl) + if (!tty->ops->ioctl) return 0; - lock_kernel(); - retval = tty->driver->ioctl(tty, file, cmd, arg); - unlock_kernel(); + retval = tty->ops->ioctl(tty, file, cmd, arg); + if (retval != -EINVAL && retval != -ENOIOCTLCMD) + printk(KERN_WARNING "tty: driver %s needs updating to use break_ctl\n", tty->driver->name); if (retval == -ENOIOCTLCMD) retval = 0; return retval; @@ -3491,9 +3487,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (retval) return retval; if (cmd != TIOCCBRK) { - lock_kernel(); tty_wait_until_sent(tty, 0); - unlock_kernel(); if (signal_pending(current)) return -EINTR; } @@ -3531,7 +3525,6 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case TIOCGSID: return tiocgsid(tty, real_tty, p); case TIOCGETD: - /* FIXME: check this is ok */ return put_user(tty->ldisc.num, (int __user *)p); case TIOCSETD: return tiocsetd(tty, p); @@ -3543,15 +3536,13 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) * Break handling */ case TIOCSBRK: /* Turn break on, unconditionally */ - lock_kernel(); - tty->driver->break_ctl(tty, -1); - unlock_kernel(); + if (tty->ops->break_ctl) + tty->ops->break_ctl(tty, -1); return 0; case TIOCCBRK: /* Turn break off, unconditionally */ - lock_kernel(); - tty->driver->break_ctl(tty, 0); - unlock_kernel(); + if (tty->ops->break_ctl) + tty->ops->break_ctl(tty, 0); return 0; case TCSBRK: /* SVID version: non-zero arg --> no break */ /* non-zero arg means wait for all output data @@ -3580,8 +3571,8 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } break; } - if (tty->driver->ioctl) { - retval = (tty->driver->ioctl)(tty, file, cmd, arg); + if (tty->ops->ioctl) { + retval = (tty->ops->ioctl)(tty, file, cmd, arg); if (retval != -ENOIOCTLCMD) return retval; } @@ -3608,8 +3599,8 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, if (tty_paranoia_check(tty, inode, 
"tty_ioctl")) return -EINVAL; - if (tty->driver->compat_ioctl) { - retval = (tty->driver->compat_ioctl)(tty, file, cmd, arg); + if (tty->ops->compat_ioctl) { + retval = (tty->ops->compat_ioctl)(tty, file, cmd, arg); if (retval != -ENOIOCTLCMD) return retval; } @@ -3659,8 +3650,7 @@ void __do_SAK(struct tty_struct *tty) tty_ldisc_flush(tty); - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); + tty_driver_flush_buffer(tty); read_lock(&tasklist_lock); /* Kill the entire session */ @@ -3871,15 +3861,27 @@ static void initialize_tty_struct(struct tty_struct *tty) INIT_WORK(&tty->SAK_work, do_SAK_work); } -/* - * The default put_char routine if the driver did not define one. +/** + * tty_put_char - write one character to a tty + * @tty: tty + * @ch: character + * + * Write one byte to the tty using the provided put_char method + * if present. Returns the number of characters successfully output. + * + * Note: the specific put_char operation in the driver layer may go + * away soon. 
Don't call it directly, use this method */ -static void tty_default_put_char(struct tty_struct *tty, unsigned char ch) +int tty_put_char(struct tty_struct *tty, unsigned char ch) { - tty->driver->write(tty, &ch, 1); + if (tty->ops->put_char) + return tty->ops->put_char(tty, ch); + return tty->ops->write(tty, &ch, 1); } +EXPORT_SYMBOL_GPL(tty_put_char); + static struct class *tty_class; /** @@ -3962,37 +3964,8 @@ void put_tty_driver(struct tty_driver *driver) void tty_set_operations(struct tty_driver *driver, const struct tty_operations *op) { - driver->open = op->open; - driver->close = op->close; - driver->write = op->write; - driver->put_char = op->put_char; - driver->flush_chars = op->flush_chars; - driver->write_room = op->write_room; - driver->chars_in_buffer = op->chars_in_buffer; - driver->ioctl = op->ioctl; - driver->compat_ioctl = op->compat_ioctl; - driver->set_termios = op->set_termios; - driver->throttle = op->throttle; - driver->unthrottle = op->unthrottle; - driver->stop = op->stop; - driver->start = op->start; - driver->hangup = op->hangup; - driver->break_ctl = op->break_ctl; - driver->flush_buffer = op->flush_buffer; - driver->set_ldisc = op->set_ldisc; - driver->wait_until_sent = op->wait_until_sent; - driver->send_xchar = op->send_xchar; - driver->read_proc = op->read_proc; - driver->write_proc = op->write_proc; - driver->tiocmget = op->tiocmget; - driver->tiocmset = op->tiocmset; -#ifdef CONFIG_CONSOLE_POLL - driver->poll_init = op->poll_init; - driver->poll_get_char = op->poll_get_char; - driver->poll_put_char = op->poll_put_char; -#endif -} - + driver->ops = op; +}; EXPORT_SYMBOL(alloc_tty_driver); EXPORT_SYMBOL(put_tty_driver); @@ -4055,9 +4028,6 @@ int tty_register_driver(struct tty_driver *driver) return error; } - if (!driver->put_char) - driver->put_char = tty_default_put_char; - mutex_lock(&tty_mutex); list_add(&driver->tty_drivers, &tty_drivers); mutex_unlock(&tty_mutex); diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c 
index 8c4bf3e48d5b..c10d40c4c5ca 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -40,6 +40,34 @@ #define TERMIOS_OLD 8 +int tty_chars_in_buffer(struct tty_struct *tty) +{ + if (tty->ops->chars_in_buffer) + return tty->ops->chars_in_buffer(tty); + else + return 0; +} + +EXPORT_SYMBOL(tty_chars_in_buffer); + +int tty_write_room(struct tty_struct *tty) +{ + if (tty->ops->write_room) + return tty->ops->write_room(tty); + return 2048; +} + +EXPORT_SYMBOL(tty_write_room); + +void tty_driver_flush_buffer(struct tty_struct *tty) +{ + if (tty->ops->flush_buffer) + tty->ops->flush_buffer(tty); +} + +EXPORT_SYMBOL(tty_driver_flush_buffer); + + /** * tty_wait_until_sent - wait for I/O to finish * @tty: tty we are waiting for @@ -58,17 +86,13 @@ void tty_wait_until_sent(struct tty_struct *tty, long timeout) printk(KERN_DEBUG "%s wait until sent...\n", tty_name(tty, buf)); #endif - if (!tty->driver->chars_in_buffer) - return; if (!timeout) timeout = MAX_SCHEDULE_TIMEOUT; - lock_kernel(); if (wait_event_interruptible_timeout(tty->write_wait, - !tty->driver->chars_in_buffer(tty), timeout) >= 0) { - if (tty->driver->wait_until_sent) - tty->driver->wait_until_sent(tty, timeout); + !tty_chars_in_buffer(tty), timeout) >= 0) { + if (tty->ops->wait_until_sent) + tty->ops->wait_until_sent(tty, timeout); } - unlock_kernel(); } EXPORT_SYMBOL(tty_wait_until_sent); @@ -444,8 +468,8 @@ static void change_termios(struct tty_struct *tty, struct ktermios *new_termios) } } - if (tty->driver->set_termios) - (*tty->driver->set_termios)(tty, &old_termios); + if (tty->ops->set_termios) + (*tty->ops->set_termios)(tty, &old_termios); else tty_termios_copy_hw(tty->termios, &old_termios); @@ -748,8 +772,8 @@ static int send_prio_char(struct tty_struct *tty, char ch) { int was_stopped = tty->stopped; - if (tty->driver->send_xchar) { - tty->driver->send_xchar(tty, ch); + if (tty->ops->send_xchar) { + tty->ops->send_xchar(tty, ch); return 0; } @@ -758,7 +782,7 @@ static int 
send_prio_char(struct tty_struct *tty, char ch) if (was_stopped) start_tty(tty); - tty->driver->write(tty, &ch, 1); + tty->ops->write(tty, &ch, 1); if (was_stopped) stop_tty(tty); tty_write_unlock(tty); @@ -778,13 +802,14 @@ static int tty_change_softcar(struct tty_struct *tty, int arg) { int ret = 0; int bit = arg ? CLOCAL : 0; - struct ktermios old = *tty->termios; + struct ktermios old; mutex_lock(&tty->termios_mutex); + old = *tty->termios; tty->termios->c_cflag &= ~CLOCAL; tty->termios->c_cflag |= bit; - if (tty->driver->set_termios) - tty->driver->set_termios(tty, &old); + if (tty->ops->set_termios) + tty->ops->set_termios(tty, &old); if ((tty->termios->c_cflag & CLOCAL) != bit) ret = -EINVAL; mutex_unlock(&tty->termios_mutex); @@ -926,8 +951,7 @@ int tty_perform_flush(struct tty_struct *tty, unsigned long arg) ld->flush_buffer(tty); /* fall through */ case TCOFLUSH: - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); + tty_driver_flush_buffer(tty); break; default: tty_ldisc_deref(ld); @@ -984,9 +1008,7 @@ int n_tty_ioctl(struct tty_struct *tty, struct file *file, case TCFLSH: return tty_perform_flush(tty, arg); case TIOCOUTQ: - return put_user(tty->driver->chars_in_buffer ? 
- tty->driver->chars_in_buffer(tty) : 0, - (int __user *) arg); + return put_user(tty_chars_in_buffer(tty), (int __user *) arg); case TIOCINQ: retval = tty->read_cnt; if (L_ICANON(tty)) diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index e1a3a79ab3f9..7ff71ba7b7c9 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -46,7 +46,7 @@ struct serport { static int serport_serio_write(struct serio *serio, unsigned char data) { struct serport *serport = serio->port_data; - return -(serport->tty->driver->write(serport->tty, &data, 1) != 1); + return -(serport->tty->ops->write(serport->tty, &data, 1) != 1); } static int serport_serio_open(struct serio *serio) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index fceeb1d57682..45d1ee93cd39 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -68,10 +68,10 @@ static int write_modem(struct cardstate *cs) struct tty_struct *tty = cs->hw.ser->tty; struct bc_state *bcs = &cs->bcs[0]; /* only one channel */ struct sk_buff *skb = bcs->tx_skb; - int sent; + int sent = -EOPNOTSUPP; if (!tty || !tty->driver || !skb) - return -EFAULT; + return -EINVAL; if (!skb->len) { dev_kfree_skb_any(skb); @@ -80,7 +80,8 @@ static int write_modem(struct cardstate *cs) } set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - sent = tty->driver->write(tty, skb->data, skb->len); + if (tty->ops->write) + sent = tty->ops->write(tty, skb->data, skb->len); gig_dbg(DEBUG_OUTPUT, "write_modem: sent %d", sent); if (sent < 0) { /* error */ @@ -120,7 +121,7 @@ static int send_cb(struct cardstate *cs) if (cb->len) { set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - sent = tty->driver->write(tty, cb->buf + cb->offset, cb->len); + sent = tty->ops->write(tty, cb->buf + cb->offset, cb->len); if (sent < 0) { /* error */ gig_dbg(DEBUG_OUTPUT, "send_cb: write error %d", sent); @@ -440,14 +441,14 @@ static int gigaset_set_modem_ctrl(struct cardstate *cs, 
unsigned old_state, unsi struct tty_struct *tty = cs->hw.ser->tty; unsigned int set, clear; - if (!tty || !tty->driver || !tty->driver->tiocmset) - return -EFAULT; + if (!tty || !tty->driver || !tty->ops->tiocmset) + return -EINVAL; set = new_state & ~old_state; clear = old_state & ~new_state; if (!set && !clear) return 0; gig_dbg(DEBUG_IF, "tiocmset set %x clear %x", set, clear); - return tty->driver->tiocmset(tty, NULL, set, clear); + return tty->ops->tiocmset(tty, NULL, set, clear); } static int gigaset_baud_rate(struct cardstate *cs, unsigned cflag) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 1da55dd2a5a0..82a36266dfc9 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -148,13 +148,13 @@ static void sp_xmit_on_air(unsigned long channel) if (((sp->status1 & SIXP_DCD_MASK) == 0) && (random < sp->persistence)) { sp->led_state = 0x70; - sp->tty->driver->write(sp->tty, &sp->led_state, 1); + sp->tty->ops->write(sp->tty, &sp->led_state, 1); sp->tx_enable = 1; - actual = sp->tty->driver->write(sp->tty, sp->xbuff, sp->status2); + actual = sp->tty->ops->write(sp->tty, sp->xbuff, sp->status2); sp->xleft -= actual; sp->xhead += actual; sp->led_state = 0x60; - sp->tty->driver->write(sp->tty, &sp->led_state, 1); + sp->tty->ops->write(sp->tty, &sp->led_state, 1); sp->status2 = 0; } else mod_timer(&sp->tx_t, jiffies + ((when + 1) * HZ) / 100); @@ -220,13 +220,13 @@ static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len) */ if (sp->duplex == 1) { sp->led_state = 0x70; - sp->tty->driver->write(sp->tty, &sp->led_state, 1); + sp->tty->ops->write(sp->tty, &sp->led_state, 1); sp->tx_enable = 1; - actual = sp->tty->driver->write(sp->tty, sp->xbuff, count); + actual = sp->tty->ops->write(sp->tty, sp->xbuff, count); sp->xleft = count - actual; sp->xhead = sp->xbuff + actual; sp->led_state = 0x60; - sp->tty->driver->write(sp->tty, &sp->led_state, 1); + sp->tty->ops->write(sp->tty, &sp->led_state, 1); } else { 
sp->xleft = count; sp->xhead = sp->xbuff; @@ -444,7 +444,7 @@ static void sixpack_write_wakeup(struct tty_struct *tty) } if (sp->tx_enable) { - actual = tty->driver->write(tty, sp->xhead, sp->xleft); + actual = tty->ops->write(tty, sp->xhead, sp->xleft); sp->xleft -= actual; sp->xhead += actual; } @@ -492,8 +492,8 @@ static void sixpack_receive_buf(struct tty_struct *tty, sp_put(sp); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) - && tty->driver->unthrottle) - tty->driver->unthrottle(tty); + && tty->ops->unthrottle) + tty->ops->unthrottle(tty); } /* @@ -554,8 +554,8 @@ static void resync_tnc(unsigned long channel) /* resync the TNC */ sp->led_state = 0x60; - sp->tty->driver->write(sp->tty, &sp->led_state, 1); - sp->tty->driver->write(sp->tty, &resync_cmd, 1); + sp->tty->ops->write(sp->tty, &sp->led_state, 1); + sp->tty->ops->write(sp->tty, &resync_cmd, 1); /* Start resync timer again -- the TNC might be still absent */ @@ -573,7 +573,7 @@ static inline int tnc_init(struct sixpack *sp) tnc_set_sync_state(sp, TNC_UNSYNC_STARTUP); - sp->tty->driver->write(sp->tty, &inbyte, 1); + sp->tty->ops->write(sp->tty, &inbyte, 1); del_timer(&sp->resync_t); sp->resync_t.data = (unsigned long) sp; @@ -601,6 +601,8 @@ static int sixpack_open(struct tty_struct *tty) if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (tty->ops->write == NULL) + return -EOPNOTSUPP; dev = alloc_netdev(sizeof(struct sixpack), "sp%d", sp_setup); if (!dev) { @@ -914,9 +916,9 @@ static void decode_prio_command(struct sixpack *sp, unsigned char cmd) } else { /* output watchdog char if idle */ if ((sp->status2 != 0) && (sp->duplex == 1)) { sp->led_state = 0x70; - sp->tty->driver->write(sp->tty, &sp->led_state, 1); + sp->tty->ops->write(sp->tty, &sp->led_state, 1); sp->tx_enable = 1; - actual = sp->tty->driver->write(sp->tty, sp->xbuff, sp->status2); + actual = sp->tty->ops->write(sp->tty, sp->xbuff, sp->status2); sp->xleft -= actual; sp->xhead += actual; sp->led_state = 0x60; @@ -926,7 +928,7 @@ static 
void decode_prio_command(struct sixpack *sp, unsigned char cmd) } /* needed to trigger the TNC watchdog */ - sp->tty->driver->write(sp->tty, &sp->led_state, 1); + sp->tty->ops->write(sp->tty, &sp->led_state, 1); /* if the state byte has been received, the TNC is present, so the resync timer can be reset. */ @@ -956,12 +958,12 @@ static void decode_std_command(struct sixpack *sp, unsigned char cmd) if ((sp->status & SIXP_RX_DCD_MASK) == SIXP_RX_DCD_MASK) { sp->led_state = 0x68; - sp->tty->driver->write(sp->tty, &sp->led_state, 1); + sp->tty->ops->write(sp->tty, &sp->led_state, 1); } } else { sp->led_state = 0x60; /* fill trailing bytes with zeroes */ - sp->tty->driver->write(sp->tty, &sp->led_state, 1); + sp->tty->ops->write(sp->tty, &sp->led_state, 1); rest = sp->rx_count; if (rest != 0) for (i = rest; i <= 3; i++) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 30c9b3b0d131..ebcc5adee7cc 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -516,7 +516,7 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len) spin_unlock_bh(&ax->buflock); set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); - actual = ax->tty->driver->write(ax->tty, ax->xbuff, count); + actual = ax->tty->ops->write(ax->tty, ax->xbuff, count); ax->stats.tx_packets++; ax->stats.tx_bytes += actual; @@ -546,7 +546,7 @@ static int ax_xmit(struct sk_buff *skb, struct net_device *dev) } printk(KERN_ERR "mkiss: %s: transmit timed out, %s?\n", dev->name, - (ax->tty->driver->chars_in_buffer(ax->tty) || ax->xleft) ? + (ax->tty->ops->chars_in_buffer(ax->tty) || ax->xleft) ? 
"bad line quality" : "driver error"); ax->xleft = 0; @@ -736,6 +736,8 @@ static int mkiss_open(struct tty_struct *tty) if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (tty->ops->write == NULL) + return -EOPNOTSUPP; dev = alloc_netdev(sizeof(struct mkiss), "ax%d", ax_setup); if (!dev) { @@ -754,8 +756,7 @@ static int mkiss_open(struct tty_struct *tty) tty->disc_data = ax; tty->receive_room = 65535; - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); + tty_driver_flush_buffer(tty); /* Restore default settings */ dev->type = ARPHRD_AX25; @@ -936,8 +937,8 @@ static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, mkiss_put(ax); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) - && tty->driver->unthrottle) - tty->driver->unthrottle(tty); + && tty->ops->unthrottle) + tty->ops->unthrottle(tty); } /* @@ -962,7 +963,7 @@ static void mkiss_write_wakeup(struct tty_struct *tty) goto out; } - actual = tty->driver->write(tty, ax->xhead, ax->xleft); + actual = tty->ops->write(tty, ax->xhead, ax->xleft); ax->xleft -= actual; ax->xhead += actual; diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index fc753d7f674e..e6f40b7f9041 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -64,7 +64,7 @@ static int irtty_chars_in_buffer(struct sir_dev *dev) IRDA_ASSERT(priv != NULL, return -1;); IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return -1;); - return priv->tty->driver->chars_in_buffer(priv->tty); + return tty_chars_in_buffer(priv->tty); } /* Wait (sleep) until underlaying hardware finished transmission @@ -93,10 +93,8 @@ static void irtty_wait_until_sent(struct sir_dev *dev) IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return;); tty = priv->tty; - if (tty->driver->wait_until_sent) { - lock_kernel(); - tty->driver->wait_until_sent(tty, msecs_to_jiffies(100)); - unlock_kernel(); + if (tty->ops->wait_until_sent) { + tty->ops->wait_until_sent(tty, msecs_to_jiffies(100)); } else { 
msleep(USBSERIAL_TX_DONE_DELAY); @@ -125,48 +123,14 @@ static int irtty_change_speed(struct sir_dev *dev, unsigned speed) tty = priv->tty; - lock_kernel(); + mutex_lock(&tty->termios_mutex); old_termios = *(tty->termios); cflag = tty->termios->c_cflag; - - cflag &= ~CBAUD; - - IRDA_DEBUG(2, "%s(), Setting speed to %d\n", __FUNCTION__, speed); - - switch (speed) { - case 1200: - cflag |= B1200; - break; - case 2400: - cflag |= B2400; - break; - case 4800: - cflag |= B4800; - break; - case 19200: - cflag |= B19200; - break; - case 38400: - cflag |= B38400; - break; - case 57600: - cflag |= B57600; - break; - case 115200: - cflag |= B115200; - break; - case 9600: - default: - cflag |= B9600; - break; - } - - tty->termios->c_cflag = cflag; - if (tty->driver->set_termios) - tty->driver->set_termios(tty, &old_termios); - unlock_kernel(); - + tty_encode_baud_rate(tty, speed, speed); + if (tty->ops->set_termios) + tty->ops->set_termios(tty, &old_termios); priv->io.speed = speed; + mutex_unlock(&tty->termios_mutex); return 0; } @@ -202,8 +166,8 @@ static int irtty_set_dtr_rts(struct sir_dev *dev, int dtr, int rts) * This function is not yet defined for all tty driver, so * let's be careful... 
Jean II */ - IRDA_ASSERT(priv->tty->driver->tiocmset != NULL, return -1;); - priv->tty->driver->tiocmset(priv->tty, NULL, set, clear); + IRDA_ASSERT(priv->tty->ops->tiocmset != NULL, return -1;); + priv->tty->ops->tiocmset(priv->tty, NULL, set, clear); return 0; } @@ -225,17 +189,13 @@ static int irtty_do_write(struct sir_dev *dev, const unsigned char *ptr, size_t IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return -1;); tty = priv->tty; - if (!tty->driver->write) + if (!tty->ops->write) return 0; tty->flags |= (1 << TTY_DO_WRITE_WAKEUP); - if (tty->driver->write_room) { - writelen = tty->driver->write_room(tty); - if (writelen > len) - writelen = len; - } - else + writelen = tty_write_room(tty); + if (writelen > len) writelen = len; - return tty->driver->write(tty, ptr, writelen); + return tty->ops->write(tty, ptr, writelen); } /* ------------------------------------------------------- */ @@ -321,7 +281,7 @@ static inline void irtty_stop_receiver(struct tty_struct *tty, int stop) struct ktermios old_termios; int cflag; - lock_kernel(); + mutex_lock(&tty->termios_mutex); old_termios = *(tty->termios); cflag = tty->termios->c_cflag; @@ -331,9 +291,9 @@ static inline void irtty_stop_receiver(struct tty_struct *tty, int stop) cflag |= CREAD; tty->termios->c_cflag = cflag; - if (tty->driver->set_termios) - tty->driver->set_termios(tty, &old_termios); - unlock_kernel(); + if (tty->ops->set_termios) + tty->ops->set_termios(tty, &old_termios); + mutex_unlock(&tty->termios_mutex); } /*****************************************************************/ @@ -359,8 +319,8 @@ static int irtty_start_dev(struct sir_dev *dev) tty = priv->tty; - if (tty->driver->start) - tty->driver->start(tty); + if (tty->ops->start) + tty->ops->start(tty); /* Make sure we can receive more data */ irtty_stop_receiver(tty, FALSE); @@ -388,8 +348,8 @@ static int irtty_stop_dev(struct sir_dev *dev) /* Make sure we don't receive more data */ irtty_stop_receiver(tty, TRUE); - if (tty->driver->stop) - 
tty->driver->stop(tty); + if (tty->ops->stop) + tty->ops->stop(tty); mutex_unlock(&irtty_mutex); @@ -483,11 +443,10 @@ static int irtty_open(struct tty_struct *tty) /* stop the underlying driver */ irtty_stop_receiver(tty, TRUE); - if (tty->driver->stop) - tty->driver->stop(tty); + if (tty->ops->stop) + tty->ops->stop(tty); - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); + tty_driver_flush_buffer(tty); /* apply mtt override */ sir_tty_drv.qos_mtt_bits = qos_mtt_bits; @@ -564,8 +523,8 @@ static void irtty_close(struct tty_struct *tty) /* Stop tty */ irtty_stop_receiver(tty, TRUE); tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP); - if (tty->driver->stop) - tty->driver->stop(tty); + if (tty->ops->stop) + tty->ops->stop(tty); kfree(priv); diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index f023d5b67e6e..1c4b7e37912c 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -158,6 +158,9 @@ ppp_asynctty_open(struct tty_struct *tty) struct asyncppp *ap; int err; + if (tty->ops->write == NULL) + return -EOPNOTSUPP; + err = -ENOMEM; ap = kzalloc(sizeof(*ap), GFP_KERNEL); if (!ap) @@ -359,8 +362,8 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); ap_put(ap); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) - && tty->driver->unthrottle) - tty->driver->unthrottle(tty); + && tty->ops->unthrottle) + tty->ops->unthrottle(tty); } static void @@ -676,7 +679,7 @@ ppp_async_push(struct asyncppp *ap) if (!tty_stuffed && ap->optr < ap->olim) { avail = ap->olim - ap->optr; set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - sent = tty->driver->write(tty, ap->optr, avail); + sent = tty->ops->write(tty, ap->optr, avail); if (sent < 0) goto flush; /* error, e.g. 
loss of CD */ ap->optr += sent; diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 0d80fa546719..48ed5fdbfe18 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -207,6 +207,9 @@ ppp_sync_open(struct tty_struct *tty) struct syncppp *ap; int err; + if (tty->ops->write == NULL) + return -EOPNOTSUPP; + ap = kzalloc(sizeof(*ap), GFP_KERNEL); err = -ENOMEM; if (!ap) @@ -399,8 +402,8 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); sp_put(ap); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) - && tty->driver->unthrottle) - tty->driver->unthrottle(tty); + && tty->ops->unthrottle) + tty->ops->unthrottle(tty); } static void @@ -653,7 +656,7 @@ ppp_sync_push(struct syncppp *ap) tty_stuffed = 0; if (!tty_stuffed && ap->tpkt) { set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - sent = tty->driver->write(tty, ap->tpkt->data, ap->tpkt->len); + sent = tty->ops->write(tty, ap->tpkt->data, ap->tpkt->len); if (sent < 0) goto flush; /* error, e.g. loss of CD */ if (sent < ap->tpkt->len) { diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 5a55ede352f4..84af68fdb6c2 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -396,14 +396,14 @@ static void sl_encaps(struct slip *sl, unsigned char *icp, int len) /* Order of next two lines is *very* important. * When we are sending a little amount of data, - * the transfer may be completed inside driver.write() + * the transfer may be completed inside the ops->write() * routine, because it's running with interrupts enabled. * In this case we *never* got WRITE_WAKEUP event, * if we did not request it before write operation. * 14 Oct 1994 Dmitry Gorodchanin. 
*/ sl->tty->flags |= (1 << TTY_DO_WRITE_WAKEUP); - actual = sl->tty->driver->write(sl->tty, sl->xbuff, count); + actual = sl->tty->ops->write(sl->tty, sl->xbuff, count); #ifdef SL_CHECK_TRANSMIT sl->dev->trans_start = jiffies; #endif @@ -437,7 +437,7 @@ static void slip_write_wakeup(struct tty_struct *tty) return; } - actual = tty->driver->write(tty, sl->xhead, sl->xleft); + actual = tty->ops->write(tty, sl->xhead, sl->xleft); sl->xleft -= actual; sl->xhead += actual; } @@ -462,7 +462,7 @@ static void sl_tx_timeout(struct net_device *dev) } printk(KERN_WARNING "%s: transmit timed out, %s?\n", dev->name, - (sl->tty->driver->chars_in_buffer(sl->tty) || sl->xleft) ? + (tty_chars_in_buffer(sl->tty) || sl->xleft) ? "bad line quality" : "driver error"); sl->xleft = 0; sl->tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP); @@ -830,6 +830,9 @@ static int slip_open(struct tty_struct *tty) if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (tty->ops->write == NULL) + return -EOPNOTSUPP; + /* RTnetlink lock is misused here to serialize concurrent opens of slip channels. There are better ways, but it is the simplest one. @@ -1432,7 +1435,7 @@ static void sl_outfill(unsigned long sls) /* put END into tty queue. Is it right ??? 
*/ if (!netif_queue_stopped(sl->dev)) { /* if device busy no outfill */ - sl->tty->driver->write(sl->tty, &s, 1); + sl->tty->ops->write(sl->tty, &s, 1); } } else set_bit(SLF_OUTWAIT, &sl->flags); diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 0f8aca8a4d43..249e18053d5f 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include @@ -95,7 +95,7 @@ static struct x25_asy *x25_asy_alloc(void) x25_asy_devs[i] = dev; return sl; } else { - printk("x25_asy_alloc() - register_netdev() failure.\n"); + printk(KERN_WARNING "x25_asy_alloc() - register_netdev() failure.\n"); free_netdev(dev); } } @@ -112,23 +112,22 @@ static void x25_asy_free(struct x25_asy *sl) kfree(sl->xbuff); sl->xbuff = NULL; - if (!test_and_clear_bit(SLF_INUSE, &sl->flags)) { - printk("%s: x25_asy_free for already free unit.\n", sl->dev->name); - } + if (!test_and_clear_bit(SLF_INUSE, &sl->flags)) + printk(KERN_ERR "%s: x25_asy_free for already free unit.\n", + sl->dev->name); } static int x25_asy_change_mtu(struct net_device *dev, int newmtu) { struct x25_asy *sl = dev->priv; unsigned char *xbuff, *rbuff; - int len = 2* newmtu; + int len = 2 * newmtu; xbuff = kmalloc(len + 4, GFP_ATOMIC); rbuff = kmalloc(len + 4, GFP_ATOMIC); - if (xbuff == NULL || rbuff == NULL) - { - printk("%s: unable to grow X.25 buffers, MTU change cancelled.\n", + if (xbuff == NULL || rbuff == NULL) { + printk(KERN_WARNING "%s: unable to grow X.25 buffers, MTU change cancelled.\n", dev->name); kfree(xbuff); kfree(rbuff); @@ -193,25 +192,23 @@ static void x25_asy_bump(struct x25_asy *sl) int err; count = sl->rcount; - sl->stats.rx_bytes+=count; - + sl->stats.rx_bytes += count; + skb = dev_alloc_skb(count+1); - if (skb == NULL) - { - printk("%s: memory squeeze, dropping packet.\n", sl->dev->name); + if (skb == NULL) { + printk(KERN_WARNING "%s: memory squeeze, dropping packet.\n", + sl->dev->name); 
sl->stats.rx_dropped++; return; } - skb_push(skb,1); /* LAPB internal control */ - memcpy(skb_put(skb,count), sl->rbuff, count); + skb_push(skb, 1); /* LAPB internal control */ + memcpy(skb_put(skb, count), sl->rbuff, count); skb->protocol = x25_type_trans(skb, sl->dev); - if((err=lapb_data_received(skb->dev, skb))!=LAPB_OK) - { + err = lapb_data_received(skb->dev, skb); + if (err != LAPB_OK) { kfree_skb(skb); - printk(KERN_DEBUG "x25_asy: data received err - %d\n",err); - } - else - { + printk(KERN_DEBUG "x25_asy: data received err - %d\n", err); + } else { netif_rx(skb); sl->dev->last_rx = jiffies; sl->stats.rx_packets++; @@ -224,10 +221,11 @@ static void x25_asy_encaps(struct x25_asy *sl, unsigned char *icp, int len) unsigned char *p; int actual, count, mtu = sl->dev->mtu; - if (len > mtu) - { /* Sigh, shouldn't occur BUT ... */ + if (len > mtu) { + /* Sigh, shouldn't occur BUT ... */ len = mtu; - printk ("%s: truncating oversized transmit packet!\n", sl->dev->name); + printk(KERN_DEBUG "%s: truncating oversized transmit packet!\n", + sl->dev->name); sl->stats.tx_dropped++; x25_asy_unlock(sl); return; @@ -245,7 +243,7 @@ static void x25_asy_encaps(struct x25_asy *sl, unsigned char *icp, int len) * 14 Oct 1994 Dmitry Gorodchanin. 
*/ sl->tty->flags |= (1 << TTY_DO_WRITE_WAKEUP); - actual = sl->tty->driver->write(sl->tty, sl->xbuff, count); + actual = sl->tty->ops->write(sl->tty, sl->xbuff, count); sl->xleft = count - actual; sl->xhead = sl->xbuff + actual; /* VSV */ @@ -265,8 +263,7 @@ static void x25_asy_write_wakeup(struct tty_struct *tty) if (!sl || sl->magic != X25_ASY_MAGIC || !netif_running(sl->dev)) return; - if (sl->xleft <= 0) - { + if (sl->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ sl->stats.tx_packets++; @@ -275,14 +272,14 @@ static void x25_asy_write_wakeup(struct tty_struct *tty) return; } - actual = tty->driver->write(tty, sl->xhead, sl->xleft); + actual = tty->ops->write(tty, sl->xhead, sl->xleft); sl->xleft -= actual; sl->xhead += actual; } static void x25_asy_timeout(struct net_device *dev) { - struct x25_asy *sl = (struct x25_asy*)(dev->priv); + struct x25_asy *sl = dev->priv; spin_lock(&sl->lock); if (netif_queue_stopped(dev)) { @@ -290,7 +287,7 @@ static void x25_asy_timeout(struct net_device *dev) * 14 Oct 1994 Dmitry Gorodchanin. */ printk(KERN_WARNING "%s: transmit timed out, %s?\n", dev->name, - (sl->tty->driver->chars_in_buffer(sl->tty) || sl->xleft) ? + (tty_chars_in_buffer(sl->tty) || sl->xleft) ? "bad line quality" : "driver error"); sl->xleft = 0; sl->tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP); @@ -303,31 +300,34 @@ static void x25_asy_timeout(struct net_device *dev) static int x25_asy_xmit(struct sk_buff *skb, struct net_device *dev) { - struct x25_asy *sl = (struct x25_asy*)(dev->priv); + struct x25_asy *sl = dev->priv; int err; if (!netif_running(sl->dev)) { - printk("%s: xmit call when iface is down\n", dev->name); + printk(KERN_ERR "%s: xmit call when iface is down\n", + dev->name); kfree_skb(skb); return 0; } - - switch(skb->data[0]) - { - case 0x00:break; - case 0x01: /* Connection request .. 
do nothing */ - if((err=lapb_connect_request(dev))!=LAPB_OK) - printk(KERN_ERR "x25_asy: lapb_connect_request error - %d\n", err); - kfree_skb(skb); - return 0; - case 0x02: /* Disconnect request .. do nothing - hang up ?? */ - if((err=lapb_disconnect_request(dev))!=LAPB_OK) - printk(KERN_ERR "x25_asy: lapb_disconnect_request error - %d\n", err); - default: - kfree_skb(skb); - return 0; + + switch (skb->data[0]) { + case 0x00: + break; + case 0x01: /* Connection request .. do nothing */ + err = lapb_connect_request(dev); + if (err != LAPB_OK) + printk(KERN_ERR "x25_asy: lapb_connect_request error - %d\n", err); + kfree_skb(skb); + return 0; + case 0x02: /* Disconnect request .. do nothing - hang up ?? */ + err = lapb_disconnect_request(dev); + if (err != LAPB_OK) + printk(KERN_ERR "x25_asy: lapb_disconnect_request error - %d\n", err); + default: + kfree_skb(skb); + return 0; } - skb_pull(skb,1); /* Remove control byte */ + skb_pull(skb, 1); /* Remove control byte */ /* * If we are busy already- too bad. We ought to be able * to queue things at this point, to allow for a little @@ -338,10 +338,10 @@ static int x25_asy_xmit(struct sk_buff *skb, struct net_device *dev) * So, no queues ! * 14 Oct 1994 Dmitry Gorodchanin. */ - - if((err=lapb_data_request(dev,skb))!=LAPB_OK) - { - printk(KERN_ERR "lapbeth: lapb_data_request error - %d\n", err); + + err = lapb_data_request(dev, skb); + if (err != LAPB_OK) { + printk(KERN_ERR "x25_asy: lapb_data_request error - %d\n", err); kfree_skb(skb); return 0; } @@ -357,7 +357,7 @@ static int x25_asy_xmit(struct sk_buff *skb, struct net_device *dev) * Called when I frame data arrives. We did the work above - throw it * at the net layer. */ - + static int x25_asy_data_indication(struct net_device *dev, struct sk_buff *skb) { skb->dev->last_rx = jiffies; @@ -369,24 +369,22 @@ static int x25_asy_data_indication(struct net_device *dev, struct sk_buff *skb) * busy cases too well. 
Its tricky to see how to do this nicely - * perhaps lapb should allow us to bounce this ? */ - + static void x25_asy_data_transmit(struct net_device *dev, struct sk_buff *skb) { - struct x25_asy *sl=dev->priv; - + struct x25_asy *sl = dev->priv; + spin_lock(&sl->lock); - if (netif_queue_stopped(sl->dev) || sl->tty == NULL) - { + if (netif_queue_stopped(sl->dev) || sl->tty == NULL) { spin_unlock(&sl->lock); printk(KERN_ERR "x25_asy: tbusy drop\n"); kfree_skb(skb); return; } /* We were not busy, so we are now... :-) */ - if (skb != NULL) - { + if (skb != NULL) { x25_asy_lock(sl); - sl->stats.tx_bytes+=skb->len; + sl->stats.tx_bytes += skb->len; x25_asy_encaps(sl, skb->data, skb->len); dev_kfree_skb(skb); } @@ -396,15 +394,16 @@ static void x25_asy_data_transmit(struct net_device *dev, struct sk_buff *skb) /* * LAPB connection establish/down information. */ - + static void x25_asy_connected(struct net_device *dev, int reason) { struct x25_asy *sl = dev->priv; struct sk_buff *skb; unsigned char *ptr; - if ((skb = dev_alloc_skb(1)) == NULL) { - printk(KERN_ERR "lapbeth: out of memory\n"); + skb = dev_alloc_skb(1); + if (skb == NULL) { + printk(KERN_ERR "x25_asy: out of memory\n"); return; } @@ -422,7 +421,8 @@ static void x25_asy_disconnected(struct net_device *dev, int reason) struct sk_buff *skb; unsigned char *ptr; - if ((skb = dev_alloc_skb(1)) == NULL) { + skb = dev_alloc_skb(1); + if (skb == NULL) { printk(KERN_ERR "x25_asy: out of memory\n"); return; } @@ -449,7 +449,7 @@ static struct lapb_register_struct x25_asy_callbacks = { /* Open the low-level part of the X.25 channel. Easy! 
*/ static int x25_asy_open(struct net_device *dev) { - struct x25_asy *sl = (struct x25_asy*)(dev->priv); + struct x25_asy *sl = dev->priv; unsigned long len; int err; @@ -466,13 +466,11 @@ static int x25_asy_open(struct net_device *dev) len = dev->mtu * 2; sl->rbuff = kmalloc(len + 4, GFP_KERNEL); - if (sl->rbuff == NULL) { + if (sl->rbuff == NULL) goto norbuff; - } sl->xbuff = kmalloc(len + 4, GFP_KERNEL); - if (sl->xbuff == NULL) { + if (sl->xbuff == NULL) goto noxbuff; - } sl->buffsize = len; sl->rcount = 0; @@ -480,11 +478,12 @@ static int x25_asy_open(struct net_device *dev) sl->flags &= (1 << SLF_INUSE); /* Clear ESCAPE & ERROR flags */ netif_start_queue(dev); - + /* * Now attach LAPB */ - if((err=lapb_register(dev, &x25_asy_callbacks))==LAPB_OK) + err = lapb_register(dev, &x25_asy_callbacks); + if (err == LAPB_OK) return 0; /* Cleanup */ @@ -499,18 +498,20 @@ norbuff: /* Close the low-level part of the X.25 channel. Easy! */ static int x25_asy_close(struct net_device *dev) { - struct x25_asy *sl = (struct x25_asy*)(dev->priv); + struct x25_asy *sl = dev->priv; int err; spin_lock(&sl->lock); - if (sl->tty) + if (sl->tty) sl->tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP); netif_stop_queue(dev); sl->rcount = 0; sl->xleft = 0; - if((err=lapb_unregister(dev))!=LAPB_OK) - printk(KERN_ERR "x25_asy_close: lapb_unregister error -%d\n",err); + err = lapb_unregister(dev); + if (err != LAPB_OK) + printk(KERN_ERR "x25_asy_close: lapb_unregister error -%d\n", + err); spin_unlock(&sl->lock); return 0; } @@ -521,8 +522,9 @@ static int x25_asy_close(struct net_device *dev) * a block of X.25 data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. 
*/ - -static void x25_asy_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) + +static void x25_asy_receive_buf(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct x25_asy *sl = (struct x25_asy *) tty->disc_data; @@ -533,9 +535,8 @@ static void x25_asy_receive_buf(struct tty_struct *tty, const unsigned char *cp, /* Read the characters out of the buffer */ while (count--) { if (fp && *fp++) { - if (!test_and_set_bit(SLF_ERROR, &sl->flags)) { + if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->stats.rx_errors++; - } cp++; continue; } @@ -556,31 +557,31 @@ static int x25_asy_open_tty(struct tty_struct *tty) struct x25_asy *sl = (struct x25_asy *) tty->disc_data; int err; + if (tty->ops->write == NULL) + return -EOPNOTSUPP; + /* First make sure we're not already connected. */ - if (sl && sl->magic == X25_ASY_MAGIC) { + if (sl && sl->magic == X25_ASY_MAGIC) return -EEXIST; - } /* OK. Find a free X.25 channel to use. */ - if ((sl = x25_asy_alloc()) == NULL) { + sl = x25_asy_alloc(); + if (sl == NULL) return -ENFILE; - } sl->tty = tty; tty->disc_data = sl; tty->receive_room = 65536; - if (tty->driver->flush_buffer) { - tty->driver->flush_buffer(tty); - } + tty_driver_flush_buffer(tty); tty_ldisc_flush(tty); /* Restore default settings */ sl->dev->type = ARPHRD_X25; - + /* Perform the low-level X.25 async init */ - if ((err = x25_asy_open(sl->dev))) + err = x25_asy_open(sl->dev); + if (err) return err; - /* Done. We have linked the TTY line to a channel. 
*/ return sl->dev->base_addr; } @@ -601,9 +602,7 @@ static void x25_asy_close_tty(struct tty_struct *tty) return; if (sl->dev->flags & IFF_UP) - { - (void) dev_close(sl->dev); - } + dev_close(sl->dev); tty->disc_data = NULL; sl->tty = NULL; @@ -613,8 +612,7 @@ static void x25_asy_close_tty(struct tty_struct *tty) static struct net_device_stats *x25_asy_get_stats(struct net_device *dev) { - struct x25_asy *sl = (struct x25_asy*)(dev->priv); - + struct x25_asy *sl = dev->priv; return &sl->stats; } @@ -641,21 +639,19 @@ int x25_asy_esc(unsigned char *s, unsigned char *d, int len) * character sequence, according to the X.25 protocol. */ - while (len-- > 0) - { - switch(c = *s++) - { - case X25_END: - *ptr++ = X25_ESC; - *ptr++ = X25_ESCAPE(X25_END); - break; - case X25_ESC: - *ptr++ = X25_ESC; - *ptr++ = X25_ESCAPE(X25_ESC); - break; - default: - *ptr++ = c; - break; + while (len-- > 0) { + switch (c = *s++) { + case X25_END: + *ptr++ = X25_ESC; + *ptr++ = X25_ESCAPE(X25_END); + break; + case X25_ESC: + *ptr++ = X25_ESC; + *ptr++ = X25_ESCAPE(X25_ESC); + break; + default: + *ptr++ = c; + break; } } *ptr++ = X25_END; @@ -665,31 +661,25 @@ int x25_asy_esc(unsigned char *s, unsigned char *d, int len) static void x25_asy_unesc(struct x25_asy *sl, unsigned char s) { - switch(s) - { - case X25_END: - if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && (sl->rcount > 2)) - { - x25_asy_bump(sl); - } - clear_bit(SLF_ESCAPE, &sl->flags); - sl->rcount = 0; - return; - - case X25_ESC: - set_bit(SLF_ESCAPE, &sl->flags); - return; - - case X25_ESCAPE(X25_ESC): - case X25_ESCAPE(X25_END): - if (test_and_clear_bit(SLF_ESCAPE, &sl->flags)) - s = X25_UNESCAPE(s); - break; - } - if (!test_bit(SLF_ERROR, &sl->flags)) - { - if (sl->rcount < sl->buffsize) - { + switch (s) { + case X25_END: + if (!test_and_clear_bit(SLF_ERROR, &sl->flags) + && sl->rcount > 2) + x25_asy_bump(sl); + clear_bit(SLF_ESCAPE, &sl->flags); + sl->rcount = 0; + return; + case X25_ESC: + set_bit(SLF_ESCAPE, &sl->flags); + 
return; + case X25_ESCAPE(X25_ESC): + case X25_ESCAPE(X25_END): + if (test_and_clear_bit(SLF_ESCAPE, &sl->flags)) + s = X25_UNESCAPE(s); + break; + } + if (!test_bit(SLF_ERROR, &sl->flags)) { + if (sl->rcount < sl->buffsize) { sl->rbuff[sl->rcount++] = s; return; } @@ -709,7 +699,7 @@ static int x25_asy_ioctl(struct tty_struct *tty, struct file *file, if (!sl || sl->magic != X25_ASY_MAGIC) return -EINVAL; - switch(cmd) { + switch (cmd) { case SIOCGIFNAME: if (copy_to_user((void __user *)arg, sl->dev->name, strlen(sl->dev->name) + 1)) @@ -724,8 +714,8 @@ static int x25_asy_ioctl(struct tty_struct *tty, struct file *file, static int x25_asy_open_dev(struct net_device *dev) { - struct x25_asy *sl = (struct x25_asy*)(dev->priv); - if(sl->tty==NULL) + struct x25_asy *sl = dev->priv; + if (sl->tty == NULL) return -ENODEV; return 0; } @@ -741,9 +731,9 @@ static void x25_asy_setup(struct net_device *dev) set_bit(SLF_INUSE, &sl->flags); /* - * Finish setting up the DEVICE info. + * Finish setting up the DEVICE info. */ - + dev->mtu = SL_MTU; dev->hard_start_xmit = x25_asy_xmit; dev->tx_timeout = x25_asy_timeout; @@ -778,9 +768,10 @@ static int __init init_x25_asy(void) x25_asy_maxdev = 4; /* Sanity */ printk(KERN_INFO "X.25 async: version 0.00 ALPHA " - "(dynamic channels, max=%d).\n", x25_asy_maxdev ); + "(dynamic channels, max=%d).\n", x25_asy_maxdev); - x25_asy_devs = kcalloc(x25_asy_maxdev, sizeof(struct net_device*), GFP_KERNEL); + x25_asy_devs = kcalloc(x25_asy_maxdev, sizeof(struct net_device *), + GFP_KERNEL); if (!x25_asy_devs) { printk(KERN_WARNING "X25 async: Can't allocate x25_asy_ctrls[] " "array! Uaargh! 
(-> No X.25 available)\n"); @@ -802,7 +793,7 @@ static void __exit exit_x25_asy(void) struct x25_asy *sl = dev->priv; spin_lock_bh(&sl->lock); - if (sl->tty) + if (sl->tty) tty_hangup(sl->tty); spin_unlock_bh(&sl->lock); diff --git a/drivers/serial/kgdboc.c b/drivers/serial/kgdboc.c index 9cf03327386a..eadc1ab6bbce 100644 --- a/drivers/serial/kgdboc.c +++ b/drivers/serial/kgdboc.c @@ -96,12 +96,14 @@ static void cleanup_kgdboc(void) static int kgdboc_get_char(void) { - return kgdb_tty_driver->poll_get_char(kgdb_tty_driver, kgdb_tty_line); + return kgdb_tty_driver->ops->poll_get_char(kgdb_tty_driver, + kgdb_tty_line); } static void kgdboc_put_char(u8 chr) { - kgdb_tty_driver->poll_put_char(kgdb_tty_driver, kgdb_tty_line, chr); + kgdb_tty_driver->ops->poll_put_char(kgdb_tty_driver, + kgdb_tty_line, chr); } static int param_set_kgdboc_var(const char *kmessage, struct kernel_param *kp) diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index 6c7a5cf76582..1e2b9d826f69 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -532,15 +532,25 @@ uart_write(struct tty_struct *tty, const unsigned char *buf, int count) static int uart_write_room(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; + unsigned long flags; + int ret; - return uart_circ_chars_free(&state->info->xmit); + spin_lock_irqsave(&state->port->lock, flags); + ret = uart_circ_chars_free(&state->info->xmit); + spin_unlock_irqrestore(&state->port->lock, flags); + return ret; } static int uart_chars_in_buffer(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; + unsigned long flags; + int ret; - return uart_circ_chars_pending(&state->info->xmit); + spin_lock_irqsave(&state->port->lock, flags); + ret = uart_circ_chars_pending(&state->info->xmit); + spin_unlock_irqrestore(&state->port->lock, flags); + return ret; } static void uart_flush_buffer(struct tty_struct *tty) @@ -622,6 +632,11 @@ static int uart_get_info(struct 
uart_state *state, struct serial_struct tmp; memset(&tmp, 0, sizeof(tmp)); + + /* Ensure the state we copy is consistent and no hardware changes + occur as we go */ + mutex_lock(&state->mutex); + tmp.type = port->type; tmp.line = port->line; tmp.port = port->iobase; @@ -641,6 +656,8 @@ static int uart_get_info(struct uart_state *state, tmp.iomem_reg_shift = port->regshift; tmp.iomem_base = (void *)(unsigned long)port->mapbase; + mutex_unlock(&state->mutex); + if (copy_to_user(retinfo, &tmp, sizeof(*retinfo))) return -EFAULT; return 0; @@ -918,14 +935,12 @@ static void uart_break_ctl(struct tty_struct *tty, int break_state) struct uart_state *state = tty->driver_data; struct uart_port *port = state->port; - lock_kernel(); mutex_lock(&state->mutex); if (port->type != PORT_UNKNOWN) port->ops->break_ctl(port, break_state); mutex_unlock(&state->mutex); - unlock_kernel(); } static int uart_do_autoconfig(struct uart_state *state) @@ -1074,7 +1089,6 @@ uart_ioctl(struct tty_struct *tty, struct file *filp, unsigned int cmd, int ret = -ENOIOCTLCMD; - lock_kernel(); /* * These ioctls don't rely on the hardware to be present. 
*/ @@ -1144,10 +1158,9 @@ uart_ioctl(struct tty_struct *tty, struct file *filp, unsigned int cmd, break; } } - out_up: +out_up: mutex_unlock(&state->mutex); - out: - unlock_kernel(); +out: return ret; } @@ -1173,7 +1186,6 @@ static void uart_set_termios(struct tty_struct *tty, return; } - lock_kernel(); uart_change_speed(state, old_termios); /* Handle transition to B0 status */ @@ -1206,7 +1218,6 @@ static void uart_set_termios(struct tty_struct *tty, } spin_unlock_irqrestore(&state->port->lock, flags); } - unlock_kernel(); #if 0 /* * No need to wake up processes in open wait, since they @@ -1322,11 +1333,11 @@ static void uart_wait_until_sent(struct tty_struct *tty, int timeout) struct uart_port *port = state->port; unsigned long char_time, expire; - BUG_ON(!kernel_locked()); - if (port->type == PORT_UNKNOWN || port->fifosize == 0) return; + lock_kernel(); + /* * Set the check interval to be 1/5 of the estimated time to * send a single character, and make it at least 1. The check @@ -1372,6 +1383,7 @@ static void uart_wait_until_sent(struct tty_struct *tty, int timeout) break; } set_current_state(TASK_RUNNING); /* might not be needed */ + unlock_kernel(); } /* @@ -2085,7 +2097,9 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port) int ret; uart_change_pm(state, 0); + spin_lock_irq(&port->lock); ops->set_mctrl(port, 0); + spin_unlock_irq(&port->lock); ret = ops->startup(port); if (ret == 0) { uart_change_speed(state, NULL); diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index d17d1645714f..04a56f300ea6 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1421,8 +1421,7 @@ static void digi_close(struct usb_serial_port *port, struct file *filp) tty_wait_until_sent(tty, DIGI_CLOSE_TIMEOUT); /* flush driver and line discipline buffers */ - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); + tty_driver_flush_buffer(tty); tty_ldisc_flush(tty); if 
(port->serial->dev) { diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index a9934a3f9845..0cb0d77dc429 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -296,16 +296,14 @@ static int serial_write (struct tty_struct * tty, const unsigned char *buf, int struct usb_serial_port *port = tty->driver_data; int retval = -ENODEV; - if (!port || port->serial->dev->state == USB_STATE_NOTATTACHED) + if (port->serial->dev->state == USB_STATE_NOTATTACHED) goto exit; dbg("%s - port %d, %d byte(s)", __func__, port->number, count); - if (!port->open_count) { - retval = -EINVAL; - dbg("%s - port not opened", __func__); - goto exit; - } + /* open_count is managed under the mutex lock for the tty so cannot + drop to zero until after the last close completes */ + WARN_ON(!port->open_count); /* pass on to the driver specific version of this function */ retval = port->serial->type->write(port, buf, count); @@ -317,61 +315,28 @@ exit: static int serial_write_room (struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; - int retval = -ENODEV; - - if (!port) - goto exit; - dbg("%s - port %d", __func__, port->number); - - if (!port->open_count) { - dbg("%s - port not open", __func__); - goto exit; - } - + WARN_ON(!port->open_count); /* pass on to the driver specific version of this function */ - retval = port->serial->type->write_room(port); - -exit: - return retval; + return port->serial->type->write_room(port); } static int serial_chars_in_buffer (struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; - int retval = -ENODEV; - - if (!port) - goto exit; - dbg("%s = port %d", __func__, port->number); - if (!port->open_count) { - dbg("%s - port not open", __func__); - goto exit; - } - + WARN_ON(!port->open_count); /* pass on to the driver specific version of this function */ - retval = port->serial->type->chars_in_buffer(port); - -exit: - return retval; + return 
port->serial->type->chars_in_buffer(port); } static void serial_throttle (struct tty_struct * tty) { struct usb_serial_port *port = tty->driver_data; - - if (!port) - return; - dbg("%s - port %d", __func__, port->number); - if (!port->open_count) { - dbg ("%s - port not open", __func__); - return; - } - + WARN_ON(!port->open_count); /* pass on to the driver specific version of this function */ if (port->serial->type->throttle) port->serial->type->throttle(port); @@ -380,17 +345,9 @@ static void serial_throttle (struct tty_struct * tty) static void serial_unthrottle (struct tty_struct * tty) { struct usb_serial_port *port = tty->driver_data; - - if (!port) - return; - dbg("%s - port %d", __func__, port->number); - if (!port->open_count) { - dbg("%s - port not open", __func__); - return; - } - + WARN_ON(!port->open_count); /* pass on to the driver specific version of this function */ if (port->serial->type->unthrottle) port->serial->type->unthrottle(port); @@ -401,42 +358,27 @@ static int serial_ioctl (struct tty_struct *tty, struct file * file, unsigned in struct usb_serial_port *port = tty->driver_data; int retval = -ENODEV; - lock_kernel(); - if (!port) - goto exit; - dbg("%s - port %d, cmd 0x%.4x", __func__, port->number, cmd); - /* Caution - port->open_count is BKL protected */ - if (!port->open_count) { - dbg ("%s - port not open", __func__); - goto exit; - } + WARN_ON(!port->open_count); /* pass on to the driver specific version of this function if it is available */ - if (port->serial->type->ioctl) + if (port->serial->type->ioctl) { + lock_kernel(); retval = port->serial->type->ioctl(port, file, cmd, arg); + unlock_kernel(); + } else retval = -ENOIOCTLCMD; -exit: - unlock_kernel(); return retval; } static void serial_set_termios (struct tty_struct *tty, struct ktermios * old) { struct usb_serial_port *port = tty->driver_data; - - if (!port) - return; - dbg("%s - port %d", __func__, port->number); - if (!port->open_count) { - dbg("%s - port not open", 
__func__); - return; - } - + WARN_ON(!port->open_count); /* pass on to the driver specific version of this function if it is available */ if (port->serial->type->set_termios) port->serial->type->set_termios(port, old); @@ -448,24 +390,15 @@ static void serial_break (struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; - lock_kernel(); - if (!port) { - unlock_kernel(); - return; - } - dbg("%s - port %d", __func__, port->number); - if (!port->open_count) { - dbg("%s - port not open", __func__); - unlock_kernel(); - return; - } - + WARN_ON(!port->open_count); /* pass on to the driver specific version of this function if it is available */ - if (port->serial->type->break_ctl) + if (port->serial->type->break_ctl) { + lock_kernel(); port->serial->type->break_ctl(port, break_state); - unlock_kernel(); + unlock_kernel(); + } } static int serial_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) @@ -519,19 +452,11 @@ static int serial_tiocmget (struct tty_struct *tty, struct file *file) { struct usb_serial_port *port = tty->driver_data; - if (!port) - return -ENODEV; - dbg("%s - port %d", __func__, port->number); - if (!port->open_count) { - dbg("%s - port not open", __func__); - return -ENODEV; - } - + WARN_ON(!port->open_count); if (port->serial->type->tiocmget) return port->serial->type->tiocmget(port, file); - return -EINVAL; } @@ -540,19 +465,11 @@ static int serial_tiocmset (struct tty_struct *tty, struct file *file, { struct usb_serial_port *port = tty->driver_data; - if (!port) - return -ENODEV; - dbg("%s - port %d", __func__, port->number); - if (!port->open_count) { - dbg("%s - port not open", __func__); - return -ENODEV; - } - + WARN_ON(!port->open_count); if (port->serial->type->tiocmset) return port->serial->type->tiocmset(port, file, set, clear); - return -EINVAL; } diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index e96bf8663ffc..f07e8a4c1f3d 100644 --- 
a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -673,15 +673,13 @@ static void whiteheat_close(struct usb_serial_port *port, struct file * filp) } */ - if (port->tty->driver->flush_buffer) - port->tty->driver->flush_buffer(port->tty); + tty_driver_flush_buffer(port->tty); tty_ldisc_flush(port->tty); firm_report_tx_done(port); firm_close(port); -printk(KERN_ERR"Before processing rx_urbs_submitted.\n"); /* shutdown our bulk reads and writes */ mutex_lock(&info->deathwarrant); spin_lock_irq(&info->lock); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 9663e8776724..97dba0d92348 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1053,7 +1053,7 @@ static int vt_check(struct file *file) if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; - if (tty->driver->ioctl != vt_ioctl) + if (tty->ops->ioctl != vt_ioctl) return -EINVAL; vc = (struct vc_data *)tty->driver_data; diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index ac26ccc25f42..21f490f5d65c 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -192,16 +192,14 @@ void proc_tty_register_driver(struct tty_driver *driver) { struct proc_dir_entry *ent; - if ((!driver->read_proc && !driver->write_proc) || - !driver->driver_name || + if (!driver->ops->read_proc || !driver->driver_name || driver->proc_entry) return; ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); if (!ent) return; - ent->read_proc = driver->read_proc; - ent->write_proc = driver->write_proc; + ent->read_proc = driver->ops->read_proc; ent->owner = driver->owner; ent->data = driver; diff --git a/include/linux/tty.h b/include/linux/tty.h index 2699298b00ef..c36a76da2ae2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -177,9 +177,13 @@ struct signal_struct; * size each time the window is created or resized anyway. 
* - TYT, 9/14/92 */ + +struct tty_operations; + struct tty_struct { int magic; struct tty_driver *driver; + const struct tty_operations *ops; int index; struct tty_ldisc ldisc; struct mutex termios_mutex; @@ -295,6 +299,10 @@ extern void tty_unregister_device(struct tty_driver *driver, unsigned index); extern int tty_read_raw_data(struct tty_struct *tty, unsigned char *bufp, int buflen); extern void tty_write_message(struct tty_struct *tty, char *msg); +extern int tty_put_char(struct tty_struct *tty, unsigned char c); +extern int tty_chars_in_buffer(struct tty_struct *tty); +extern int tty_write_room(struct tty_struct *tty); +extern void tty_driver_flush_buffer(struct tty_struct *tty); extern int is_current_pgrp_orphaned(void); extern struct pid *tty_get_pgrp(struct tty_struct *tty); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 21f69aca4505..f80d73b690f6 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -12,11 +12,15 @@ * This routine is called when a particular tty device is opened. * This routine is mandatory; if this routine is not filled in, * the attempted open will fail with ENODEV. + * + * Required method. * * void (*close)(struct tty_struct * tty, struct file * filp); * * This routine is called when a particular tty device is closed. * + * Required method. + * * int (*write)(struct tty_struct * tty, * const unsigned char *buf, int count); * @@ -26,7 +30,9 @@ * number of characters actually accepted for writing. This * routine is mandatory. * - * void (*put_char)(struct tty_struct *tty, unsigned char ch); + * Optional: Required for writable devices. + * + * int (*put_char)(struct tty_struct *tty, unsigned char ch); * * This routine is called by the kernel to write a single * character to the tty device. If the kernel uses this routine, @@ -34,10 +40,18 @@ * done stuffing characters into the driver. If there is no room * in the queue, the character is ignored. 
* + * Optional: Kernel will use the write method if not provided. + * + * Note: Do not call this function directly, call tty_put_char + * * void (*flush_chars)(struct tty_struct *tty); * * This routine is called by the kernel after it has written a * series of characters to the tty device using put_char(). + * + * Optional: + * + * Note: Do not call this function directly, call tty_driver_flush_chars * * int (*write_room)(struct tty_struct *tty); * @@ -45,6 +59,10 @@ * will accept for queuing to be written. This number is subject * to change as output buffers get emptied, or if the output flow * control is acted. + * + * Required if write method is provided else not needed. + * + * Note: Do not call this function directly, call tty_write_room * * int (*ioctl)(struct tty_struct *tty, struct file * file, * unsigned int cmd, unsigned long arg); @@ -53,22 +71,29 @@ * device-specific ioctl's. If the ioctl number passed in cmd * is not recognized by the driver, it should return ENOIOCTLCMD. * + * Optional + * * long (*compat_ioctl)(struct tty_struct *tty, struct file * file, * unsigned int cmd, unsigned long arg); * * implement ioctl processing for 32 bit process on 64 bit system + * + * Optional * * void (*set_termios)(struct tty_struct *tty, struct ktermios * old); * * This routine allows the tty driver to be notified when - * device's termios settings have changed. Note that a - * well-designed tty driver should be prepared to accept the case - * where old == NULL, and try to do something rational. + * device's termios settings have changed. + * + * Optional: Called under the termios lock + * * * void (*set_ldisc)(struct tty_struct *tty); * * This routine allows the tty driver to be notified when the * device's termios settings have changed. + * + * Optional: Called under BKL (currently) * * void (*throttle)(struct tty_struct * tty); * @@ -86,17 +111,27 @@ * * This routine notifies the tty driver that it should stop * outputting characters to the tty device. 
+ * + * Optional: + * + * Note: Call stop_tty not this method. * * void (*start)(struct tty_struct *tty); * * This routine notifies the tty driver that it resume sending * characters to the tty device. + * + * Optional: + * + * Note: Call start_tty not this method. * * void (*hangup)(struct tty_struct *tty); * * This routine notifies the tty driver that it should hangup the * tty device. * + * Required: + * * void (*break_ctl)(struct tty_stuct *tty, int state); * * This optional routine requests the tty driver to turn on or @@ -106,18 +141,26 @@ * * If this routine is implemented, the high-level tty driver will * handle the following ioctls: TCSBRK, TCSBRKP, TIOCSBRK, - * TIOCCBRK. Otherwise, these ioctls will be passed down to the - * driver to handle. + * TIOCCBRK. + * + * Optional: Required for TCSBRK/BRKP/etc handling. * * void (*wait_until_sent)(struct tty_struct *tty, int timeout); * * This routine waits until the device has written out all of the * characters in its transmitter FIFO. * + * Optional: If not provided the device is assumed to have no FIFO + * + * Note: Usually correct to call tty_wait_until_sent + * * void (*send_xchar)(struct tty_struct *tty, char ch); * * This routine is used to send a high-priority XON/XOFF * character to the device. + * + * Optional: If not provided then the write method is called under + * the atomic write lock to keep it serialized with the ldisc. 
*/ #include @@ -132,7 +175,7 @@ struct tty_operations { void (*close)(struct tty_struct * tty, struct file * filp); int (*write)(struct tty_struct * tty, const unsigned char *buf, int count); - void (*put_char)(struct tty_struct *tty, unsigned char ch); + int (*put_char)(struct tty_struct *tty, unsigned char ch); void (*flush_chars)(struct tty_struct *tty); int (*write_room)(struct tty_struct *tty); int (*chars_in_buffer)(struct tty_struct *tty); @@ -153,8 +196,6 @@ struct tty_operations { void (*send_xchar)(struct tty_struct *tty, char ch); int (*read_proc)(char *page, char **start, off_t off, int count, int *eof, void *data); - int (*write_proc)(struct file *file, const char __user *buffer, - unsigned long count, void *data); int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); @@ -190,48 +231,13 @@ struct tty_driver { struct tty_struct **ttys; struct ktermios **termios; struct ktermios **termios_locked; - void *driver_state; /* only used for the PTY driver */ - + void *driver_state; + /* - * Interface routines from the upper tty layer to the tty - * driver. Will be replaced with struct tty_operations. 
+ * Driver methods */ - int (*open)(struct tty_struct * tty, struct file * filp); - void (*close)(struct tty_struct * tty, struct file * filp); - int (*write)(struct tty_struct * tty, - const unsigned char *buf, int count); - void (*put_char)(struct tty_struct *tty, unsigned char ch); - void (*flush_chars)(struct tty_struct *tty); - int (*write_room)(struct tty_struct *tty); - int (*chars_in_buffer)(struct tty_struct *tty); - int (*ioctl)(struct tty_struct *tty, struct file * file, - unsigned int cmd, unsigned long arg); - long (*compat_ioctl)(struct tty_struct *tty, struct file * file, - unsigned int cmd, unsigned long arg); - void (*set_termios)(struct tty_struct *tty, struct ktermios * old); - void (*throttle)(struct tty_struct * tty); - void (*unthrottle)(struct tty_struct * tty); - void (*stop)(struct tty_struct *tty); - void (*start)(struct tty_struct *tty); - void (*hangup)(struct tty_struct *tty); - void (*break_ctl)(struct tty_struct *tty, int state); - void (*flush_buffer)(struct tty_struct *tty); - void (*set_ldisc)(struct tty_struct *tty); - void (*wait_until_sent)(struct tty_struct *tty, int timeout); - void (*send_xchar)(struct tty_struct *tty, char ch); - int (*read_proc)(char *page, char **start, off_t off, - int count, int *eof, void *data); - int (*write_proc)(struct file *file, const char __user *buffer, - unsigned long count, void *data); - int (*tiocmget)(struct tty_struct *tty, struct file *file); - int (*tiocmset)(struct tty_struct *tty, struct file *file, - unsigned int set, unsigned int clear); -#ifdef CONFIG_CONSOLE_POLL - int (*poll_init)(struct tty_driver *driver, int line, char *options); - int (*poll_get_char)(struct tty_driver *driver, int line); - void (*poll_put_char)(struct tty_driver *driver, int line, char ch); -#endif + const struct tty_operations *ops; struct list_head tty_drivers; }; diff --git a/kernel/printk.c b/kernel/printk.c index d3f9c0f788bf..0d232589a923 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1272,8 
+1272,8 @@ late_initcall(disable_boot_consoles); */ void tty_write_message(struct tty_struct *tty, char *msg) { - if (tty && tty->driver->write) - tty->driver->write(tty, msg, strlen(msg)); + if (tty && tty->ops->write) + tty->ops->write(tty, msg, strlen(msg)); return; } diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index d2620410cb0a..76c3057d0179 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -555,10 +555,8 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) ircomm_tty_shutdown(self); - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); - if (tty->ldisc.flush_buffer) - tty->ldisc.flush_buffer(tty); + tty_driver_flush_buffer(tty); + tty_ldisc_flush(tty); tty->closing = 0; self->tty = NULL; -- cgit v1.2.3-71-gd317 From 39c2e60f8c584c1b29b5c4375dd49df7995386bb Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 30 Apr 2008 00:54:18 -0700 Subject: tty: add throttle/unthrottle helpers Something Arjan suggested which allows us to clean up the code nicely Signed-off-by: Alan Cox Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/bluetooth/hci_ldisc.c | 4 +--- drivers/char/n_tty.c | 14 ++++---------- drivers/char/tty_ioctl.c | 16 ++++++++++++++++ drivers/net/hamradio/6pack.c | 4 +--- drivers/net/hamradio/mkiss.c | 4 +--- drivers/net/ppp_async.c | 4 +--- drivers/net/ppp_synctty.c | 4 +--- include/linux/tty.h | 2 ++ include/linux/tty_driver.h | 4 ++++ 9 files changed, 31 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index a6c2619ec782..e5cd856a2fea 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -370,9 +370,7 @@ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, char *f hu->hdev->stat.byte_rx += count; spin_unlock(&hu->rx_lock); - if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && - 
tty->ops->unthrottle) - tty->ops->unthrottle(tty); + tty_unthrottle(tty); } static int hci_uart_register_dev(struct hci_uart *hu) diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index abc93a93dcdd..19105ec203f7 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -147,10 +147,8 @@ static void put_tty_queue(unsigned char c, struct tty_struct *tty) static void check_unthrottle(struct tty_struct *tty) { - if (tty->count && - test_and_clear_bit(TTY_THROTTLED, &tty->flags) && - tty->ops->unthrottle) - tty->ops->unthrottle(tty); + if (tty->count) + tty_unthrottle(tty); } /** @@ -982,12 +980,8 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, * mode. We don't want to throttle the driver if we're in * canonical mode and don't have a newline yet! */ - if (tty->receive_room < TTY_THRESHOLD_THROTTLE) { - /* check TTY_THROTTLED first so it indicates our state */ - if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) && - tty->ops->throttle) - tty->ops->throttle(tty); - } + if (tty->receive_room < TTY_THRESHOLD_THROTTLE) + tty_throttle(tty); } int is_ignored(int sig) diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index c10d40c4c5ca..b1a757a5ee27 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -67,6 +67,22 @@ void tty_driver_flush_buffer(struct tty_struct *tty) EXPORT_SYMBOL(tty_driver_flush_buffer); +void tty_throttle(struct tty_struct *tty) +{ + /* check TTY_THROTTLED first so it indicates our state */ + if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) && + tty->ops->throttle) + tty->ops->throttle(tty); +} +EXPORT_SYMBOL(tty_throttle); + +void tty_unthrottle(struct tty_struct *tty) +{ + if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && + tty->ops->unthrottle) + tty->ops->unthrottle(tty); +} +EXPORT_SYMBOL(tty_unthrottle); /** * tty_wait_until_sent - wait for I/O to finish diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 82a36266dfc9..9d5721287d6f 100644 
--- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -491,9 +491,7 @@ static void sixpack_receive_buf(struct tty_struct *tty, sixpack_decode(sp, buf, count1); sp_put(sp); - if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) - && tty->ops->unthrottle) - tty->ops->unthrottle(tty); + tty_unthrottle(tty); } /* diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index ebcc5adee7cc..65166035aca0 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -936,9 +936,7 @@ static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, } mkiss_put(ax); - if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) - && tty->ops->unthrottle) - tty->ops->unthrottle(tty); + tty_unthrottle(tty); } /* diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 1c4b7e37912c..f1a52def1241 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -361,9 +361,7 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, if (!skb_queue_empty(&ap->rqueue)) tasklet_schedule(&ap->tsk); ap_put(ap); - if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) - && tty->ops->unthrottle) - tty->ops->unthrottle(tty); + tty_unthrottle(tty); } static void diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 48ed5fdbfe18..b8f0369a71e7 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -401,9 +401,7 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, if (!skb_queue_empty(&ap->rqueue)) tasklet_schedule(&ap->tsk); sp_put(ap); - if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) - && tty->ops->unthrottle) - tty->ops->unthrottle(tty); + tty_unthrottle(tty); } static void diff --git a/include/linux/tty.h b/include/linux/tty.h index c36a76da2ae2..7f7121f9c968 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -303,6 +303,8 @@ extern int tty_put_char(struct tty_struct *tty, unsigned char c); extern int tty_chars_in_buffer(struct tty_struct 
*tty); extern int tty_write_room(struct tty_struct *tty); extern void tty_driver_flush_buffer(struct tty_struct *tty); +extern void tty_throttle(struct tty_struct *tty); +extern void tty_unthrottle(struct tty_struct *tty); extern int is_current_pgrp_orphaned(void); extern struct pid *tty_get_pgrp(struct tty_struct *tty); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index f80d73b690f6..59f1c0bd8f9c 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -100,6 +100,8 @@ * This routine notifies the tty driver that input buffers for * the line discipline are close to full, and it should somehow * signal that no more characters should be sent to the tty. + * + * Optional: Always invoke via tty_throttle(); * * void (*unthrottle)(struct tty_struct * tty); * @@ -107,6 +109,8 @@ * that characters can now be sent to the tty without fear of * overrunning the input buffers of the line disciplines. * + * Optional: Always invoke via tty_unthrottle(); + * * void (*stop)(struct tty_struct *tty); * * This routine notifies the tty driver that it should stop -- cgit v1.2.3-71-gd317 From 718a916338e821a10961e6a7a17430c18e5e58d9 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 30 Apr 2008 00:54:21 -0700 Subject: devpts: factor out PTY index allocation Factor out the code used to allocate/free a pts index into new interfaces, devpts_new_index() and devpts_kill_index(). This localizes the external data structures used in managing the pts indices. [akpm@linux-foundation.org: undo accidental mutex2sem conversion] Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Serge Hallyn Signed-off-by: Matt Helsley Acked-by: H. 
Peter Anvin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 40 ++++++---------------------------------- fs/devpts/inode.c | 43 ++++++++++++++++++++++++++++++++++++++++++- include/linux/devpts_fs.h | 4 ++++ 3 files changed, 52 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index edcb7e471f02..1d298c2cf930 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -91,7 +91,6 @@ #include #include #include -#include #include #include #include @@ -137,9 +136,6 @@ EXPORT_SYMBOL(tty_mutex); #ifdef CONFIG_UNIX98_PTYS extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */ -extern int pty_limit; /* Config limit on Unix98 ptys */ -static DEFINE_IDR(allocated_ptys); -static DEFINE_MUTEX(allocated_ptys_lock); static int ptmx_open(struct inode *, struct file *); #endif @@ -2639,15 +2635,9 @@ static void release_dev(struct file *filp) */ release_tty(tty, idx); -#ifdef CONFIG_UNIX98_PTYS /* Make this pty number available for reallocation */ - if (devpts) { - mutex_lock(&allocated_ptys_lock); - idr_remove(&allocated_ptys, idx); - mutex_unlock(&allocated_ptys_lock); - } -#endif - + if (devpts) + devpts_kill_index(idx); } /** @@ -2803,29 +2793,13 @@ static int ptmx_open(struct inode *inode, struct file *filp) struct tty_struct *tty; int retval; int index; - int idr_ret; nonseekable_open(inode, filp); /* find a device that is not in use. 
*/ - mutex_lock(&allocated_ptys_lock); - if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { - mutex_unlock(&allocated_ptys_lock); - return -ENOMEM; - } - idr_ret = idr_get_new(&allocated_ptys, NULL, &index); - if (idr_ret < 0) { - mutex_unlock(&allocated_ptys_lock); - if (idr_ret == -EAGAIN) - return -ENOMEM; - return -EIO; - } - if (index >= pty_limit) { - idr_remove(&allocated_ptys, index); - mutex_unlock(&allocated_ptys_lock); - return -EIO; - } - mutex_unlock(&allocated_ptys_lock); + index = devpts_new_index(); + if (index < 0) + return index; mutex_lock(&tty_mutex); retval = init_dev(ptm_driver, index, &tty); @@ -2850,9 +2824,7 @@ out1: release_dev(filp); return retval; out: - mutex_lock(&allocated_ptys_lock); - idr_remove(&allocated_ptys, index); - mutex_unlock(&allocated_ptys_lock); + devpts_kill_index(index); return retval; } #endif diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index f120e1207874..285b64a8b06e 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include #include @@ -26,6 +28,10 @@ #define DEVPTS_DEFAULT_MODE 0600 +extern int pty_limit; /* Config limit on Unix98 ptys */ +static DEFINE_IDR(allocated_ptys); +static DEFINE_MUTEX(allocated_ptys_lock); + static struct vfsmount *devpts_mnt; static struct dentry *devpts_root; @@ -171,9 +177,44 @@ static struct dentry *get_node(int num) return lookup_one_len(s, root, sprintf(s, "%d", num)); } +int devpts_new_index(void) +{ + int index; + int idr_ret; + +retry: + if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { + return -ENOMEM; + } + + mutex_lock(&allocated_ptys_lock); + idr_ret = idr_get_new(&allocated_ptys, NULL, &index); + if (idr_ret < 0) { + mutex_unlock(&allocated_ptys_lock); + if (idr_ret == -EAGAIN) + goto retry; + return -EIO; + } + + if (index >= pty_limit) { + idr_remove(&allocated_ptys, index); + mutex_unlock(&allocated_ptys_lock); + return -EIO; + } + mutex_unlock(&allocated_ptys_lock); + return index; +} + 
+void devpts_kill_index(int idx) +{ + mutex_lock(&allocated_ptys_lock); + idr_remove(&allocated_ptys, idx); + mutex_unlock(&allocated_ptys_lock); +} + int devpts_pty_new(struct tty_struct *tty) { - int number = tty->index; + int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ struct tty_driver *driver = tty->driver; dev_t device = MKDEV(driver->major, driver->minor_start+number); struct dentry *dentry; diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index b672ddc00735..154769cad3f3 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -17,6 +17,8 @@ #ifdef CONFIG_UNIX98_PTYS +int devpts_new_index(void); +void devpts_kill_index(int idx); int devpts_pty_new(struct tty_struct *tty); /* mknod in devpts */ struct tty_struct *devpts_get_tty(int number); /* get tty structure */ void devpts_pty_kill(int number); /* unlink */ @@ -24,6 +26,8 @@ void devpts_pty_kill(int number); /* unlink */ #else /* Dummy stubs in the no-pty case */ +static inline int devpts_new_index(void) { return -EINVAL; } +static inline void devpts_kill_index(int idx) { } static inline int devpts_pty_new(struct tty_struct *tty) { return -EINVAL; } static inline struct tty_struct *devpts_get_tty(int number) { return NULL; } static inline void devpts_pty_kill(int number) { } -- cgit v1.2.3-71-gd317 From 5cd204550b1a006f2b0c986b0e0f53220ebfd391 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 30 Apr 2008 00:54:24 -0700 Subject: Deprecate find_task_by_pid() There are some places that are known to operate on tasks' global pids only: * the rest_init() call (called on boot) * the kgdb's getthread * the create_kthread() (since the kthread is run in init ns) So use the find_task_by_pid_ns(..., &init_pid_ns) there and schedule the find_task_by_pid for removal. [sukadev@us.ibm.com: Fix warning in kernel/pid.c] Signed-off-by: Pavel Emelyanov Cc: "Eric W. 
Biederman" Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/feature-removal-schedule.txt | 18 ++++++++++++++++++ include/linux/sched.h | 5 ++++- init/main.c | 2 +- kernel/kthread.c | 2 +- kernel/pid.c | 6 ------ 5 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 599fe55bf297..3c35d452b1a9 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -138,6 +138,24 @@ Who: Kay Sievers --------------------------- +What: find_task_by_pid +When: 2.6.26 +Why: With pid namespaces, calling this funciton will return the + wrong task when called from inside a namespace. + + The best way to save a task pid and find a task by this + pid later, is to find this task's struct pid pointer (or get + it directly from the task) and call pid_task() later. + + If someone really needs to get a task by its pid_t, then + he most likely needs the find_task_by_vpid() to get the + task from the same namespace as the current task is in, but + this may be not so in general. + +Who: Pavel Emelyanov + +--------------------------- + What: ACPI procfs interface When: July 2008 Why: ACPI sysfs conversion should be finished by January 2008. 
diff --git a/include/linux/sched.h b/include/linux/sched.h index 86e60796db62..03c238088aee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1677,7 +1677,10 @@ extern struct pid_namespace init_pid_ns; extern struct task_struct *find_task_by_pid_type_ns(int type, int pid, struct pid_namespace *ns); -extern struct task_struct *find_task_by_pid(pid_t nr); +static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr) +{ + return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns); +} extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); diff --git a/init/main.c b/init/main.c index 1f4406477f83..dff253cfcd9f 100644 --- a/init/main.c +++ b/init/main.c @@ -459,7 +459,7 @@ static void noinline __init_refok rest_init(void) kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); numa_default_policy(); pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); - kthreadd_task = find_task_by_pid(pid); + kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); unlock_kernel(); /* diff --git a/kernel/kthread.c b/kernel/kthread.c index ac72eea48339..bd1b9ea024e1 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -98,7 +98,7 @@ static void create_kthread(struct kthread_create_info *create) struct sched_param param = { .sched_priority = 0 }; wait_for_completion(&create->started); read_lock(&tasklist_lock); - create->result = find_task_by_pid(pid); + create->result = find_task_by_pid_ns(pid, &init_pid_ns); read_unlock(&tasklist_lock); /* * root may have changed our (kthreadd's) priority or CPU mask. 
diff --git a/kernel/pid.c b/kernel/pid.c index b322cdf401bf..a9ae9f7fb229 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -381,12 +381,6 @@ struct task_struct *find_task_by_pid_type_ns(int type, int nr, EXPORT_SYMBOL(find_task_by_pid_type_ns); -struct task_struct *find_task_by_pid(pid_t nr) -{ - return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns); -} -EXPORT_SYMBOL(find_task_by_pid); - struct task_struct *find_task_by_vpid(pid_t vnr) { return find_task_by_pid_type_ns(PIDTYPE_PID, vnr, -- cgit v1.2.3-71-gd317 From 24336eaeecea860b2a82530e07c80bc7e0558b73 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:54:26 -0700 Subject: pids: introduce change_pid() helper Based on Eric W. Biederman's idea. Without tasklist_lock held task_session()/task_pgrp() can return NULL if the caller races with setpgrp()/setsid() which does detach_pid() + attach_pid(). This can happen even if task == current. Introduce the new helper, change_pid(), which should be used instead. This way the caller always sees the special pid != NULL, either old or new. Also change the prototype of attach_pid(), it always returns 0 and nobody checks the returned value. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 6 ++++-- kernel/pid.c | 21 ++++++++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index c7980810eb09..8d199033c0ca 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -89,9 +89,11 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); * attach_pid() and detach_pid() must be called with the tasklist_lock * write-held.
*/ -extern int attach_pid(struct task_struct *task, enum pid_type type, - struct pid *pid); +extern void attach_pid(struct task_struct *task, enum pid_type type, + struct pid *pid); extern void detach_pid(struct task_struct *task, enum pid_type); +extern void change_pid(struct task_struct *task, enum pid_type, + struct pid *pid); extern void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type); diff --git a/kernel/pid.c b/kernel/pid.c index e9a31d362b28..20d59fa2d493 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -317,7 +317,7 @@ EXPORT_SYMBOL_GPL(find_pid); /* * attach_pid() must be called with the tasklist_lock write-held. */ -int attach_pid(struct task_struct *task, enum pid_type type, +void attach_pid(struct task_struct *task, enum pid_type type, struct pid *pid) { struct pid_link *link; @@ -325,11 +325,10 @@ int attach_pid(struct task_struct *task, enum pid_type type, link = &task->pids[type]; link->pid = pid; hlist_add_head_rcu(&link->node, &pid->tasks[type]); - - return 0; } -void detach_pid(struct task_struct *task, enum pid_type type) +static void __change_pid(struct task_struct *task, enum pid_type type, + struct pid *new) { struct pid_link *link; struct pid *pid; @@ -339,7 +338,7 @@ void detach_pid(struct task_struct *task, enum pid_type type) pid = link->pid; hlist_del_rcu(&link->node); - link->pid = NULL; + link->pid = new; for (tmp = PIDTYPE_MAX; --tmp >= 0; ) if (!hlist_empty(&pid->tasks[tmp])) @@ -348,6 +347,18 @@ void detach_pid(struct task_struct *task, enum pid_type type) free_pid(pid); } +void detach_pid(struct task_struct *task, enum pid_type type) +{ + __change_pid(task, type, NULL); +} + +void change_pid(struct task_struct *task, enum pid_type type, + struct pid *pid) +{ + __change_pid(task, type, pid); + attach_pid(task, type, pid); +} + /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type type) -- cgit 
v1.2.3-71-gd317 From caafa4324335aeb11bc233d5f87aca8cce30beba Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 30 Apr 2008 00:54:31 -0700 Subject: pidns: make pid->level and pid_ns->level unsigned These values represent the nesting level of a namespace and pids living in it, and it's always non-negative. Turning this from int to unsigned int saves some space in pid.c (11 bytes on x86 and 64 on ia64) by letting the compiler optimize the pid_nr_ns a bit. E.g. on ia64 this removes the sign extension calls, which the compiler adds to optimize access to pid->numbers[ns->level]. Signed-off-by: Pavel Emelyanov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 2 +- include/linux/pid_namespace.h | 2 +- kernel/pid_namespace.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 8d199033c0ca..c21c7e8124a7 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -60,7 +60,7 @@ struct pid /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct rcu_head rcu; - int level; + unsigned int level; struct upid numbers[1]; }; diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index fcd61fa2c833..caff5283d15c 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -20,7 +20,7 @@ struct pid_namespace { int last_pid; struct task_struct *child_reaper; struct kmem_cache *pid_cachep; - int level; + unsigned int level; struct pid_namespace *parent; #ifdef CONFIG_PROC_FS struct vfsmount *proc_mnt; diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 5ca37fa50beb..98702b4b8851 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -66,7 +66,7 @@ err_alloc: return NULL; } -static struct pid_namespace *create_pid_namespace(int level) +static struct pid_namespace *create_pid_namespace(unsigned int level) { struct pid_namespace *ns; int
i; -- cgit v1.2.3-71-gd317 From cf0ca9fe5dd9e3693d935757a7b2fc50fc576554 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Apr 2008 00:54:32 -0700 Subject: mm: bdi: export BDI attributes in sysfs Provide a place in sysfs (/sys/class/bdi) for the backing_dev_info object. This allows us to see and set the various BDI specific variables. In particular this properly exposes the read-ahead window for all relevant users and /sys/block//queue/read_ahead_kb should be deprecated. With patient help from Kay Sievers and Greg KH [mszeredi@suse.cz] - split off NFS and FUSE changes into separate patches - document new sysfs attributes under Documentation/ABI - do bdi_class_init as a core_initcall, otherwise the "default" BDI won't be initialized - remove bdi_init_fmt macro, it's not used very much [akpm@linux-foundation.org: fix ia64 warning] Signed-off-by: Peter Zijlstra Cc: Kay Sievers Acked-by: Greg KH Cc: Trond Myklebust Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-class-bdi | 46 ++++++++++++ block/genhd.c | 8 ++ include/linux/backing-dev.h | 9 +++ include/linux/writeback.h | 3 + lib/percpu_counter.c | 1 + mm/backing-dev.c | 119 ++++++++++++++++++++++++++++++ mm/page-writeback.c | 2 +- mm/readahead.c | 8 +- 8 files changed, 194 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-bdi (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi new file mode 100644 index 000000000000..b800cdda40bb --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -0,0 +1,46 @@ +What: /sys/class/bdi// +Date: January 2008 +Contact: Peter Zijlstra +Description: + +Provide a place in sysfs for the backing_dev_info object. +This allows us to see and set the various BDI specific variables. 
+ +The identifier can be either of the following: + +MAJOR:MINOR + + Device number for block devices, or value of st_dev on + non-block filesystems which provide their own BDI, such as NFS + and FUSE. + +default + + The default backing dev, used for non-block device backed + filesystems which do not provide their own BDI. + +Files under /sys/class/bdi// +--------------------------------- + +read_ahead_kb (read-write) + + Size of the read-ahead window in kilobytes + +reclaimable_kb (read-only) + + Reclaimable (dirty or unstable) memory destined for writeback + to this device + +writeback_kb (read-only) + + Memory currently under writeback to this device + +dirty_kb (read-only) + + Global threshold for reclaimable + writeback memory + +bdi_dirty_kb (read-only) + + Current threshold on this BDI for reclaimable + writeback + memory + diff --git a/block/genhd.c b/block/genhd.c index 00da5219ee37..fda9c7a63c29 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -182,11 +182,17 @@ static int exact_lock(dev_t devt, void *data) */ void add_disk(struct gendisk *disk) { + struct backing_dev_info *bdi; + disk->flags |= GENHD_FL_UP; blk_register_region(MKDEV(disk->major, disk->first_minor), disk->minors, NULL, exact_match, exact_lock, disk); register_disk(disk); blk_register_queue(disk); + + bdi = &disk->queue->backing_dev_info; + bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); + sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); } EXPORT_SYMBOL(add_disk); @@ -194,6 +200,8 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ void unlink_gendisk(struct gendisk *disk) { + sysfs_remove_link(&disk->dev.kobj, "bdi"); + bdi_unregister(&disk->queue->backing_dev_info); blk_unregister_queue(disk); blk_unregister_region(MKDEV(disk->major, disk->first_minor), disk->minors); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index b66fa2bdfd9c..6d513666d45c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -11,9 
+11,11 @@ #include #include #include +#include #include struct page; +struct device; /* * Bits in backing_dev_info.state @@ -48,11 +50,18 @@ struct backing_dev_info { struct prop_local_percpu completions; int dirty_exceeded; + + struct device *dev; }; int bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); +int bdi_register(struct backing_dev_info *bdi, struct device *parent, + const char *fmt, ...); +int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); +void bdi_unregister(struct backing_dev_info *bdi); + static inline void __add_bdi_stat(struct backing_dev_info *bdi, enum bdi_stat_item item, s64 amount) { diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b7b3362f7717..f462439cc288 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -114,6 +114,9 @@ struct file; int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, + struct backing_dev_info *bdi); + void page_writeback_init(void); void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, unsigned long nr_pages_dirtied); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 393a0e915c23..119174494cb5 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -102,6 +102,7 @@ void percpu_counter_destroy(struct percpu_counter *fbc) return; free_percpu(fbc->counters); + fbc->counters = NULL; #ifdef CONFIG_HOTPLUG_CPU mutex_lock(&percpu_counters_lock); list_del(&fbc->list); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index e8644b1e5527..847eabe4824c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -4,12 +4,129 @@ #include #include #include +#include +#include + + +static struct class *bdi_class; + +static ssize_t read_ahead_kb_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct backing_dev_info *bdi = 
dev_get_drvdata(dev); + char *end; + unsigned long read_ahead_kb; + ssize_t ret = -EINVAL; + + read_ahead_kb = simple_strtoul(buf, &end, 10); + if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { + bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10); + ret = count; + } + return ret; +} + +#define K(pages) ((pages) << (PAGE_SHIFT - 10)) + +#define BDI_SHOW(name, expr) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *page) \ +{ \ + struct backing_dev_info *bdi = dev_get_drvdata(dev); \ + \ + return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr); \ +} + +BDI_SHOW(read_ahead_kb, K(bdi->ra_pages)) + +BDI_SHOW(reclaimable_kb, K(bdi_stat(bdi, BDI_RECLAIMABLE))) +BDI_SHOW(writeback_kb, K(bdi_stat(bdi, BDI_WRITEBACK))) + +static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i) +{ + unsigned long thresh[3]; + + get_dirty_limits(&thresh[0], &thresh[1], &thresh[2], bdi); + + return thresh[i]; +} + +BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) +BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) + +#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) + +static struct device_attribute bdi_dev_attrs[] = { + __ATTR_RW(read_ahead_kb), + __ATTR_RO(reclaimable_kb), + __ATTR_RO(writeback_kb), + __ATTR_RO(dirty_kb), + __ATTR_RO(bdi_dirty_kb), + __ATTR_NULL, +}; + +static __init int bdi_class_init(void) +{ + bdi_class = class_create(THIS_MODULE, "bdi"); + bdi_class->dev_attrs = bdi_dev_attrs; + return 0; +} + +core_initcall(bdi_class_init); + +int bdi_register(struct backing_dev_info *bdi, struct device *parent, + const char *fmt, ...) 
+{ + char *name; + va_list args; + int ret = 0; + struct device *dev; + + va_start(args, fmt); + name = kvasprintf(GFP_KERNEL, fmt, args); + va_end(args); + + if (!name) + return -ENOMEM; + + dev = device_create(bdi_class, parent, MKDEV(0, 0), name); + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + goto exit; + } + + bdi->dev = dev; + dev_set_drvdata(bdi->dev, bdi); + +exit: + kfree(name); + return ret; +} +EXPORT_SYMBOL(bdi_register); + +int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev) +{ + return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev)); +} +EXPORT_SYMBOL(bdi_register_dev); + +void bdi_unregister(struct backing_dev_info *bdi) +{ + if (bdi->dev) { + device_unregister(bdi->dev); + bdi->dev = NULL; + } +} +EXPORT_SYMBOL(bdi_unregister); int bdi_init(struct backing_dev_info *bdi) { int i; int err; + bdi->dev = NULL; + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); if (err) @@ -33,6 +150,8 @@ void bdi_destroy(struct backing_dev_info *bdi) { int i; + bdi_unregister(bdi); + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5e00f1772c20..e5b6b1190a95 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -300,7 +300,7 @@ static unsigned long determine_dirtyable_memory(void) return x + 1; /* Ensure that we never return 0 */ } -static void +void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, struct backing_dev_info *bdi) { diff --git a/mm/readahead.c b/mm/readahead.c index 8762e8988972..d8723a5f6496 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -235,7 +235,13 @@ unsigned long max_sane_readahead(unsigned long nr) static int __init readahead_init(void) { - return bdi_init(&default_backing_dev_info); + int err; + + err = bdi_init(&default_backing_dev_info); + if (!err) + bdi_register(&default_backing_dev_info, NULL, "default"); + + return err; } subsys_initcall(readahead_init); 
-- cgit v1.2.3-71-gd317 From 189d3c4a94ef19fca2a71a6a336e9fda900e25e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Apr 2008 00:54:35 -0700 Subject: mm: bdi: allow setting a minimum for the bdi dirty limit Under normal circumstances each device is given a part of the total write-back cache that relates to its current avg writeout speed in relation to the other devices. min_ratio - allows one to assign a minimum portion of the write-back cache to a particular device. This is useful in situations where you might want to provide a minimum QoS. (One request for this feature came from flash based storage people who wanted to avoid writing out at all costs - they of course needed some pdflush hacks as well) max_ratio - allows one to assign a maximum portion of the dirty limit to a particular device. This is useful in situations where you want to avoid one device taking all or most of the write-back cache. Eg. an NFS mount that is prone to get stuck, or a FUSE mount which you don't trust to play fair. Add "min_ratio" to /sys/class/bdi. This indicates the minimum percentage of the global dirty threshold allocated to this bdi. 
[mszeredi@suse.cz] - fix parsing in min_ratio_store() - document new sysfs attribute Signed-off-by: Peter Zijlstra Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-class-bdi | 6 ++++++ include/linux/backing-dev.h | 4 ++++ mm/backing-dev.c | 21 +++++++++++++++++++++ mm/page-writeback.c | 27 ++++++++++++++++++++++++++- 4 files changed, 57 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index b800cdda40bb..b9e8a9368dc6 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -44,3 +44,9 @@ bdi_dirty_kb (read-only) Current threshold on this BDI for reclaimable + writeback memory +min_ratio (read-write) + + Minimal percentage of global dirty threshold allocated to this + bdi. If the value written to this file would make the the sum + of all min_ratio values exceed 100, then EINVAL is returned. 
+ The default is zero diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 6d513666d45c..9a8965518d1d 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -51,6 +51,8 @@ struct backing_dev_info { struct prop_local_percpu completions; int dirty_exceeded; + unsigned int min_ratio; + struct device *dev; }; @@ -137,6 +139,8 @@ static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi) #endif } +int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); + /* * Flags in backing_dev_info::capability * - The first two flags control whether dirty pages will contribute to the diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 847eabe4824c..4967fb176e53 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -55,6 +55,24 @@ static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i) BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) +static ssize_t min_ratio_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + char *end; + unsigned int ratio; + ssize_t ret = -EINVAL; + + ratio = simple_strtoul(buf, &end, 10); + if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { + ret = bdi_set_min_ratio(bdi, ratio); + if (!ret) + ret = count; + } + return ret; +} +BDI_SHOW(min_ratio, bdi->min_ratio) + #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) static struct device_attribute bdi_dev_attrs[] = { @@ -63,6 +81,7 @@ static struct device_attribute bdi_dev_attrs[] = { __ATTR_RO(writeback_kb), __ATTR_RO(dirty_kb), __ATTR_RO(bdi_dirty_kb), + __ATTR_RW(min_ratio), __ATTR_NULL, }; @@ -127,6 +146,8 @@ int bdi_init(struct backing_dev_info *bdi) bdi->dev = NULL; + bdi->min_ratio = 0; + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); if (err) diff --git a/mm/page-writeback.c 
b/mm/page-writeback.c index e5b6b1190a95..4ac077f4269c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -242,6 +242,29 @@ static void task_dirty_limit(struct task_struct *tsk, long *pdirty) *pdirty = dirty; } +/* + * + */ +static DEFINE_SPINLOCK(bdi_lock); +static unsigned int bdi_min_ratio; + +int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&bdi_lock, flags); + min_ratio -= bdi->min_ratio; + if (bdi_min_ratio + min_ratio < 100) { + bdi_min_ratio += min_ratio; + bdi->min_ratio += min_ratio; + } else + ret = -EINVAL; + spin_unlock_irqrestore(&bdi_lock, flags); + + return ret; +} + /* * Work out the current dirty-memory clamping and background writeout * thresholds. @@ -330,7 +353,7 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, *pdirty = dirty; if (bdi) { - u64 bdi_dirty = dirty; + u64 bdi_dirty; long numerator, denominator; /* @@ -338,8 +361,10 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, */ bdi_writeout_fraction(bdi, &numerator, &denominator); + bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100; bdi_dirty *= numerator; do_div(bdi_dirty, denominator); + bdi_dirty += (dirty * bdi->min_ratio) / 100; *pbdi_dirty = bdi_dirty; clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); -- cgit v1.2.3-71-gd317 From a42dde04152750426cc620fd277e80fffae2f65a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Apr 2008 00:54:36 -0700 Subject: mm: bdi: allow setting a maximum for the bdi dirty limit Add "max_ratio" to /sys/class/bdi. This indicates the maximum percentage of the global dirty threshold allocated to this bdi. [mszeredi@suse.cz] - fix parsing in max_ratio_store(). 
- export bdi_set_max_ratio() to modules - limit bdi_dirty with bdi->max_ratio - document new sysfs attribute Signed-off-by: Peter Zijlstra Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-class-bdi | 9 ++++++- include/linux/backing-dev.h | 2 ++ include/linux/proportions.h | 13 ++++++++++ lib/proportions.c | 38 +++++++++++++++++++++++----- mm/backing-dev.c | 21 ++++++++++++++++ mm/page-writeback.c | 41 ++++++++++++++++++++++++++----- 6 files changed, 111 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index b9e8a9368dc6..c55e811ca180 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -49,4 +49,11 @@ min_ratio (read-write) Minimal percentage of global dirty threshold allocated to this bdi. If the value written to this file would make the the sum of all min_ratio values exceed 100, then EINVAL is returned. - The default is zero + If min_ratio would become larger than the current max_ratio, + then also EINVAL is returned. The default is zero + +max_ratio (read-write) + + Maximal percentage of global dirty threshold allocated to this + bdi. If max_ratio would become smaller than the current + min_ratio, then EINVAL is returned. 
The default is 100 diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9a8965518d1d..ad3271d1e90a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -52,6 +52,7 @@ struct backing_dev_info { int dirty_exceeded; unsigned int min_ratio; + unsigned int max_ratio, max_prop_frac; struct device *dev; }; @@ -140,6 +141,7 @@ static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi) } int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); +int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); /* * Flags in backing_dev_info::capability diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 2c3b3cad92be..5afc1b23346d 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -77,6 +77,19 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) local_irq_restore(flags); } +/* + * Limit the time part in order to ensure there are some bits left for the + * cycle counter and fraction multiply. + */ +#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) + +#define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1) +#define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT) + +void __prop_inc_percpu_max(struct prop_descriptor *pd, + struct prop_local_percpu *pl, long frac); + + /* * ----- SINGLE ------ */ diff --git a/lib/proportions.c b/lib/proportions.c index 9508d9a7af3e..4f387a643d72 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -73,12 +73,6 @@ #include #include -/* - * Limit the time part in order to ensure there are some bits left for the - * cycle counter. 
- */ -#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) - int prop_descriptor_init(struct prop_descriptor *pd, int shift) { int err; @@ -267,6 +261,38 @@ void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) prop_put_global(pd, pg); } +/* + * identical to __prop_inc_percpu, except that it limits this pl's fraction to + * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded. + */ +void __prop_inc_percpu_max(struct prop_descriptor *pd, + struct prop_local_percpu *pl, long frac) +{ + struct prop_global *pg = prop_get_global(pd); + + prop_norm_percpu(pg, pl); + + if (unlikely(frac != PROP_FRAC_BASE)) { + unsigned long period_2 = 1UL << (pg->shift - 1); + unsigned long counter_mask = period_2 - 1; + unsigned long global_count; + long numerator, denominator; + + numerator = percpu_counter_read_positive(&pl->events); + global_count = percpu_counter_read(&pg->events); + denominator = period_2 + (global_count & counter_mask); + + if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT)) + goto out_put; + } + + percpu_counter_add(&pl->events, 1); + percpu_counter_add(&pg->events, 1); + +out_put: + prop_put_global(pd, pg); +} + /* * Obtain a fraction of this proportion * diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 4967fb176e53..08361b6aad50 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -73,6 +73,24 @@ static ssize_t min_ratio_store(struct device *dev, } BDI_SHOW(min_ratio, bdi->min_ratio) +static ssize_t max_ratio_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + char *end; + unsigned int ratio; + ssize_t ret = -EINVAL; + + ratio = simple_strtoul(buf, &end, 10); + if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { + ret = bdi_set_max_ratio(bdi, ratio); + if (!ret) + ret = count; + } + return ret; +} +BDI_SHOW(max_ratio, bdi->max_ratio) + #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, 
attr##_store) static struct device_attribute bdi_dev_attrs[] = { @@ -82,6 +100,7 @@ static struct device_attribute bdi_dev_attrs[] = { __ATTR_RO(dirty_kb), __ATTR_RO(bdi_dirty_kb), __ATTR_RW(min_ratio), + __ATTR_RW(max_ratio), __ATTR_NULL, }; @@ -147,6 +166,8 @@ int bdi_init(struct backing_dev_info *bdi) bdi->dev = NULL; bdi->min_ratio = 0; + bdi->max_ratio = 100; + bdi->max_prop_frac = PROP_FRAC_BASE; for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4ac077f4269c..2a9942f5387c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -164,7 +164,8 @@ int dirty_ratio_handler(struct ctl_table *table, int write, */ static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) { - __prop_inc_percpu(&vm_completions, &bdi->completions); + __prop_inc_percpu_max(&vm_completions, &bdi->completions, + bdi->max_prop_frac); } static inline void task_dirty_inc(struct task_struct *tsk) @@ -254,16 +255,42 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) unsigned long flags; spin_lock_irqsave(&bdi_lock, flags); - min_ratio -= bdi->min_ratio; - if (bdi_min_ratio + min_ratio < 100) { - bdi_min_ratio += min_ratio; - bdi->min_ratio += min_ratio; - } else + if (min_ratio > bdi->max_ratio) { ret = -EINVAL; + } else { + min_ratio -= bdi->min_ratio; + if (bdi_min_ratio + min_ratio < 100) { + bdi_min_ratio += min_ratio; + bdi->min_ratio += min_ratio; + } else { + ret = -EINVAL; + } + } + spin_unlock_irqrestore(&bdi_lock, flags); + + return ret; +} + +int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) +{ + unsigned long flags; + int ret = 0; + + if (max_ratio > 100) + return -EINVAL; + + spin_lock_irqsave(&bdi_lock, flags); + if (bdi->min_ratio > max_ratio) { + ret = -EINVAL; + } else { + bdi->max_ratio = max_ratio; + bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; + } spin_unlock_irqrestore(&bdi_lock, flags); return 
ret; } +EXPORT_SYMBOL(bdi_set_max_ratio); /* * Work out the current dirty-memory clamping and background writeout @@ -365,6 +392,8 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, bdi_dirty *= numerator; do_div(bdi_dirty, denominator); bdi_dirty += (dirty * bdi->min_ratio) / 100; + if (bdi_dirty > (dirty * bdi->max_ratio) / 100) + bdi_dirty = dirty * bdi->max_ratio / 100; *pbdi_dirty = bdi_dirty; clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); -- cgit v1.2.3-71-gd317 From 76f1418b485da2707531178e517bbb5cf06b3c76 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:36 -0700 Subject: mm: bdi: move statistics to debugfs Move BDI statistics to debugfs: /sys/kernel/debug/bdi//stats Use postcore_initcall() to initialize the sysfs class and debugfs, because debugfs is initialized in core_initcall(). Update descriptions in ABI documentation. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-class-bdi | 43 +++++--------- include/linux/backing-dev.h | 6 ++ mm/backing-dev.c | 98 ++++++++++++++++++++++++------- 3 files changed, 99 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index c55e811ca180..5ac1e01bbd48 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -3,8 +3,8 @@ Date: January 2008 Contact: Peter Zijlstra Description: -Provide a place in sysfs for the backing_dev_info object. -This allows us to see and set the various BDI specific variables. +Provide a place in sysfs for the backing_dev_info object. This allows +setting and retrieving various BDI specific variables. 
The identifier can be either of the following: @@ -26,34 +26,21 @@ read_ahead_kb (read-write) Size of the read-ahead window in kilobytes -reclaimable_kb (read-only) - - Reclaimable (dirty or unstable) memory destined for writeback - to this device - -writeback_kb (read-only) - - Memory currently under writeback to this device - -dirty_kb (read-only) - - Global threshold for reclaimable + writeback memory - -bdi_dirty_kb (read-only) - - Current threshold on this BDI for reclaimable + writeback - memory - min_ratio (read-write) - Minimal percentage of global dirty threshold allocated to this - bdi. If the value written to this file would make the the sum - of all min_ratio values exceed 100, then EINVAL is returned. - If min_ratio would become larger than the current max_ratio, - then also EINVAL is returned. The default is zero + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_ratio' parameter allows assigning a minimum + percentage of the write-back cache to a particular device. + For example, this is useful for providing a minimum QoS. max_ratio (read-write) - Maximal percentage of global dirty threshold allocated to this - bdi. If max_ratio would become smaller than the current - min_ratio, then EINVAL is returned. The default is 100 + Allows limiting a particular device to use not more than the + given percentage of the write-back cache. This is useful in + situations where we want to avoid one device taking all or + most of the write-back cache. For example in case of an NFS + mount that is prone to get stuck, or a FUSE mount which cannot + be trusted to play fair. 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ad3271d1e90a..c49a2d045e11 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -16,6 +16,7 @@ struct page; struct device; +struct dentry; /* * Bits in backing_dev_info.state @@ -55,6 +56,11 @@ struct backing_dev_info { unsigned int max_ratio, max_prop_frac; struct device *dev; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debug_dir; + struct dentry *debug_stats; +#endif }; int bdi_init(struct backing_dev_info *bdi); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 08361b6aad50..7c4f9e097095 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -10,6 +10,80 @@ static struct class *bdi_class; +#ifdef CONFIG_DEBUG_FS +#include +#include + +static struct dentry *bdi_debug_root; + +static void bdi_debug_init(void) +{ + bdi_debug_root = debugfs_create_dir("bdi", NULL); +} + +static int bdi_debug_stats_show(struct seq_file *m, void *v) +{ + struct backing_dev_info *bdi = m->private; + long background_thresh; + long dirty_thresh; + long bdi_thresh; + + get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); + +#define K(x) ((x) << (PAGE_SHIFT - 10)) + seq_printf(m, + "BdiWriteback: %8lu kB\n" + "BdiReclaimable: %8lu kB\n" + "BdiDirtyThresh: %8lu kB\n" + "DirtyThresh: %8lu kB\n" + "BackgroundThresh: %8lu kB\n", + (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), + (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)), + K(bdi_thresh), + K(dirty_thresh), + K(background_thresh)); +#undef K + + return 0; +} + +static int bdi_debug_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, bdi_debug_stats_show, inode->i_private); +} + +static const struct file_operations bdi_debug_stats_fops = { + .open = bdi_debug_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) +{ + bdi->debug_dir = debugfs_create_dir(name, 
bdi_debug_root); + bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir, + bdi, &bdi_debug_stats_fops); +} + +static void bdi_debug_unregister(struct backing_dev_info *bdi) +{ + debugfs_remove(bdi->debug_stats); + debugfs_remove(bdi->debug_dir); +} +#else +static inline void bdi_debug_init(void) +{ +} +static inline void bdi_debug_register(struct backing_dev_info *bdi, + const char *name) +{ +} +static inline void bdi_debug_unregister(struct backing_dev_info *bdi) +{ +} +#endif + static ssize_t read_ahead_kb_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -40,21 +114,6 @@ static ssize_t name##_show(struct device *dev, \ BDI_SHOW(read_ahead_kb, K(bdi->ra_pages)) -BDI_SHOW(reclaimable_kb, K(bdi_stat(bdi, BDI_RECLAIMABLE))) -BDI_SHOW(writeback_kb, K(bdi_stat(bdi, BDI_WRITEBACK))) - -static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i) -{ - unsigned long thresh[3]; - - get_dirty_limits(&thresh[0], &thresh[1], &thresh[2], bdi); - - return thresh[i]; -} - -BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) -BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) - static ssize_t min_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -95,10 +154,6 @@ BDI_SHOW(max_ratio, bdi->max_ratio) static struct device_attribute bdi_dev_attrs[] = { __ATTR_RW(read_ahead_kb), - __ATTR_RO(reclaimable_kb), - __ATTR_RO(writeback_kb), - __ATTR_RO(dirty_kb), - __ATTR_RO(bdi_dirty_kb), __ATTR_RW(min_ratio), __ATTR_RW(max_ratio), __ATTR_NULL, @@ -108,10 +163,11 @@ static __init int bdi_class_init(void) { bdi_class = class_create(THIS_MODULE, "bdi"); bdi_class->dev_attrs = bdi_dev_attrs; + bdi_debug_init(); return 0; } -core_initcall(bdi_class_init); +postcore_initcall(bdi_class_init); int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...) 
@@ -136,6 +192,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, bdi->dev = dev; dev_set_drvdata(bdi->dev, bdi); + bdi_debug_register(bdi, name); exit: kfree(name); @@ -152,6 +209,7 @@ EXPORT_SYMBOL(bdi_register_dev); void bdi_unregister(struct backing_dev_info *bdi) { if (bdi->dev) { + bdi_debug_unregister(bdi); device_unregister(bdi->dev); bdi->dev = NULL; } -- cgit v1.2.3-71-gd317 From e4ad08fe64afca4ef79ecc4c624e6e871688da0d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:37 -0700 Subject: mm: bdi: add separate writeback accounting capability Add a new BDI capability flag: BDI_CAP_NO_ACCT_WB. If this flag is set, then don't update the per-bdi writeback stats from test_set_page_writeback() and test_clear_page_writeback(). Misc cleanups: - convert bdi_cap_writeback_dirty() and friends to static inline functions - create a flag that includes all three dirty/writeback related flags, since almost all users will want to have them together Signed-off-by: Miklos Szeredi Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/configfs/inode.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/ocfs2/dlm/dlmfs.c | 2 +- fs/ramfs/inode.c | 2 +- fs/sysfs/inode.c | 2 +- include/linux/backing-dev.h | 77 +++++++++++++++++++++++++++++++++------------ kernel/cgroup.c | 2 +- mm/page-writeback.c | 4 +-- mm/shmem.c | 2 +- mm/swap_state.c | 2 +- 10 files changed, 67 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 4c1ebff778ee..b9a1d810346d 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -47,7 +47,7 @@ static const struct address_space_operations configfs_aops = { static struct backing_dev_info configfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static const struct inode_operations configfs_inode_operations
={ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9783723e8ffe..aeabf80f81a5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -45,7 +45,7 @@ static const struct inode_operations hugetlbfs_inode_operations; static struct backing_dev_info hugetlbfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; int sysctl_hugetlb_shm_group; diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 61a000f8524c..e48aba698b77 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -327,7 +327,7 @@ clear_fields: static struct backing_dev_info dlmfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static struct inode *dlmfs_get_root_inode(struct super_block *sb) diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 8428d5b2711d..b13123424e49 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -44,7 +44,7 @@ static const struct inode_operations ramfs_dir_inode_operations; static struct backing_dev_info ramfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, }; diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index d9262f74f94e..f8b82e73b3bf 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -30,7 +30,7 @@ static const struct address_space_operations sysfs_aops = { static struct backing_dev_info sysfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static const struct inode_operations sysfs_inode_operations ={ diff --git a/include/linux/backing-dev.h 
b/include/linux/backing-dev.h index c49a2d045e11..13ab79d99268 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -12,6 +12,7 @@ #include #include #include +#include #include struct page; @@ -151,22 +152,43 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); /* * Flags in backing_dev_info::capability - * - The first two flags control whether dirty pages will contribute to the - * VM's accounting and whether writepages() should be called for dirty pages - * (something that would not, for example, be appropriate for ramfs) - * - These flags let !MMU mmap() govern direct device mapping vs immediate - * copying more easily for MAP_PRIVATE, especially for ROM filesystems + * + * The first three flags control whether dirty pages will contribute to the + * VM's accounting and whether writepages() should be called for dirty pages + * (something that would not, for example, be appropriate for ramfs) + * + * WARNING: these flags are closely related and should not normally be + * used separately. The BDI_CAP_NO_ACCT_AND_WRITEBACK combines these + * three flags into a single convenience macro. + * + * BDI_CAP_NO_ACCT_DIRTY: Dirty pages shouldn't contribute to accounting + * BDI_CAP_NO_WRITEBACK: Don't write pages back + * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages + * + * These flags let !MMU mmap() govern direct device mapping vs immediate + * copying more easily for MAP_PRIVATE, especially for ROM filesystems. 
+ * + * BDI_CAP_MAP_COPY: Copy can be mapped (MAP_PRIVATE) + * BDI_CAP_MAP_DIRECT: Can be mapped directly (MAP_SHARED) + * BDI_CAP_READ_MAP: Can be mapped for reading + * BDI_CAP_WRITE_MAP: Can be mapped for writing + * BDI_CAP_EXEC_MAP: Can be mapped for execution */ -#define BDI_CAP_NO_ACCT_DIRTY 0x00000001 /* Dirty pages shouldn't contribute to accounting */ -#define BDI_CAP_NO_WRITEBACK 0x00000002 /* Don't write pages back */ -#define BDI_CAP_MAP_COPY 0x00000004 /* Copy can be mapped (MAP_PRIVATE) */ -#define BDI_CAP_MAP_DIRECT 0x00000008 /* Can be mapped directly (MAP_SHARED) */ -#define BDI_CAP_READ_MAP 0x00000010 /* Can be mapped for reading */ -#define BDI_CAP_WRITE_MAP 0x00000020 /* Can be mapped for writing */ -#define BDI_CAP_EXEC_MAP 0x00000040 /* Can be mapped for execution */ +#define BDI_CAP_NO_ACCT_DIRTY 0x00000001 +#define BDI_CAP_NO_WRITEBACK 0x00000002 +#define BDI_CAP_MAP_COPY 0x00000004 +#define BDI_CAP_MAP_DIRECT 0x00000008 +#define BDI_CAP_READ_MAP 0x00000010 +#define BDI_CAP_WRITE_MAP 0x00000020 +#define BDI_CAP_EXEC_MAP 0x00000040 +#define BDI_CAP_NO_ACCT_WB 0x00000080 + #define BDI_CAP_VMFLAGS \ (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) +#define BDI_CAP_NO_ACCT_AND_WRITEBACK \ + (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) + #if defined(VM_MAYREAD) && \ (BDI_CAP_READ_MAP != VM_MAYREAD || \ BDI_CAP_WRITE_MAP != VM_MAYWRITE || \ @@ -206,17 +228,32 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int rw); void set_bdi_congested(struct backing_dev_info *bdi, int rw); long congestion_wait(int rw, long timeout); -#define bdi_cap_writeback_dirty(bdi) \ - (!((bdi)->capabilities & BDI_CAP_NO_WRITEBACK)) -#define bdi_cap_account_dirty(bdi) \ - (!((bdi)->capabilities & BDI_CAP_NO_ACCT_DIRTY)) +static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) +{ + return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK); +} + +static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi) +{ + 
return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY); +} -#define mapping_cap_writeback_dirty(mapping) \ - bdi_cap_writeback_dirty((mapping)->backing_dev_info) +static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi) +{ + /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */ + return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB | + BDI_CAP_NO_WRITEBACK)); +} -#define mapping_cap_account_dirty(mapping) \ - bdi_cap_account_dirty((mapping)->backing_dev_info) +static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) +{ + return bdi_cap_writeback_dirty(mapping->backing_dev_info); +} +static inline bool mapping_cap_account_dirty(struct address_space *mapping) +{ + return bdi_cap_account_dirty(mapping->backing_dev_info); +} #endif /* _LINUX_BACKING_DEV_H */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b9d467d83fc1..fbc6fc8949b4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -575,7 +575,7 @@ static struct inode_operations cgroup_dir_inode_operations; static struct file_operations proc_cgroupstats_operations; static struct backing_dev_info cgroup_backing_dev_info = { - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2a9942f5387c..bbcb916190c9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1246,7 +1246,7 @@ int test_clear_page_writeback(struct page *page) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_WRITEBACK); - if (bdi_cap_writeback_dirty(bdi)) { + if (bdi_cap_account_writeback(bdi)) { __dec_bdi_stat(bdi, BDI_WRITEBACK); __bdi_writeout_inc(bdi); } @@ -1275,7 +1275,7 @@ int test_set_page_writeback(struct page *page) radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_WRITEBACK); - if (bdi_cap_writeback_dirty(bdi)) + if (bdi_cap_account_writeback(bdi)) 
__inc_bdi_stat(bdi, BDI_WRITEBACK); } if (!PageDirty(page)) diff --git a/mm/shmem.c b/mm/shmem.c index e6d9298aa22a..e2a6ae1a44e9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -201,7 +201,7 @@ static struct vm_operations_struct shmem_vm_ops; static struct backing_dev_info shmem_backing_dev_info __read_mostly = { .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, .unplug_io_fn = default_unplug_io_fn, }; diff --git a/mm/swap_state.c b/mm/swap_state.c index 50757ee3f9f3..d8aadaf2a0ba 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -33,7 +33,7 @@ static const struct address_space_operations swap_aops = { }; static struct backing_dev_info swap_backing_dev_info = { - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, .unplug_io_fn = swap_unplug_io_fn, }; -- cgit v1.2.3-71-gd317 From dd5656e59ca7b25fb60a22f9079905ed0da5ed0c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:37 -0700 Subject: mm: bdi: export bdi_writeout_inc() Fuse needs this for writable mmap support. Signed-off-by: Miklos Szeredi Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 2 ++ mm/page-writeback.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 13ab79d99268..0a24d5550eb3 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -135,6 +135,8 @@ static inline s64 bdi_stat_sum(struct backing_dev_info *bdi, return sum; } +extern void bdi_writeout_inc(struct backing_dev_info *bdi); + /* * maximal error of a stat counter. 
*/ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index bbcb916190c9..c90a1e8e479f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -168,6 +168,16 @@ static inline void __bdi_writeout_inc(struct backing_dev_info *bdi) bdi->max_prop_frac); } +void bdi_writeout_inc(struct backing_dev_info *bdi) +{ + unsigned long flags; + + local_irq_save(flags); + __bdi_writeout_inc(bdi); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(bdi_writeout_inc); + static inline void task_dirty_inc(struct task_struct *tsk) { prop_inc_single(&vm_dirties, &tsk->dirties); -- cgit v1.2.3-71-gd317 From fc3ba692a4d19019387c5acaea63131f9eab05dd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 30 Apr 2008 00:54:38 -0700 Subject: mm: Add NR_WRITEBACK_TEMP counter Fuse will use temporary buffers to write back dirty data from memory mappings (normal writes are done synchronously). This is needed, because there cannot be any guarantee about the time in which a write will complete. By using temporary buffers, from the MM's point of view the page is written back immediately. If the writeout was due to memory pressure, this effectively migrates data from a full zone to a less full zone. This patch adds a new counter (NR_WRITEBACK_TEMP) for the number of pages used as temporary buffers.
[Lee.Schermerhorn@hp.com: add vmstat_text for NR_WRITEBACK_TEMP] Signed-off-by: Miklos Szeredi Cc: Christoph Lameter Signed-off-by: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 2 ++ fs/proc/proc_misc.c | 2 ++ include/linux/mmzone.h | 1 + mm/page-writeback.c | 3 ++- mm/vmstat.c | 1 + 5 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/base/node.c b/drivers/base/node.c index 12fde2d03d69..39f3d1b3a213 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -77,6 +77,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" + "Node %d WritebackTmp: %8lu kB\n" "Node %d Slab: %8lu kB\n" "Node %d SReclaimable: %8lu kB\n" "Node %d SUnreclaim: %8lu kB\n", @@ -99,6 +100,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) nid, K(node_page_state(nid, NR_PAGETABLE)), nid, K(node_page_state(nid, NR_UNSTABLE_NFS)), nid, K(node_page_state(nid, NR_BOUNCE)), + nid, K(node_page_state(nid, NR_WRITEBACK_TEMP)), nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) + node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)), diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 48bcf20cec2f..74a323d2b850 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -179,6 +179,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "PageTables: %8lu kB\n" "NFS_Unstable: %8lu kB\n" "Bounce: %8lu kB\n" + "WritebackTmp: %8lu kB\n" "CommitLimit: %8lu kB\n" "Committed_AS: %8lu kB\n" "VmallocTotal: %8lu kB\n" @@ -210,6 +211,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(global_page_state(NR_PAGETABLE)), K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), + K(global_page_state(NR_WRITEBACK_TEMP)), K(allowed), K(committed), (unsigned long)VMALLOC_TOTAL >> 
10, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index aad98003176f..ceb675d83a56 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -97,6 +97,7 @@ enum zone_stat_item { NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, NR_VMSCAN_WRITE, + NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c90a1e8e479f..789b6adbef37 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -211,7 +211,8 @@ clip_bdi_dirty_limit(struct backing_dev_info *bdi, long dirty, long *pbdi_dirty) avail_dirty = dirty - (global_page_state(NR_FILE_DIRTY) + global_page_state(NR_WRITEBACK) + - global_page_state(NR_UNSTABLE_NFS)); + global_page_state(NR_UNSTABLE_NFS) + + global_page_state(NR_WRITEBACK_TEMP)); if (avail_dirty < 0) avail_dirty = 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index 280a7ed549f2..1a32130b958c 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -612,6 +612,7 @@ static const char * const vmstat_text[] = { "nr_unstable", "nr_bounce", "nr_vmscan_write", + "nr_writeback_temp", #ifdef CONFIG_NUMA "numa_hit", -- cgit v1.2.3-71-gd317 From 86098fa0115358abf5159093d11ddb306ce4b0da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Apr 2008 00:54:46 -0700 Subject: reiserfs: use open_bdev_excl Use the proper helper to open a blockdevice by name for filesystem use, this makes sure it's properly claimed (also added for open-by-number) and gets rid of the struct file abuse. Tested by mounting a reiserfs filesystem with external journal. 
Signed-off-by: Christoph Hellwig Cc: Chris Mason Cc: Jeff Mahoney Acked-by: Edward Shishkin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/journal.c | 50 +++++++++++++++++++----------------------- include/linux/reiserfs_fs_sb.h | 1 - 2 files changed, 23 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index da86042b3e03..e396b2fa4743 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2574,11 +2574,9 @@ static int release_journal_dev(struct super_block *super, result = 0; - if (journal->j_dev_file != NULL) { - result = filp_close(journal->j_dev_file, NULL); - journal->j_dev_file = NULL; - journal->j_dev_bd = NULL; - } else if (journal->j_dev_bd != NULL) { + if (journal->j_dev_bd != NULL) { + if (journal->j_dev_bd->bd_dev != super->s_dev) + bd_release(journal->j_dev_bd); result = blkdev_put(journal->j_dev_bd); journal->j_dev_bd = NULL; } @@ -2603,7 +2601,6 @@ static int journal_init_dev(struct super_block *super, result = 0; journal->j_dev_bd = NULL; - journal->j_dev_file = NULL; jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 
new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; @@ -2620,35 +2617,34 @@ static int journal_init_dev(struct super_block *super, "cannot init journal device '%s': %i", __bdevname(jdev, b), result); return result; - } else if (jdev != super->s_dev) + } else if (jdev != super->s_dev) { + result = bd_claim(journal->j_dev_bd, journal); + if (result) { + blkdev_put(journal->j_dev_bd); + return result; + } + set_blocksize(journal->j_dev_bd, super->s_blocksize); + } + return 0; } - journal->j_dev_file = filp_open(jdev_name, 0, 0); - if (!IS_ERR(journal->j_dev_file)) { - struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; - if (!S_ISBLK(jdev_inode->i_mode)) { - reiserfs_warning(super, "journal_init_dev: '%s' is " - "not a block device", jdev_name); - result = -ENOTBLK; - release_journal_dev(super, journal); - } else { - /* ok */ - journal->j_dev_bd = I_BDEV(jdev_inode); - set_blocksize(journal->j_dev_bd, super->s_blocksize); - reiserfs_info(super, - "journal_init_dev: journal device: %s\n", - bdevname(journal->j_dev_bd, b)); - } - } else { - result = PTR_ERR(journal->j_dev_file); - journal->j_dev_file = NULL; + journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal); + if (IS_ERR(journal->j_dev_bd)) { + result = PTR_ERR(journal->j_dev_bd); + journal->j_dev_bd = NULL; reiserfs_warning(super, "journal_init_dev: Cannot open '%s': %i", jdev_name, result); + return result; } - return result; + + set_blocksize(journal->j_dev_bd, super->s_blocksize); + reiserfs_info(super, + "journal_init_dev: journal device: %s\n", + bdevname(journal->j_dev_bd, b)); + return 0; } /** diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index db5ef9b83c3f..336ee43ed7d8 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -177,7 +177,6 @@ struct reiserfs_journal { struct reiserfs_journal_cnode *j_last; /* newest journal block */ struct reiserfs_journal_cnode *j_first; /* oldest journal block. 
start here for traverse */ - struct file *j_dev_file; struct block_device *j_dev_bd; int j_1st_reserved_block; /* first block on s_dev of reserved area journal */ -- cgit v1.2.3-71-gd317 From f7511d5f66f01fc451747b24e79f3ada7a3af9af Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Wed, 30 Apr 2008 00:54:51 -0700 Subject: Basic braille screen reader support This adds a minimalistic braille screen reader support. This is meant to be used by blind people e.g. on boot failures or when / cannot be mounted etc and thus the userland screen readers can not work. [akpm@linux-foundation.org: fix exports] Signed-off-by: Samuel Thibault Cc: Jiri Kosina Cc: Dmitry Torokhov Acked-by: Alan Cox Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/braille-console.txt | 34 ++ Documentation/kernel-parameters.txt | 5 + arch/powerpc/kernel/ppc_ksyms.c | 3 - arch/ppc/kernel/ppc_ksyms.c | 3 - drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/accessibility/Kconfig | 23 ++ drivers/accessibility/Makefile | 1 + drivers/accessibility/braille/Makefile | 1 + drivers/accessibility/braille/braille_console.c | 397 ++++++++++++++++++++++++ drivers/char/consolemap.c | 1 + drivers/char/keyboard.c | 2 + drivers/char/vt.c | 1 + include/linux/console.h | 4 + kernel/printk.c | 90 ++++-- 15 files changed, 538 insertions(+), 30 deletions(-) create mode 100644 Documentation/braille-console.txt create mode 100644 drivers/accessibility/Kconfig create mode 100644 drivers/accessibility/Makefile create mode 100644 drivers/accessibility/braille/Makefile create mode 100644 drivers/accessibility/braille/braille_console.c (limited to 'include/linux') diff --git a/Documentation/braille-console.txt b/Documentation/braille-console.txt new file mode 100644 index 000000000000..000b0fbdc105 --- /dev/null +++ b/Documentation/braille-console.txt @@ -0,0 +1,34 @@ + Linux Braille Console + +To get early boot messages on a braille device (before userspace screen +readers can 
start), you first need to compile the support for the usual serial +console (see serial-console.txt), and for braille device (in Device Drivers - +Accessibility). + +Then you need to specify a console=brl, option on the kernel command line, the +format is: + + console=brl,serial_options... + +where serial_options... are the same as described in serial-console.txt + +So for instance you can use console=brl,ttyS0 if the braille device is connected +to the first serial port, and console=brl,ttyS0,115200 to override the baud rate +to 115200, etc. + +By default, the braille device will just show the last kernel message (console +mode). To review previous messages, press the Insert key to switch to the VT +review mode. In review mode, the arrow keys permit to browse in the VT content, +page up/down keys go at the top/bottom of the screen, and the home key goes back +to the cursor, hence providing very basic screen reviewing facility. + +Sound feedback can be obtained by adding the braille_console.sound=1 kernel +parameter. + +For simplicity, only one braille console can be enabled, other uses of +console=brl,... will be discarded. Also note that it does not interfere with +the console selection mecanism described in serial-console.txt + +For now, only the VisioBraille device is supported. + +Samuel Thibault diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 3ce193f86565..0ba0861b5d18 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -496,6 +496,11 @@ and is between 256 and 4096 characters. It is defined in the file switching to the matching ttyS device later. The options are the same as for ttyS, above. + If the device connected to the port is not a TTY but a braille + device, prepend "brl," before the device type, for instance + console=brl,ttyS0 + For now, only VisioBraille is supported. + earlycon= [KNL] Output early console device and options. 
uart[8250],io,[,options] uart[8250],mmio,[,options] diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 09fcb50c45ae..cf6b5a7d8b3f 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -133,9 +133,6 @@ EXPORT_SYMBOL(adb_try_handler_change); EXPORT_SYMBOL(cuda_request); EXPORT_SYMBOL(cuda_poll); #endif /* CONFIG_ADB_CUDA */ -#ifdef CONFIG_VT -EXPORT_SYMBOL(kd_mksound); -#endif EXPORT_SYMBOL(to_tm); #ifdef CONFIG_PPC32 diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c index d9036ef0b658..16ac11ca7ba0 100644 --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -183,9 +183,6 @@ EXPORT_SYMBOL(cuda_poll); #if defined(CONFIG_BOOTX_TEXT) EXPORT_SYMBOL(btext_update_display); #endif -#ifdef CONFIG_VT -EXPORT_SYMBOL(kd_mksound); -#endif EXPORT_SYMBOL(to_tm); EXPORT_SYMBOL(pm_power_off); diff --git a/drivers/Kconfig b/drivers/Kconfig index 80f0ec91e2cf..59f33fa6af3e 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -84,6 +84,8 @@ source "drivers/memstick/Kconfig" source "drivers/leds/Kconfig" +source "drivers/accessibility/Kconfig" + source "drivers/infiniband/Kconfig" source "drivers/edac/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index e5e394a7e6c0..f65deda72d61 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_WATCHDOG) += watchdog/ obj-$(CONFIG_PHONE) += telephony/ obj-$(CONFIG_MD) += md/ obj-$(CONFIG_BT) += bluetooth/ +obj-$(CONFIG_ACCESSIBILITY) += accessibility/ obj-$(CONFIG_ISDN) += isdn/ obj-$(CONFIG_EDAC) += edac/ obj-$(CONFIG_MCA) += mca/ diff --git a/drivers/accessibility/Kconfig b/drivers/accessibility/Kconfig new file mode 100644 index 000000000000..1264c4b98094 --- /dev/null +++ b/drivers/accessibility/Kconfig @@ -0,0 +1,23 @@ +menuconfig ACCESSIBILITY + bool "Accessibility support" + ---help--- + Enable a submenu where accessibility items may be enabled. + + If unsure, say N. 
+ +if ACCESSIBILITY +config A11Y_BRAILLE_CONSOLE + bool "Console on braille device" + depends on VT + depends on SERIAL_CORE_CONSOLE + ---help--- + Enables console output on a braille device connected to a 8250 + serial port. For now only the VisioBraille device is supported. + + To actually enable it, you need to pass option + console=brl,ttyS0 + to the kernel. Options are the same as for serial console. + + If unsure, say N. + +endif # ACCESSIBILITY diff --git a/drivers/accessibility/Makefile b/drivers/accessibility/Makefile new file mode 100644 index 000000000000..72b01a46546f --- /dev/null +++ b/drivers/accessibility/Makefile @@ -0,0 +1 @@ +obj-y += braille/ diff --git a/drivers/accessibility/braille/Makefile b/drivers/accessibility/braille/Makefile new file mode 100644 index 000000000000..2e9f16c91347 --- /dev/null +++ b/drivers/accessibility/braille/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille_console.o diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c new file mode 100644 index 000000000000..0a5f6b2114c5 --- /dev/null +++ b/drivers/accessibility/braille/braille_console.c @@ -0,0 +1,397 @@ +/* + * Minimalistic braille device kernel support. + * + * By default, shows console messages on the braille device. + * Pressing Insert switches to VC browsing. + * + * Copyright (C) Samuel Thibault + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with the program ; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("samuel.thibault@ens-lyon.org"); +MODULE_DESCRIPTION("braille device"); +MODULE_LICENSE("GPL"); + +/* + * Braille device support part. + */ + +/* Emit various sounds */ +static int sound; +module_param(sound, bool, 0); +MODULE_PARM_DESC(sound, "emit sounds"); + +static void beep(unsigned int freq) +{ + if (sound) + kd_mksound(freq, HZ/10); +} + +/* mini console */ +#define WIDTH 40 +#define BRAILLE_KEY KEY_INSERT +static u16 console_buf[WIDTH]; +static int console_cursor; + +/* mini view of VC */ +static int vc_x, vc_y, lastvc_x, lastvc_y; + +/* show console ? (or show VC) */ +static int console_show = 1; +/* pending newline ? */ +static int console_newline = 1; +static int lastVC = -1; + +static struct console *braille_co; + +/* Very VisioBraille-specific */ +static void braille_write(u16 *buf) +{ + static u16 lastwrite[WIDTH]; + unsigned char data[1 + 1 + 2*WIDTH + 2 + 1], csum = 0, *c; + u16 out; + int i; + + if (!braille_co) + return; + + if (!memcmp(lastwrite, buf, WIDTH * sizeof(*buf))) + return; + memcpy(lastwrite, buf, WIDTH * sizeof(*buf)); + +#define SOH 1 +#define STX 2 +#define ETX 2 +#define EOT 4 +#define ENQ 5 + data[0] = STX; + data[1] = '>'; + csum ^= '>'; + c = &data[2]; + for (i = 0; i < WIDTH; i++) { + out = buf[i]; + if (out >= 0x100) + out = '?'; + else if (out == 0x00) + out = ' '; + csum ^= out; + if (out <= 0x05) { + *c++ = SOH; + out |= 0x40; + } + *c++ = out; + } + + if (csum <= 0x05) { + *c++ = SOH; + csum |= 0x40; + } + *c++ = csum; + *c++ = ETX; + + braille_co->write(braille_co, data, c - data); +} + +/* Follow the VC cursor*/ +static void vc_follow_cursor(struct vc_data *vc) +{ + vc_x = 
vc->vc_x - (vc->vc_x % WIDTH); + vc_y = vc->vc_y; + lastvc_x = vc->vc_x; + lastvc_y = vc->vc_y; +} + +/* Maybe the VC cursor moved, if so follow it */ +static void vc_maybe_cursor_moved(struct vc_data *vc) +{ + if (vc->vc_x != lastvc_x || vc->vc_y != lastvc_y) + vc_follow_cursor(vc); +} + +/* Show portion of VC at vc_x, vc_y */ +static void vc_refresh(struct vc_data *vc) +{ + u16 buf[WIDTH]; + int i; + + for (i = 0; i < WIDTH; i++) { + u16 glyph = screen_glyph(vc, + 2 * (vc_x + i) + vc_y * vc->vc_size_row); + buf[i] = inverse_translate(vc, glyph, 1); + } + braille_write(buf); +} + +/* + * Link to keyboard + */ + +static int keyboard_notifier_call(struct notifier_block *blk, + unsigned long code, void *_param) +{ + struct keyboard_notifier_param *param = _param; + struct vc_data *vc = param->vc; + int ret = NOTIFY_OK; + + if (!param->down) + return ret; + + switch (code) { + case KBD_KEYCODE: + if (console_show) { + if (param->value == BRAILLE_KEY) { + console_show = 0; + beep(880); + vc_maybe_cursor_moved(vc); + vc_refresh(vc); + ret = NOTIFY_STOP; + } + } else { + ret = NOTIFY_STOP; + switch (param->value) { + case KEY_INSERT: + beep(440); + console_show = 1; + lastVC = -1; + braille_write(console_buf); + break; + case KEY_LEFT: + if (vc_x > 0) { + vc_x -= WIDTH; + if (vc_x < 0) + vc_x = 0; + } else if (vc_y >= 1) { + beep(880); + vc_y--; + vc_x = vc->vc_cols-WIDTH; + } else + beep(220); + break; + case KEY_RIGHT: + if (vc_x + WIDTH < vc->vc_cols) { + vc_x += WIDTH; + } else if (vc_y + 1 < vc->vc_rows) { + beep(880); + vc_y++; + vc_x = 0; + } else + beep(220); + break; + case KEY_DOWN: + if (vc_y + 1 < vc->vc_rows) + vc_y++; + else + beep(220); + break; + case KEY_UP: + if (vc_y >= 1) + vc_y--; + else + beep(220); + break; + case KEY_HOME: + vc_follow_cursor(vc); + break; + case KEY_PAGEUP: + vc_x = 0; + vc_y = 0; + break; + case KEY_PAGEDOWN: + vc_x = 0; + vc_y = vc->vc_rows-1; + break; + default: + ret = NOTIFY_OK; + break; + } + if (ret == NOTIFY_STOP) + 
vc_refresh(vc); + } + break; + case KBD_POST_KEYSYM: + { + unsigned char type = KTYP(param->value) - 0xf0; + if (type == KT_SPEC) { + unsigned char val = KVAL(param->value); + int on_off = -1; + + switch (val) { + case KVAL(K_CAPS): + on_off = vc_kbd_led(kbd_table + fg_console, + VC_CAPSLOCK); + break; + case KVAL(K_NUM): + on_off = vc_kbd_led(kbd_table + fg_console, + VC_NUMLOCK); + break; + case KVAL(K_HOLD): + on_off = vc_kbd_led(kbd_table + fg_console, + VC_SCROLLOCK); + break; + } + if (on_off == 1) + beep(880); + else if (on_off == 0) + beep(440); + } + } + case KBD_UNBOUND_KEYCODE: + case KBD_UNICODE: + case KBD_KEYSYM: + /* Unused */ + break; + } + return ret; +} + +static struct notifier_block keyboard_notifier_block = { + .notifier_call = keyboard_notifier_call, +}; + +static int vt_notifier_call(struct notifier_block *blk, + unsigned long code, void *_param) +{ + struct vt_notifier_param *param = _param; + struct vc_data *vc = param->vc; + switch (code) { + case VT_ALLOCATE: + break; + case VT_DEALLOCATE: + break; + case VT_WRITE: + { + unsigned char c = param->c; + if (vc->vc_num != fg_console) + break; + switch (c) { + case '\b': + case 127: + if (console_cursor > 0) { + console_cursor--; + console_buf[console_cursor] = ' '; + } + break; + case '\n': + case '\v': + case '\f': + case '\r': + console_newline = 1; + break; + case '\t': + c = ' '; + /* Fallthrough */ + default: + if (c < 32) + /* Ignore other control sequences */ + break; + if (console_newline) { + memset(console_buf, 0, sizeof(console_buf)); + console_cursor = 0; + console_newline = 0; + } + if (console_cursor == WIDTH) + memmove(console_buf, &console_buf[1], + (WIDTH-1) * sizeof(*console_buf)); + else + console_cursor++; + console_buf[console_cursor-1] = c; + break; + } + if (console_show) + braille_write(console_buf); + else { + vc_maybe_cursor_moved(vc); + vc_refresh(vc); + } + break; + } + case VT_UPDATE: + /* Maybe a VT switch, flush */ + if (console_show) { + if (vc->vc_num != 
lastVC) { + lastVC = vc->vc_num; + memset(console_buf, 0, sizeof(console_buf)); + console_cursor = 0; + braille_write(console_buf); + } + } else { + vc_maybe_cursor_moved(vc); + vc_refresh(vc); + } + break; + } + return NOTIFY_OK; +} + +static struct notifier_block vt_notifier_block = { + .notifier_call = vt_notifier_call, +}; + +/* + * Called from printk.c when console=brl is given + */ + +int braille_register_console(struct console *console, int index, + char *console_options, char *braille_options) +{ + int ret; + if (!console_options) + /* Only support VisioBraille for now */ + console_options = "57600o8"; + if (braille_co) + return -ENODEV; + if (console->setup) { + ret = console->setup(console, console_options); + if (ret != 0) + return ret; + } + console->flags |= CON_ENABLED; + console->index = index; + braille_co = console; + return 0; +} + +int braille_unregister_console(struct console *console) +{ + if (braille_co != console) + return -EINVAL; + braille_co = NULL; + return 0; +} + +static int __init braille_init(void) +{ + register_keyboard_notifier(&keyboard_notifier_block); + register_vt_notifier(&vt_notifier_block); + return 0; +} + +console_initcall(braille_init); diff --git a/drivers/char/consolemap.c b/drivers/char/consolemap.c index 6b104e45a322..4246b8e36cb3 100644 --- a/drivers/char/consolemap.c +++ b/drivers/char/consolemap.c @@ -277,6 +277,7 @@ u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode) return p->inverse_translations[m][glyph]; } } +EXPORT_SYMBOL_GPL(inverse_translate); static void update_user_maps(void) { diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index d1c50b3302e5..7f7e798c1384 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -110,6 +110,7 @@ const int max_vals[] = { const int NR_TYPES = ARRAY_SIZE(max_vals); struct kbd_struct kbd_table[MAX_NR_CONSOLES]; +EXPORT_SYMBOL_GPL(kbd_table); static struct kbd_struct *kbd = kbd_table; struct vt_spawn_console vt_spawn_con = { @@ 
-260,6 +261,7 @@ void kd_mksound(unsigned int hz, unsigned int ticks) } else kd_nosound(0); } +EXPORT_SYMBOL(kd_mksound); /* * Setting the keyboard rate. diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 71cf203d282d..e458b08139af 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -4004,6 +4004,7 @@ u16 screen_glyph(struct vc_data *vc, int offset) c |= 0x100; return c; } +EXPORT_SYMBOL_GPL(screen_glyph); /* used by vcs - note the word offset */ unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed) diff --git a/include/linux/console.h b/include/linux/console.h index a5f88a6a259d..a4f27fbdf549 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -91,6 +91,7 @@ void give_up_console(const struct consw *sw); #define CON_ENABLED (4) #define CON_BOOT (8) #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ +#define CON_BRL (32) /* Used for a braille device */ struct console { char name[16]; @@ -121,6 +122,9 @@ extern struct tty_driver *console_device(int *); extern void console_stop(struct console *); extern void console_start(struct console *); extern int is_console_locked(void); +extern int braille_register_console(struct console *, int index, + char *console_options, char *braille_options); +extern int braille_unregister_console(struct console *); extern int console_suspend_enabled; diff --git a/kernel/printk.c b/kernel/printk.c index 0d232589a923..e61346faf6a5 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -111,6 +111,9 @@ struct console_cmdline char name[8]; /* Name of the driver */ int index; /* Minor dev. 
to use */ char *options; /* Options for the driver */ +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + char *brl_options; /* Options for braille driver */ +#endif }; #define MAX_CMDLINECONSOLES 8 @@ -808,15 +811,60 @@ static void call_console_drivers(unsigned start, unsigned end) #endif +static int __add_preferred_console(char *name, int idx, char *options, + char *brl_options) +{ + struct console_cmdline *c; + int i; + + /* + * See if this tty is not yet registered, and + * if we have a slot free. + */ + for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) + if (strcmp(console_cmdline[i].name, name) == 0 && + console_cmdline[i].index == idx) { + if (!brl_options) + selected_console = i; + return 0; + } + if (i == MAX_CMDLINECONSOLES) + return -E2BIG; + if (!brl_options) + selected_console = i; + c = &console_cmdline[i]; + strlcpy(c->name, name, sizeof(c->name)); + c->options = options; +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + c->brl_options = brl_options; +#endif + c->index = idx; + return 0; +} /* * Set up a list of consoles. Called from init/main.c */ static int __init console_setup(char *str) { char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */ - char *s, *options; + char *s, *options, *brl_options = NULL; int idx; +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + if (!memcmp(str, "brl,", 4)) { + brl_options = ""; + str += 4; + } else if (!memcmp(str, "brl=", 4)) { + brl_options = str + 4; + str = strchr(brl_options, ','); + if (!str) { + printk(KERN_ERR "need port name after brl=\n"); + return 1; + } + *(str++) = 0; + } +#endif + /* * Decode str into name, index, options. 
*/ @@ -841,7 +889,7 @@ static int __init console_setup(char *str) idx = simple_strtoul(s, NULL, 10); *s = 0; - add_preferred_console(buf, idx, options); + __add_preferred_console(buf, idx, options, brl_options); return 1; } __setup("console=", console_setup); @@ -861,28 +909,7 @@ __setup("console=", console_setup); */ int add_preferred_console(char *name, int idx, char *options) { - struct console_cmdline *c; - int i; - - /* - * See if this tty is not yet registered, and - * if we have a slot free. - */ - for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) - if (strcmp(console_cmdline[i].name, name) == 0 && - console_cmdline[i].index == idx) { - selected_console = i; - return 0; - } - if (i == MAX_CMDLINECONSOLES) - return -E2BIG; - selected_console = i; - c = &console_cmdline[i]; - memcpy(c->name, name, sizeof(c->name)); - c->name[sizeof(c->name) - 1] = 0; - c->options = options; - c->index = idx; - return 0; + return __add_preferred_console(name, idx, options, NULL); } int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) @@ -1163,6 +1190,16 @@ void register_console(struct console *console) continue; if (console->index < 0) console->index = console_cmdline[i].index; +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + if (console_cmdline[i].brl_options) { + console->flags |= CON_BRL; + braille_register_console(console, + console_cmdline[i].index, + console_cmdline[i].options, + console_cmdline[i].brl_options); + return; + } +#endif if (console->setup && console->setup(console, console_cmdline[i].options) != 0) break; @@ -1221,6 +1258,11 @@ int unregister_console(struct console *console) struct console *a, *b; int res = 1; +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + if (console->flags & CON_BRL) + return braille_unregister_console(console); +#endif + acquire_console_sem(); if (console_drivers == console) { console_drivers=console->next; -- cgit v1.2.3-71-gd317 From bdf4bbaaee3d4b8f555658333cbce1affe9070fb Mon Sep 17 00:00:00 
2001 From: Harvey Harrison Date: Wed, 30 Apr 2008 00:54:55 -0700 Subject: Add macros similar to min/max/min_t/max_t Also, change the variable names used in the min/max macros to avoid shadowed variable warnings when min/max min_t/max_t are nested. Small formatting changes to make all the macros have a similar form. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix v4l build] Signed-off-by: Harvey Harrison Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Cc: Bartlomiej Zolnierkiewicz Cc: Jeff Garzik Cc: Tejun Heo Cc: Michael Buesch Cc: "John W. Linville" Cc: Miklos Szeredi Cc: Dmitry Torokhov Cc: Jiri Kosina Cc: Arnaldo Carvalho de Melo Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/video/bt8xx/bttvp.h | 2 - drivers/media/video/usbvideo/vicam.c | 6 --- include/linux/kernel.h | 91 +++++++++++++++++++++++++++++------- 3 files changed, 74 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/video/bt8xx/bttvp.h b/drivers/media/video/bt8xx/bttvp.h index 03816b73f847..27da7b423275 100644 --- a/drivers/media/video/bt8xx/bttvp.h +++ b/drivers/media/video/bt8xx/bttvp.h @@ -81,8 +81,6 @@ /* Limits scaled width, which must be a multiple of 4. */ #define MAX_HACTIVE (0x3FF & -4) -#define clamp(x, low, high) min (max (low, x), high) - #define BTTV_NORMS (\ V4L2_STD_PAL | V4L2_STD_PAL_N | \ V4L2_STD_PAL_Nc | V4L2_STD_SECAM | \ diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c index 64819353276a..17f542dfb366 100644 --- a/drivers/media/video/usbvideo/vicam.c +++ b/drivers/media/video/usbvideo/vicam.c @@ -70,12 +70,6 @@ #define VICAM_HEADER_SIZE 64 -#define clamp( x, l, h ) max_t( __typeof__( x ), \ - ( l ), \ - min_t( __typeof__( x ), \ - ( h ), \ - ( x ) ) ) - /* Not sure what all the bytes in these char * arrays do, but they're necessary to make * the camera work. 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 53839ba265ec..4d46e299afb5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -338,33 +338,90 @@ extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, #endif /* __LITTLE_ENDIAN */ /* - * min()/max() macros that also do + * min()/max()/clamp() macros that also do * strict type-checking.. See the * "unnecessary" pointer comparison. */ -#define min(x,y) ({ \ - typeof(x) _x = (x); \ - typeof(y) _y = (y); \ - (void) (&_x == &_y); \ - _x < _y ? _x : _y; }) - -#define max(x,y) ({ \ - typeof(x) _x = (x); \ - typeof(y) _y = (y); \ - (void) (&_x == &_y); \ - _x > _y ? _x : _y; }) +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? _max1 : _max2; }) + +/** + * clamp - return a value clamped to a given range with strict typechecking + * @val: current value + * @min: minimum allowable value + * @max: maximum allowable value + * + * This macro does strict typechecking of min/max to make sure they are of the + * same type as val. See the unnecessary pointer comparisons. + */ +#define clamp(val, min, max) ({ \ + typeof(val) __val = (val); \ + typeof(min) __min = (min); \ + typeof(max) __max = (max); \ + (void) (&__val == &__min); \ + (void) (&__val == &__max); \ + __val = __val < __min ? __min: __val; \ + __val > __max ? __max: __val; }) /* * ..and if you can't take the strict * types, you can specify one yourself. * - * Or not use min/max at all, of course. + * Or not use min/max/clamp at all, of course. + */ +#define min_t(type, x, y) ({ \ + type __min1 = (x); \ + type __min2 = (y); \ + __min1 < __min2 ? __min1: __min2; }) + +#define max_t(type, x, y) ({ \ + type __max1 = (x); \ + type __max2 = (y); \ + __max1 > __max2 ? 
__max1: __max2; }) + +/** + * clamp_t - return a value clamped to a given range using a given type + * @type: the type of variable to use + * @val: current value + * @min: minimum allowable value + * @max: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of type + * 'type' to make all the comparisons. */ -#define min_t(type,x,y) \ - ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) -#define max_t(type,x,y) \ - ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; }) +#define clamp_t(type, val, min, max) ({ \ + type __val = (val); \ + type __min = (min); \ + type __max = (max); \ + __val = __val < __min ? __min: __val; \ + __val > __max ? __max: __val; }) +/** + * clamp_val - return a value clamped to a given range using val's type + * @val: current value + * @min: minimum allowable value + * @max: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of whatever + * type the input argument 'val' is. This is useful when val is an unsigned + * type and min and max are literals that will otherwise be assigned a signed + * integer type. + */ +#define clamp_val(val, min, max) ({ \ + typeof(val) __val = (val); \ + typeof(val) __min = (min); \ + typeof(val) __max = (max); \ + __val = __val < __min ? __min: __val; \ + __val > __max ? __max: __val; }) /** * container_of - cast a member of a structure out to the containing structure -- cgit v1.2.3-71-gd317 From 30327acf7846c5eb97c8e31c78317a2918d3e515 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Apr 2008 00:54:59 -0700 Subject: slab: add a flag to prevent debug_free checks on a kmem_cache This is a preparatory patch for the debugobjects infrastructure. The flag prevents debug_free checks on kmem_caches. This is necessary to avoid recursive calls into a debug mechanism which uses a kmem_cache itself.
Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Pekka Enberg Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index f62caaad94e0..6d03c954f641 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -29,6 +29,13 @@ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ +/* Flag to prevent checks on free */ +#ifdef CONFIG_DEBUG_OBJECTS +# define SLAB_DEBUG_OBJECTS 0x00400000UL +#else +# define SLAB_DEBUG_OBJECTS 0x00000000UL +#endif + /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ -- cgit v1.2.3-71-gd317 From 3ac7fe5a4aab409bd5674d0b070bce97f9d20872 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Apr 2008 00:55:01 -0700 Subject: infrastructure to debug (dynamic) objects We can see an ever repeating problem pattern with objects of any kind in the kernel: 1) freeing of active objects 2) reinitialization of active objects Both problems can be hard to debug because the crash happens at a point where we have no chance to decode the root cause anymore. One problem spot are kernel timers, where the detection of the problem often happens in interrupt context and usually causes the machine to panic. While working on a timer related bug report I had to hack specialized code into the timer subsystem to get a reasonable hint for the root cause. This debug hack was fine for temporary use, but far from a mergeable solution due to the intrusiveness into the timer code. The code further lacked the ability to detect and report the root cause instantly and keep the system operational. 
Keeping the system operational is important to get hold of the debug information without special debugging aids like serial consoles and special knowledge of the bug reporter. The problems described above are not restricted to timers, but timers tend to expose them, usually as a full system crash. Other objects are less explosive, but the symptoms caused by such mistakes can be even harder to debug. Instead of creating specialized debugging code for the timer subsystem, a generic infrastructure is created which allows developers to verify their code and provides an easy-to-enable debug facility for users in case of trouble. The debugobjects core code keeps track of operations on static and dynamic objects by inserting them into a hashed list and sanity checking them on object operations and provides additional checks whenever kernel memory is freed. The tracked object operations are: - initializing an object - adding an object to a subsystem list - deleting an object from a subsystem list Each operation is sanity checked before the operation is executed and the subsystem specific code can provide a fixup function which makes it possible to prevent the damage caused by the operation. When the sanity check triggers, a warning message and a stack trace are printed. The list of operations can be extended if the need arises. For now it's limited to the requirements of the first user (timers). The core code enqueues the objects into hash buckets. The hash index is generated from the address of the object to simplify the lookup for the check on kfree/vfree. Each bucket has its own spinlock to avoid contention on a global lock. The debug code can be compiled in without being active. The runtime overhead is minimal and could be optimized by asm alternatives. A kernel command line option enables the debugging code. Thanks to Ingo Molnar for review, suggestions and cleanup patches.
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Cc: Greg KH Cc: Randy Dunlap Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 2 + include/linux/debugobjects.h | 90 ++++ init/main.c | 3 + lib/Kconfig.debug | 23 + lib/Makefile | 1 + lib/debugobjects.c | 890 ++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 10 +- mm/slab.c | 10 +- mm/slub.c | 3 + mm/vmalloc.c | 2 + 10 files changed, 1030 insertions(+), 4 deletions(-) create mode 100644 include/linux/debugobjects.h create mode 100644 lib/debugobjects.c (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0ba0861b5d18..a3c35446e755 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -561,6 +561,8 @@ and is between 256 and 4096 characters. It is defined in the file 1 will print _a lot_ more information - normally only useful to kernel developers. + debug_objects [KNL] Enable object debugging + decnet.addr= [HW,NET] Format: [,] See also Documentation/networking/decnet.txt. 
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h new file mode 100644 index 000000000000..8c243aaa86a7 --- /dev/null +++ b/include/linux/debugobjects.h @@ -0,0 +1,90 @@ +#ifndef _LINUX_DEBUGOBJECTS_H +#define _LINUX_DEBUGOBJECTS_H + +#include +#include + +enum debug_obj_state { + ODEBUG_STATE_NONE, + ODEBUG_STATE_INIT, + ODEBUG_STATE_INACTIVE, + ODEBUG_STATE_ACTIVE, + ODEBUG_STATE_DESTROYED, + ODEBUG_STATE_NOTAVAILABLE, + ODEBUG_STATE_MAX, +}; + +struct debug_obj_descr; + +/** + * struct debug_obj - representaion of an tracked object + * @node: hlist node to link the object into the tracker list + * @state: tracked object state + * @object: pointer to the real object + * @descr: pointer to an object type specific debug description structure + */ +struct debug_obj { + struct hlist_node node; + enum debug_obj_state state; + void *object; + struct debug_obj_descr *descr; +}; + +/** + * struct debug_obj_descr - object type specific debug description structure + * @name: name of the object typee + * @fixup_init: fixup function, which is called when the init check + * fails + * @fixup_activate: fixup function, which is called when the activate check + * fails + * @fixup_destroy: fixup function, which is called when the destroy check + * fails + * @fixup_free: fixup function, which is called when the free check + * fails + */ +struct debug_obj_descr { + const char *name; + + int (*fixup_init) (void *addr, enum debug_obj_state state); + int (*fixup_activate) (void *addr, enum debug_obj_state state); + int (*fixup_destroy) (void *addr, enum debug_obj_state state); + int (*fixup_free) (void *addr, enum debug_obj_state state); +}; + +#ifdef CONFIG_DEBUG_OBJECTS +extern void debug_object_init (void *addr, struct debug_obj_descr *descr); +extern void +debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr); +extern void debug_object_activate (void *addr, struct debug_obj_descr *descr); +extern void debug_object_deactivate(void *addr, struct 
debug_obj_descr *descr); +extern void debug_object_destroy (void *addr, struct debug_obj_descr *descr); +extern void debug_object_free (void *addr, struct debug_obj_descr *descr); + +extern void debug_objects_early_init(void); +extern void debug_objects_mem_init(void); +#else +static inline void +debug_object_init (void *addr, struct debug_obj_descr *descr) { } +static inline void +debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr) { } +static inline void +debug_object_activate (void *addr, struct debug_obj_descr *descr) { } +static inline void +debug_object_deactivate(void *addr, struct debug_obj_descr *descr) { } +static inline void +debug_object_destroy (void *addr, struct debug_obj_descr *descr) { } +static inline void +debug_object_free (void *addr, struct debug_obj_descr *descr) { } + +static inline void debug_objects_early_init(void) { } +static inline void debug_objects_mem_init(void) { } +#endif + +#ifdef CONFIG_DEBUG_OBJECTS_FREE +extern void debug_check_no_obj_freed(const void *address, unsigned long size); +#else +static inline void +debug_check_no_obj_freed(const void *address, unsigned long size) { } +#endif + +#endif diff --git a/init/main.c b/init/main.c index dff253cfcd9f..a87d4ca5c36c 100644 --- a/init/main.c +++ b/init/main.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -543,6 +544,7 @@ asmlinkage void __init start_kernel(void) */ unwind_init(); lockdep_init(); + debug_objects_early_init(); cgroup_init_early(); local_irq_disable(); @@ -638,6 +640,7 @@ asmlinkage void __init start_kernel(void) enable_debug_pagealloc(); cpu_hotplug_init(); kmem_cache_init(); + debug_objects_mem_init(); idr_init_cache(); setup_per_cpu_pageset(); numa_policy_init(); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 754cc0027f2a..3e132b0a59cc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -194,6 +194,29 @@ config TIMER_STATS (it defaults to deactivated on bootup and will only be activated if 
some application like powertop activates it explicitly). +config DEBUG_OBJECTS + bool "Debug object operations" + depends on DEBUG_KERNEL + help + If you say Y here, additional code will be inserted into the + kernel to track the life time of various objects and validate + the operations on those objects. + +config DEBUG_OBJECTS_SELFTEST + bool "Debug objects selftest" + depends on DEBUG_OBJECTS + help + This enables the selftest of the object debug code. + +config DEBUG_OBJECTS_FREE + bool "Debug objects in freed memory" + depends on DEBUG_OBJECTS + help + This enables checks whether a k/v free operation frees an area + which contains an object which has not been deactivated + properly. This can make kmalloc/kfree-intensive workloads + much slower. + config DEBUG_SLAB bool "Debug slab memory allocations" depends on DEBUG_KERNEL && SLAB diff --git a/lib/Makefile b/lib/Makefile index 0ae4eb047aac..74b0cfb1fcc3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_PLIST) += plist.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o +obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o ifneq ($(CONFIG_HAVE_DEC_LOCK),y) lib-y += dec_and_lock.o diff --git a/lib/debugobjects.c b/lib/debugobjects.c new file mode 100644 index 000000000000..a76a5e122ae1 --- /dev/null +++ b/lib/debugobjects.c @@ -0,0 +1,890 @@ +/* + * Generic infrastructure for lifetime debugging of objects. 
+ * + * Started by Thomas Gleixner + * + * Copyright (C) 2008, Thomas Gleixner + * + * For licencing details see kernel-base/COPYING + */ +#include +#include +#include +#include +#include + +#define ODEBUG_HASH_BITS 14 +#define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) + +#define ODEBUG_POOL_SIZE 512 +#define ODEBUG_POOL_MIN_LEVEL 256 + +#define ODEBUG_CHUNK_SHIFT PAGE_SHIFT +#define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT) +#define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1)) + +struct debug_bucket { + struct hlist_head list; + spinlock_t lock; +}; + +static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; + +static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE]; + +static DEFINE_SPINLOCK(pool_lock); + +static HLIST_HEAD(obj_pool); + +static int obj_pool_min_free = ODEBUG_POOL_SIZE; +static int obj_pool_free = ODEBUG_POOL_SIZE; +static int obj_pool_used; +static int obj_pool_max_used; +static struct kmem_cache *obj_cache; + +static int debug_objects_maxchain __read_mostly; +static int debug_objects_fixups __read_mostly; +static int debug_objects_warnings __read_mostly; +static int debug_objects_enabled __read_mostly; +static struct debug_obj_descr *descr_test __read_mostly; + +static int __init enable_object_debug(char *str) +{ + debug_objects_enabled = 1; + return 0; +} +early_param("debug_objects", enable_object_debug); + +static const char *obj_states[ODEBUG_STATE_MAX] = { + [ODEBUG_STATE_NONE] = "none", + [ODEBUG_STATE_INIT] = "initialized", + [ODEBUG_STATE_INACTIVE] = "inactive", + [ODEBUG_STATE_ACTIVE] = "active", + [ODEBUG_STATE_DESTROYED] = "destroyed", + [ODEBUG_STATE_NOTAVAILABLE] = "not available", +}; + +static int fill_pool(void) +{ + gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; + struct debug_obj *new; + + if (likely(obj_pool_free >= ODEBUG_POOL_MIN_LEVEL)) + return obj_pool_free; + + if (unlikely(!obj_cache)) + return obj_pool_free; + + while (obj_pool_free < ODEBUG_POOL_MIN_LEVEL) { + + new = kmem_cache_zalloc(obj_cache, gfp); + if 
(!new) + return obj_pool_free; + + spin_lock(&pool_lock); + hlist_add_head(&new->node, &obj_pool); + obj_pool_free++; + spin_unlock(&pool_lock); + } + return obj_pool_free; +} + +/* + * Lookup an object in the hash bucket. + */ +static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) +{ + struct hlist_node *node; + struct debug_obj *obj; + int cnt = 0; + + hlist_for_each_entry(obj, node, &b->list, node) { + cnt++; + if (obj->object == addr) + return obj; + } + if (cnt > debug_objects_maxchain) + debug_objects_maxchain = cnt; + + return NULL; +} + +/* + * Allocate a new object. If the pool is empty and no refill possible, + * switch off the debugger. + */ +static struct debug_obj * +alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) +{ + struct debug_obj *obj = NULL; + int retry = 0; + +repeat: + spin_lock(&pool_lock); + if (obj_pool.first) { + obj = hlist_entry(obj_pool.first, typeof(*obj), node); + + obj->object = addr; + obj->descr = descr; + obj->state = ODEBUG_STATE_NONE; + hlist_del(&obj->node); + + hlist_add_head(&obj->node, &b->list); + + obj_pool_used++; + if (obj_pool_used > obj_pool_max_used) + obj_pool_max_used = obj_pool_used; + + obj_pool_free--; + if (obj_pool_free < obj_pool_min_free) + obj_pool_min_free = obj_pool_free; + } + spin_unlock(&pool_lock); + + if (fill_pool() && !obj && !retry++) + goto repeat; + + return obj; +} + +/* + * Put the object back into the pool or give it back to kmem_cache: + */ +static void free_object(struct debug_obj *obj) +{ + unsigned long idx = (unsigned long)(obj - obj_static_pool); + + if (obj_pool_free < ODEBUG_POOL_SIZE || idx < ODEBUG_POOL_SIZE) { + spin_lock(&pool_lock); + hlist_add_head(&obj->node, &obj_pool); + obj_pool_free++; + obj_pool_used--; + spin_unlock(&pool_lock); + } else { + spin_lock(&pool_lock); + obj_pool_used--; + spin_unlock(&pool_lock); + kmem_cache_free(obj_cache, obj); + } +} + +/* + * We run out of memory. 
That means we probably have tons of objects + * allocated. + */ +static void debug_objects_oom(void) +{ + struct debug_bucket *db = obj_hash; + struct hlist_node *node, *tmp; + struct debug_obj *obj; + unsigned long flags; + int i; + + printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n"); + + for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { + spin_lock_irqsave(&db->lock, flags); + hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) { + hlist_del(&obj->node); + free_object(obj); + } + spin_unlock_irqrestore(&db->lock, flags); + } +} + +/* + * We use the pfn of the address for the hash. That way we can check + * for freed objects simply by checking the affected bucket. + */ +static struct debug_bucket *get_bucket(unsigned long addr) +{ + unsigned long hash; + + hash = hash_long((addr >> ODEBUG_CHUNK_SHIFT), ODEBUG_HASH_BITS); + return &obj_hash[hash]; +} + +static void debug_print_object(struct debug_obj *obj, char *msg) +{ + static int limit; + + if (limit < 5 && obj->descr != descr_test) { + limit++; + printk(KERN_ERR "ODEBUG: %s %s object type: %s\n", msg, + obj_states[obj->state], obj->descr->name); + WARN_ON(1); + } + debug_objects_warnings++; +} + +/* + * Try to repair the damage, so we have a better chance to get useful + * debug output. 
+ */ +static void +debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state), + void * addr, enum debug_obj_state state) +{ + if (fixup) + debug_objects_fixups += fixup(addr, state); +} + +static void debug_object_is_on_stack(void *addr, int onstack) +{ + void *stack = current->stack; + int is_on_stack; + static int limit; + + if (limit > 4) + return; + + is_on_stack = (addr >= stack && addr < (stack + THREAD_SIZE)); + + if (is_on_stack == onstack) + return; + + limit++; + if (is_on_stack) + printk(KERN_WARNING + "ODEBUG: object is on stack, but not annotated\n"); + else + printk(KERN_WARNING + "ODEBUG: object is not on stack, but annotated\n"); + WARN_ON(1); +} + +static void +__debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) +{ + enum debug_obj_state state; + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + db = get_bucket((unsigned long) addr); + + spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (!obj) { + obj = alloc_object(addr, db, descr); + if (!obj) { + debug_objects_enabled = 0; + spin_unlock_irqrestore(&db->lock, flags); + debug_objects_oom(); + return; + } + debug_object_is_on_stack(addr, onstack); + } + + switch (obj->state) { + case ODEBUG_STATE_NONE: + case ODEBUG_STATE_INIT: + case ODEBUG_STATE_INACTIVE: + obj->state = ODEBUG_STATE_INIT; + break; + + case ODEBUG_STATE_ACTIVE: + debug_print_object(obj, "init"); + state = obj->state; + spin_unlock_irqrestore(&db->lock, flags); + debug_object_fixup(descr->fixup_init, addr, state); + return; + + case ODEBUG_STATE_DESTROYED: + debug_print_object(obj, "init"); + break; + default: + break; + } + + spin_unlock_irqrestore(&db->lock, flags); +} + +/** + * debug_object_init - debug checks when an object is initialized + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_init(void *addr, struct debug_obj_descr *descr) +{ + if 
(!debug_objects_enabled) + return; + + __debug_object_init(addr, descr, 0); +} + +/** + * debug_object_init_on_stack - debug checks when an object on stack is + * initialized + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr) +{ + if (!debug_objects_enabled) + return; + + __debug_object_init(addr, descr, 1); +} + +/** + * debug_object_activate - debug checks when an object is activated + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_activate(void *addr, struct debug_obj_descr *descr) +{ + enum debug_obj_state state; + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (obj) { + switch (obj->state) { + case ODEBUG_STATE_INIT: + case ODEBUG_STATE_INACTIVE: + obj->state = ODEBUG_STATE_ACTIVE; + break; + + case ODEBUG_STATE_ACTIVE: + debug_print_object(obj, "activate"); + state = obj->state; + spin_unlock_irqrestore(&db->lock, flags); + debug_object_fixup(descr->fixup_activate, addr, state); + return; + + case ODEBUG_STATE_DESTROYED: + debug_print_object(obj, "activate"); + break; + default: + break; + } + spin_unlock_irqrestore(&db->lock, flags); + return; + } + + spin_unlock_irqrestore(&db->lock, flags); + /* + * This happens when a static object is activated. We + * let the type specific code decide whether this is + * true or not. 
+ */ + debug_object_fixup(descr->fixup_activate, addr, + ODEBUG_STATE_NOTAVAILABLE); +} + +/** + * debug_object_deactivate - debug checks when an object is deactivated + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) +{ + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (obj) { + switch (obj->state) { + case ODEBUG_STATE_INIT: + case ODEBUG_STATE_INACTIVE: + case ODEBUG_STATE_ACTIVE: + obj->state = ODEBUG_STATE_INACTIVE; + break; + + case ODEBUG_STATE_DESTROYED: + debug_print_object(obj, "deactivate"); + break; + default: + break; + } + } else { + struct debug_obj o = { .object = addr, + .state = ODEBUG_STATE_NOTAVAILABLE, + .descr = descr }; + + debug_print_object(&o, "deactivate"); + } + + spin_unlock_irqrestore(&db->lock, flags); +} + +/** + * debug_object_destroy - debug checks when an object is destroyed + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_destroy(void *addr, struct debug_obj_descr *descr) +{ + enum debug_obj_state state; + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (!obj) + goto out_unlock; + + switch (obj->state) { + case ODEBUG_STATE_NONE: + case ODEBUG_STATE_INIT: + case ODEBUG_STATE_INACTIVE: + obj->state = ODEBUG_STATE_DESTROYED; + break; + case ODEBUG_STATE_ACTIVE: + debug_print_object(obj, "destroy"); + state = obj->state; + spin_unlock_irqrestore(&db->lock, flags); + debug_object_fixup(descr->fixup_destroy, addr, state); + return; + + case 
ODEBUG_STATE_DESTROYED: + debug_print_object(obj, "destroy"); + break; + default: + break; + } +out_unlock: + spin_unlock_irqrestore(&db->lock, flags); +} + +/** + * debug_object_free - debug checks when an object is freed + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_free(void *addr, struct debug_obj_descr *descr) +{ + enum debug_obj_state state; + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (!obj) + goto out_unlock; + + switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + debug_print_object(obj, "free"); + state = obj->state; + spin_unlock_irqrestore(&db->lock, flags); + debug_object_fixup(descr->fixup_free, addr, state); + return; + default: + hlist_del(&obj->node); + free_object(obj); + break; + } +out_unlock: + spin_unlock_irqrestore(&db->lock, flags); +} + +#ifdef CONFIG_DEBUG_OBJECTS_FREE +static void __debug_check_no_obj_freed(const void *address, unsigned long size) +{ + unsigned long flags, oaddr, saddr, eaddr, paddr, chunks; + struct hlist_node *node, *tmp; + struct debug_obj_descr *descr; + enum debug_obj_state state; + struct debug_bucket *db; + struct debug_obj *obj; + int cnt; + + saddr = (unsigned long) address; + eaddr = saddr + size; + paddr = saddr & ODEBUG_CHUNK_MASK; + chunks = ((eaddr - paddr) + (ODEBUG_CHUNK_SIZE - 1)); + chunks >>= ODEBUG_CHUNK_SHIFT; + + for (;chunks > 0; chunks--, paddr += ODEBUG_CHUNK_SIZE) { + db = get_bucket(paddr); + +repeat: + cnt = 0; + spin_lock_irqsave(&db->lock, flags); + hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) { + cnt++; + oaddr = (unsigned long) obj->object; + if (oaddr < saddr || oaddr >= eaddr) + continue; + + switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + debug_print_object(obj, "free"); + descr = obj->descr; + 
state = obj->state; + spin_unlock_irqrestore(&db->lock, flags); + debug_object_fixup(descr->fixup_free, + (void *) oaddr, state); + goto repeat; + default: + hlist_del(&obj->node); + free_object(obj); + break; + } + } + spin_unlock_irqrestore(&db->lock, flags); + if (cnt > debug_objects_maxchain) + debug_objects_maxchain = cnt; + } +} + +void debug_check_no_obj_freed(const void *address, unsigned long size) +{ + if (debug_objects_enabled) + __debug_check_no_obj_freed(address, size); +} +#endif + +#ifdef CONFIG_DEBUG_FS + +static int debug_stats_show(struct seq_file *m, void *v) +{ + seq_printf(m, "max_chain :%d\n", debug_objects_maxchain); + seq_printf(m, "warnings :%d\n", debug_objects_warnings); + seq_printf(m, "fixups :%d\n", debug_objects_fixups); + seq_printf(m, "pool_free :%d\n", obj_pool_free); + seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); + seq_printf(m, "pool_used :%d\n", obj_pool_used); + seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); + return 0; +} + +static int debug_stats_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, debug_stats_show, NULL); +} + +static const struct file_operations debug_stats_fops = { + .open = debug_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init debug_objects_init_debugfs(void) +{ + struct dentry *dbgdir, *dbgstats; + + if (!debug_objects_enabled) + return 0; + + dbgdir = debugfs_create_dir("debug_objects", NULL); + if (!dbgdir) + return -ENOMEM; + + dbgstats = debugfs_create_file("stats", 0444, dbgdir, NULL, + &debug_stats_fops); + if (!dbgstats) + goto err; + + return 0; + +err: + debugfs_remove(dbgdir); + + return -ENOMEM; +} +__initcall(debug_objects_init_debugfs); + +#else +static inline void debug_objects_init_debugfs(void) { } +#endif + +#ifdef CONFIG_DEBUG_OBJECTS_SELFTEST + +/* Random data structure for the self test */ +struct self_test { + unsigned long dummy1[6]; + int static_init; + unsigned long dummy2[3]; 
+}; + +static __initdata struct debug_obj_descr descr_type_test; + +/* + * fixup_init is called when: + * - an active object is initialized + */ +static int __init fixup_init(void *addr, enum debug_obj_state state) +{ + struct self_test *obj = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + debug_object_deactivate(obj, &descr_type_test); + debug_object_init(obj, &descr_type_test); + return 1; + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int __init fixup_activate(void *addr, enum debug_obj_state state) +{ + struct self_test *obj = addr; + + switch (state) { + case ODEBUG_STATE_NOTAVAILABLE: + if (obj->static_init == 1) { + debug_object_init(obj, &descr_type_test); + debug_object_activate(obj, &descr_type_test); + /* + * Real code should return 0 here ! This is + * not a fixup of some bad behaviour. We + * merely call the debug_init function to keep + * track of the object.
+ */ + return 1; + } else { + /* Real code needs to emit a warning here */ + } + return 0; + + case ODEBUG_STATE_ACTIVE: + debug_object_deactivate(obj, &descr_type_test); + debug_object_activate(obj, &descr_type_test); + return 1; + + default: + return 0; + } +} + +/* + * fixup_destroy is called when: + * - an active object is destroyed + */ +static int __init fixup_destroy(void *addr, enum debug_obj_state state) +{ + struct self_test *obj = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + debug_object_deactivate(obj, &descr_type_test); + debug_object_destroy(obj, &descr_type_test); + return 1; + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int __init fixup_free(void *addr, enum debug_obj_state state) +{ + struct self_test *obj = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + debug_object_deactivate(obj, &descr_type_test); + debug_object_free(obj, &descr_type_test); + return 1; + default: + return 0; + } +} + +static int +check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) +{ + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + int res = -EINVAL; + + db = get_bucket((unsigned long) addr); + + spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (!obj && state != ODEBUG_STATE_NONE) { + printk(KERN_ERR "ODEBUG: selftest object not found\n"); + WARN_ON(1); + goto out; + } + if (obj && obj->state != state) { + printk(KERN_ERR "ODEBUG: selftest wrong state: %d != %d\n", + obj->state, state); + WARN_ON(1); + goto out; + } + if (fixups != debug_objects_fixups) { + printk(KERN_ERR "ODEBUG: selftest fixups failed %d != %d\n", + fixups, debug_objects_fixups); + WARN_ON(1); + goto out; + } + if (warnings != debug_objects_warnings) { + printk(KERN_ERR "ODEBUG: selftest warnings failed %d != %d\n", + warnings, debug_objects_warnings); + WARN_ON(1); + goto out; + } + res = 0; +out: + spin_unlock_irqrestore(&db->lock, 
flags); + if (res) + debug_objects_enabled = 0; + return res; +} + +static __initdata struct debug_obj_descr descr_type_test = { + .name = "selftest", + .fixup_init = fixup_init, + .fixup_activate = fixup_activate, + .fixup_destroy = fixup_destroy, + .fixup_free = fixup_free, +}; + +static __initdata struct self_test obj = { .static_init = 0 }; + +static void __init debug_objects_selftest(void) +{ + int fixups, oldfixups, warnings, oldwarnings; + unsigned long flags; + + local_irq_save(flags); + + fixups = oldfixups = debug_objects_fixups; + warnings = oldwarnings = debug_objects_warnings; + descr_test = &descr_type_test; + + debug_object_init(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings)) + goto out; + debug_object_activate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings)) + goto out; + debug_object_activate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, ++warnings)) + goto out; + debug_object_deactivate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_INACTIVE, fixups, warnings)) + goto out; + debug_object_destroy(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, warnings)) + goto out; + debug_object_init(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings)) + goto out; + debug_object_activate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings)) + goto out; + debug_object_deactivate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings)) + goto out; + debug_object_free(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings)) + goto out; + + obj.static_init = 1; + debug_object_activate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, warnings)) + goto out; + debug_object_init(&obj, &descr_type_test); + if 
(check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings)) + goto out; + debug_object_free(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings)) + goto out; + +#ifdef CONFIG_DEBUG_OBJECTS_FREE + debug_object_init(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings)) + goto out; + debug_object_activate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings)) + goto out; + __debug_check_no_obj_freed(&obj, sizeof(obj)); + if (check_results(&obj, ODEBUG_STATE_NONE, ++fixups, ++warnings)) + goto out; +#endif + printk(KERN_INFO "ODEBUG: selftest passed\n"); + +out: + debug_objects_fixups = oldfixups; + debug_objects_warnings = oldwarnings; + descr_test = NULL; + + local_irq_restore(flags); +} +#else +static inline void debug_objects_selftest(void) { } +#endif + +/* + * Called during early boot to initialize the hash buckets and link + * the static object pool objects into the pool list. After this call + * the object tracker is fully operational. + */ +void __init debug_objects_early_init(void) +{ + int i; + + for (i = 0; i < ODEBUG_HASH_SIZE; i++) + spin_lock_init(&obj_hash[i].lock); + + for (i = 0; i < ODEBUG_POOL_SIZE; i++) + hlist_add_head(&obj_static_pool[i].node, &obj_pool); +} + +/* + * Called after the kmem_caches are functional to set up a dedicated + * cache pool, which has the SLAB_DEBUG_OBJECTS flag set. This flag + * prevents the debug code from being called on kmem_cache_free() for the + * debug tracker objects to avoid recursive calls.
+ */ +void __init debug_objects_mem_init(void) +{ + if (!debug_objects_enabled) + return; + + obj_cache = kmem_cache_create("debug_objects_cache", + sizeof (struct debug_obj), 0, + SLAB_DEBUG_OBJECTS, NULL); + + if (!obj_cache) + debug_objects_enabled = 0; + else + debug_objects_selftest(); +} diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0a502e99ee22..bdd5c432c426 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -532,8 +533,11 @@ static void __free_pages_ok(struct page *page, unsigned int order) if (reserved) return; - if (!PageHighMem(page)) + if (!PageHighMem(page)) { debug_check_no_locks_freed(page_address(page),PAGE_SIZE< #include #include +#include #include #include @@ -174,12 +175,14 @@ SLAB_CACHE_DMA | \ SLAB_STORE_USER | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ - SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) + SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ + SLAB_DEBUG_OBJECTS) #else # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ SLAB_CACHE_DMA | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ - SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) + SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ + SLAB_DEBUG_OBJECTS) #endif /* @@ -3760,6 +3763,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) local_irq_save(flags); debug_check_no_locks_freed(objp, obj_size(cachep)); + if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(objp, obj_size(cachep)); __cache_free(cachep, objp); local_irq_restore(flags); } @@ -3785,6 +3790,7 @@ void kfree(const void *objp) kfree_debugcheck(objp); c = virt_to_cache(objp); debug_check_no_locks_freed(objp, obj_size(c)); + debug_check_no_obj_freed(objp, obj_size(c)); __cache_free(c, (void *)objp); local_irq_restore(flags); } diff --git a/mm/slub.c b/mm/slub.c index b145e798bf3d..70db2897c1ea 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -1747,6 +1748,8 @@ static __always_inline void 
slab_free(struct kmem_cache *s, local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); debug_check_no_locks_freed(object, c->objsize); + if (!(s->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(object, s->objsize); if (likely(page == c->page && c->node >= 0)) { object[c->offset] = c->freelist; c->freelist = object; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e33e0ae69ad1..2a39cf128aba 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -394,6 +395,7 @@ static void __vunmap(const void *addr, int deallocate_pages) } debug_check_no_locks_freed(addr, area->size); + debug_check_no_obj_freed(addr, area->size); if (deallocate_pages) { int i; -- cgit v1.2.3-71-gd317 From c6f3a97f86a5c97be0ca255976110bb9c3cfe669 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Apr 2008 00:55:03 -0700 Subject: debugobjects: add timer specific object debugging code Add calls to the generic object debugging infrastructure and provide fixup functions which allow to keep the system alive when recoverable problems have been detected by the object debugging core code. Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Greg KH Cc: Randy Dunlap Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/parport/ieee1284.c | 4 +- fs/aio.c | 5 +- include/linux/poison.h | 7 +++ include/linux/timer.h | 23 ++++++- kernel/timer.c | 153 ++++++++++++++++++++++++++++++++++++++++++--- lib/Kconfig.debug | 8 +++ 6 files changed, 187 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c index 54a6ef72906e..0338b0912674 100644 --- a/drivers/parport/ieee1284.c +++ b/drivers/parport/ieee1284.c @@ -76,7 +76,7 @@ int parport_wait_event (struct parport *port, signed long timeout) semaphore. 
*/ return 1; - init_timer (&timer); + init_timer_on_stack(&timer); timer.expires = jiffies + timeout; timer.function = timeout_waiting_on_port; port_from_cookie[port->number % PARPORT_MAX] = port; @@ -88,6 +88,8 @@ int parport_wait_event (struct parport *port, signed long timeout) /* Timed out. */ ret = 1; + destroy_timer_on_stack(&timer); + return ret; } diff --git a/fs/aio.c b/fs/aio.c index 99c2352906a0..b5253e77eb2f 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1078,9 +1078,7 @@ static void timeout_func(unsigned long data) static inline void init_timeout(struct aio_timeout *to) { - init_timer(&to->timer); - to->timer.data = (unsigned long)to; - to->timer.function = timeout_func; + setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to); to->timed_out = 0; to->p = current; } @@ -1213,6 +1211,7 @@ retry: if (timeout) clear_timeout(&to); out: + destroy_timer_on_stack(&to.timer); return i ? i : ret; } diff --git a/include/linux/poison.h b/include/linux/poison.h index a9c31be7052c..9f31683728fd 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -10,6 +10,13 @@ #define LIST_POISON1 ((void *) 0x00100100) #define LIST_POISON2 ((void *) 0x00200200) +/********** include/linux/timer.h **********/ +/* + * Magic number "tsta" to indicate a static timer initializer + * for the object debugging code. + */ +#define TIMER_ENTRY_STATIC ((void *) 0x74737461) + /********** mm/slab.c **********/ /* * Magic nums for obj red zoning. 
diff --git a/include/linux/timer.h b/include/linux/timer.h index 979fefdeb862..d4ba79248a27 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -4,6 +4,7 @@ #include #include #include +#include struct tvec_base; @@ -25,6 +26,7 @@ struct timer_list { extern struct tvec_base boot_tvec_bases; #define TIMER_INITIALIZER(_function, _expires, _data) { \ + .entry = { .prev = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .expires = (_expires), \ .data = (_data), \ @@ -38,6 +40,17 @@ extern struct tvec_base boot_tvec_bases; void init_timer(struct timer_list *timer); void init_timer_deferrable(struct timer_list *timer); +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS +extern void init_timer_on_stack(struct timer_list *timer); +extern void destroy_timer_on_stack(struct timer_list *timer); +#else +static inline void destroy_timer_on_stack(struct timer_list *timer) { } +static inline void init_timer_on_stack(struct timer_list *timer) +{ + init_timer(timer); +} +#endif + static inline void setup_timer(struct timer_list * timer, void (*function)(unsigned long), unsigned long data) @@ -47,6 +60,15 @@ static inline void setup_timer(struct timer_list * timer, init_timer(timer); } +static inline void setup_timer_on_stack(struct timer_list *timer, + void (*function)(unsigned long), + unsigned long data) +{ + timer->function = function; + timer->data = data; + init_timer_on_stack(timer); +} + /** * timer_pending - is a timer pending? 
* @timer: the timer in question @@ -164,5 +186,4 @@ unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); - #endif diff --git a/kernel/timer.c b/kernel/timer.c index f3d35d4ea42e..ceacc6626572 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -320,14 +320,130 @@ static void timer_stats_account_timer(struct timer_list *timer) static void timer_stats_account_timer(struct timer_list *timer) {} #endif -/** - * init_timer - initialize a timer. - * @timer: the timer to be initialized - * - * init_timer() must be done to a timer prior calling *any* of the - * other timer functions. +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS + +static struct debug_obj_descr timer_debug_descr; + +/* + * fixup_init is called when: + * - an active object is initialized */ -void init_timer(struct timer_list *timer) +static int timer_fixup_init(void *addr, enum debug_obj_state state) +{ + struct timer_list *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + del_timer_sync(timer); + debug_object_init(timer, &timer_debug_descr); + return 1; + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int timer_fixup_activate(void *addr, enum debug_obj_state state) +{ + struct timer_list *timer = addr; + + switch (state) { + + case ODEBUG_STATE_NOTAVAILABLE: + /* + * This is not really a fixup. The timer was + * statically initialized. We just make sure that it + * is tracked in the object tracker. 
+ */ + if (timer->entry.next == NULL && + timer->entry.prev == TIMER_ENTRY_STATIC) { + debug_object_init(timer, &timer_debug_descr); + debug_object_activate(timer, &timer_debug_descr); + return 0; + } else { + WARN_ON_ONCE(1); + } + return 0; + + case ODEBUG_STATE_ACTIVE: + WARN_ON(1); + + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int timer_fixup_free(void *addr, enum debug_obj_state state) +{ + struct timer_list *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + del_timer_sync(timer); + debug_object_free(timer, &timer_debug_descr); + return 1; + default: + return 0; + } +} + +static struct debug_obj_descr timer_debug_descr = { + .name = "timer_list", + .fixup_init = timer_fixup_init, + .fixup_activate = timer_fixup_activate, + .fixup_free = timer_fixup_free, +}; + +static inline void debug_timer_init(struct timer_list *timer) +{ + debug_object_init(timer, &timer_debug_descr); +} + +static inline void debug_timer_activate(struct timer_list *timer) +{ + debug_object_activate(timer, &timer_debug_descr); +} + +static inline void debug_timer_deactivate(struct timer_list *timer) +{ + debug_object_deactivate(timer, &timer_debug_descr); +} + +static inline void debug_timer_free(struct timer_list *timer) +{ + debug_object_free(timer, &timer_debug_descr); +} + +static void __init_timer(struct timer_list *timer); + +void init_timer_on_stack(struct timer_list *timer) +{ + debug_object_init_on_stack(timer, &timer_debug_descr); + __init_timer(timer); +} +EXPORT_SYMBOL_GPL(init_timer_on_stack); + +void destroy_timer_on_stack(struct timer_list *timer) +{ + debug_object_free(timer, &timer_debug_descr); +} +EXPORT_SYMBOL_GPL(destroy_timer_on_stack); + +#else +static inline void debug_timer_init(struct timer_list *timer) { } +static inline void debug_timer_activate(struct timer_list *timer) { } +static inline void debug_timer_deactivate(struct timer_list *timer) { } +#endif + +static void 
__init_timer(struct timer_list *timer) { timer->entry.next = NULL; timer->base = __raw_get_cpu_var(tvec_bases); @@ -337,6 +453,19 @@ void init_timer(struct timer_list *timer) memset(timer->start_comm, 0, TASK_COMM_LEN); #endif } + +/** + * init_timer - initialize a timer. + * @timer: the timer to be initialized + * + * init_timer() must be done to a timer prior calling *any* of the + * other timer functions. + */ +void init_timer(struct timer_list *timer) +{ + debug_timer_init(timer); + __init_timer(timer); +} EXPORT_SYMBOL(init_timer); void init_timer_deferrable(struct timer_list *timer) @@ -351,6 +480,8 @@ static inline void detach_timer(struct timer_list *timer, { struct list_head *entry = &timer->entry; + debug_timer_deactivate(timer); + __list_del(entry->prev, entry->next); if (clear_pending) entry->next = NULL; @@ -405,6 +536,8 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) ret = 1; } + debug_timer_activate(timer); + new_base = __get_cpu_var(tvec_bases); if (base != new_base) { @@ -450,6 +583,7 @@ void add_timer_on(struct timer_list *timer, int cpu) BUG_ON(timer_pending(timer) || !timer->function); spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); + debug_timer_activate(timer); internal_add_timer(base, timer); /* * Check whether the other CPU is idle and needs to be @@ -1086,11 +1220,14 @@ signed long __sched schedule_timeout(signed long timeout) expire = timeout + jiffies; - setup_timer(&timer, process_timeout, (unsigned long)current); + setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); __mod_timer(&timer, expire); schedule(); del_singleshot_timer_sync(&timer); + /* Remove the timer from the object tracker */ + destroy_timer_on_stack(&timer); + timeout = expire - jiffies; out: diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3e132b0a59cc..d2099f41aa1e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -217,6 +217,14 @@ config DEBUG_OBJECTS_FREE properly. 
This can make kmalloc/kfree-intensive workloads much slower. +config DEBUG_OBJECTS_TIMERS + bool "Debug timer objects" + depends on DEBUG_OBJECTS + help + If you say Y here, additional code will be inserted into the + timer routines to track the life time of timer objects and + validate the timer operations. + config DEBUG_SLAB bool "Debug slab memory allocations" depends on DEBUG_KERNEL && SLAB -- cgit v1.2.3-71-gd317 From 237fc6e7a35076f584b9d0794a5204fe4bd9b9e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Apr 2008 00:55:04 -0700 Subject: add hrtimer specific debugobjects code hrtimers have now dynamic users in the network code. Put them under debugobjects surveillance as well. Add calls to the generic object debugging infrastructure and provide fixup functions which allow to keep the system alive when recoverable problems have been detected by the object debugging core code. Signed-off-by: Thomas Gleixner Cc: Greg KH Cc: Randy Dunlap Cc: Kay Sievers Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 15 ++++ kernel/futex.c | 17 ++++- kernel/hrtimer.c | 177 ++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 186 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 56f3236da829..31a4d653389f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -266,6 +266,21 @@ extern ktime_t ktime_get_real(void); extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS +extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, + enum hrtimer_mode mode); + +extern void destroy_hrtimer_on_stack(struct hrtimer *timer); +#else +static inline void hrtimer_init_on_stack(struct hrtimer *timer, + clockid_t which_clock, + enum hrtimer_mode mode) +{ + hrtimer_init(timer, which_clock, mode); +} +static inline void 
destroy_hrtimer_on_stack(struct hrtimer *timer) { } +#endif + /* Basic timer operations: */ extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode); diff --git a/kernel/futex.c b/kernel/futex.c index e43945e995f5..98092c9817f4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1266,11 +1266,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, if (!abs_time) schedule(); else { - hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); hrtimer_init_sleeper(&t, current); t.timer.expires = *abs_time; - hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS); + hrtimer_start(&t.timer, t.timer.expires, + HRTIMER_MODE_ABS); if (!hrtimer_active(&t.timer)) t.task = NULL; @@ -1286,6 +1288,8 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, /* Flag if a timeout occured */ rem = (t.task == NULL); + + destroy_hrtimer_on_stack(&t.timer); } } __set_current_state(TASK_RUNNING); @@ -1367,7 +1371,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, if (time) { to = &timeout; - hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, + HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); to->timer.expires = *time; } @@ -1581,6 +1586,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, unqueue_me_pi(&q); futex_unlock_mm(fshared); + if (to) + destroy_hrtimer_on_stack(&to->timer); return ret != -EINTR ? 
ret : -ERESTARTNOINTR; out_unlock_release_sem: @@ -1588,6 +1595,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, out_release_sem: futex_unlock_mm(fshared); + if (to) + destroy_hrtimer_on_stack(&to->timer); return ret; uaddr_faulted: @@ -1615,6 +1624,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, if (!ret && (uval != -EFAULT)) goto retry; + if (to) + destroy_hrtimer_on_stack(&to->timer); return ret; } diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index dea4c9124ac8..9af1d6a8095e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -43,6 +43,7 @@ #include #include #include +#include #include @@ -342,6 +343,115 @@ ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) return res; } +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS + +static struct debug_obj_descr hrtimer_debug_descr; + +/* + * fixup_init is called when: + * - an active object is initialized + */ +static int hrtimer_fixup_init(void *addr, enum debug_obj_state state) +{ + struct hrtimer *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + hrtimer_cancel(timer); + debug_object_init(timer, &hrtimer_debug_descr); + return 1; + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state) +{ + switch (state) { + + case ODEBUG_STATE_NOTAVAILABLE: + WARN_ON_ONCE(1); + return 0; + + case ODEBUG_STATE_ACTIVE: + WARN_ON(1); + + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int hrtimer_fixup_free(void *addr, enum debug_obj_state state) +{ + struct hrtimer *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + hrtimer_cancel(timer); + debug_object_free(timer, &hrtimer_debug_descr); + return 1; + default: + return 0; + } +} + +static struct debug_obj_descr 
hrtimer_debug_descr = { + .name = "hrtimer", + .fixup_init = hrtimer_fixup_init, + .fixup_activate = hrtimer_fixup_activate, + .fixup_free = hrtimer_fixup_free, +}; + +static inline void debug_hrtimer_init(struct hrtimer *timer) +{ + debug_object_init(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_activate(struct hrtimer *timer) +{ + debug_object_activate(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_deactivate(struct hrtimer *timer) +{ + debug_object_deactivate(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_free(struct hrtimer *timer) +{ + debug_object_free(timer, &hrtimer_debug_descr); +} + +static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode); + +void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) +{ + debug_object_init_on_stack(timer, &hrtimer_debug_descr); + __hrtimer_init(timer, clock_id, mode); +} + +void destroy_hrtimer_on_stack(struct hrtimer *timer) +{ + debug_object_free(timer, &hrtimer_debug_descr); +} + +#else +static inline void debug_hrtimer_init(struct hrtimer *timer) { } +static inline void debug_hrtimer_activate(struct hrtimer *timer) { } +static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } +#endif + /* * Check, whether the timer is on the callback pending list */ @@ -567,6 +677,7 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, /* Timer is expired, act upon the callback mode */ switch(timer->cb_mode) { case HRTIMER_CB_IRQSAFE_NO_RESTART: + debug_hrtimer_deactivate(timer); /* * We can call the callback from here. No restart * happens, so no danger of recursion @@ -581,6 +692,7 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, * the tick timer in the softirq ! The calling site * takes care of this. 
*/ + debug_hrtimer_deactivate(timer); return 1; case HRTIMER_CB_IRQSAFE: case HRTIMER_CB_SOFTIRQ: @@ -735,6 +847,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer *entry; int leftmost = 1; + debug_hrtimer_activate(timer); + /* * Find the right place in the rbtree: */ @@ -831,6 +945,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ + debug_hrtimer_deactivate(timer); timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, @@ -878,6 +993,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) tim = ktime_add_safe(tim, base->resolution); #endif } + timer->expires = tim; timer_stats_hrtimer_set_start_info(timer); @@ -1011,14 +1127,8 @@ ktime_t hrtimer_get_next_event(void) } #endif -/** - * hrtimer_init - initialize a timer to the given clock - * @timer: the timer to be initialized - * @clock_id: the clock to be used - * @mode: timer mode abs/rel - */ -void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, - enum hrtimer_mode mode) +static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) { struct hrtimer_cpu_base *cpu_base; @@ -1039,6 +1149,19 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, memset(timer->start_comm, 0, TASK_COMM_LEN); #endif } + +/** + * hrtimer_init - initialize a timer to the given clock + * @timer: the timer to be initialized + * @clock_id: the clock to be used + * @mode: timer mode abs/rel + */ +void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) +{ + debug_hrtimer_init(timer); + __hrtimer_init(timer, clock_id, mode); +} EXPORT_SYMBOL_GPL(hrtimer_init); /** @@ -1072,6 +1195,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) timer = 
list_entry(cpu_base->cb_pending.next, struct hrtimer, cb_entry); + debug_hrtimer_deactivate(timer); timer_stats_account_hrtimer(timer); fn = timer->function; @@ -1120,6 +1244,7 @@ static void __run_hrtimer(struct hrtimer *timer) enum hrtimer_restart (*fn)(struct hrtimer *); int restart; + debug_hrtimer_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); @@ -1378,22 +1503,27 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) { struct hrtimer_sleeper t; struct timespec __user *rmtp; + int ret = 0; - hrtimer_init(&t.timer, restart->nanosleep.index, HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, + HRTIMER_MODE_ABS); t.timer.expires.tv64 = restart->nanosleep.expires; if (do_nanosleep(&t, HRTIMER_MODE_ABS)) - return 0; + goto out; rmtp = restart->nanosleep.rmtp; if (rmtp) { - int ret = update_rmtp(&t.timer, rmtp); + ret = update_rmtp(&t.timer, rmtp); if (ret <= 0) - return ret; + goto out; } /* The other values in restart are already filled in */ - return -ERESTART_RESTARTBLOCK; + ret = -ERESTART_RESTARTBLOCK; +out: + destroy_hrtimer_on_stack(&t.timer); + return ret; } long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, @@ -1401,20 +1531,23 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, { struct restart_block *restart; struct hrtimer_sleeper t; + int ret = 0; - hrtimer_init(&t.timer, clockid, mode); + hrtimer_init_on_stack(&t.timer, clockid, mode); t.timer.expires = timespec_to_ktime(*rqtp); if (do_nanosleep(&t, mode)) - return 0; + goto out; /* Absolute timers do not update the rmtp value and restart: */ - if (mode == HRTIMER_MODE_ABS) - return -ERESTARTNOHAND; + if (mode == HRTIMER_MODE_ABS) { + ret = -ERESTARTNOHAND; + goto out; + } if (rmtp) { - int ret = update_rmtp(&t.timer, rmtp); + ret = update_rmtp(&t.timer, rmtp); if (ret <= 0) - return ret; + goto out; } restart = 
&current_thread_info()->restart_block; @@ -1423,7 +1556,10 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart->nanosleep.rmtp = rmtp; restart->nanosleep.expires = t.timer.expires.tv64; - return -ERESTART_RESTARTBLOCK; + ret = -ERESTART_RESTARTBLOCK; +out: + destroy_hrtimer_on_stack(&t.timer); + return ret; } asmlinkage long @@ -1468,6 +1604,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); + debug_hrtimer_deactivate(timer); __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); timer->base = new_base; /* -- cgit v1.2.3-71-gd317 From 735643ee6cc5249bfac07fcad0946a5e7aff4423 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 30 Apr 2008 00:55:12 -0700 Subject: Remove "#ifdef __KERNEL__" checks from unexported headers Remove the "#ifdef __KERNEL__" tests from unexported header files in linux/include whose entire contents are wrapped in that preprocessor test. Signed-off-by: Robert P. J.
Day Cc: David Woodhouse Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/agp_backend.h | 3 --- include/linux/cdev.h | 2 -- include/linux/coda_fs_i.h | 2 -- include/linux/concap.h | 3 +-- include/linux/configfs.h | 4 ---- include/linux/crc-ccitt.h | 2 -- include/linux/dcache.h | 4 ---- include/linux/device-mapper.h | 3 --- include/linux/eventfd.h | 5 ----- include/linux/fsl_devices.h | 2 -- include/linux/fsnotify.h | 4 ---- include/linux/hw_random.h | 2 -- include/linux/i2o.h | 3 --- include/linux/if_macvlan.h | 3 --- include/linux/inet.h | 2 -- include/linux/isicom.h | 7 ------- include/linux/kfifo.h | 5 ----- include/linux/kobj_map.h | 4 ---- include/linux/kobject.h | 3 --- include/linux/kref.h | 3 --- include/linux/list.h | 5 ----- include/linux/mmzone.h | 2 -- include/linux/mount.h | 2 -- include/linux/ncp_fs_i.h | 4 ---- include/linux/of_device.h | 2 -- include/linux/pm.h | 4 ---- include/linux/pnp.h | 4 ---- include/linux/profile.h | 4 ---- include/linux/rcuclassic.h | 3 --- include/linux/rcupdate.h | 3 --- include/linux/rcupreempt.h | 3 --- include/linux/rcupreempt_trace.h | 2 -- include/linux/rio.h | 3 --- include/linux/rio_drv.h | 3 --- include/linux/rwsem.h | 3 --- include/linux/seq_file.h | 2 -- include/linux/slab.h | 3 --- include/linux/smb_fs_i.h | 2 -- include/linux/smb_fs_sb.h | 4 ---- include/linux/svga.h | 3 --- include/linux/textsearch.h | 4 ---- 41 files changed, 1 insertion(+), 130 deletions(-) (limited to 'include/linux') diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 03e34547d489..661d90d6cf7c 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -30,8 +30,6 @@ #ifndef _AGP_BACKEND_H #define _AGP_BACKEND_H 1 -#ifdef __KERNEL__ - #ifndef TRUE #define TRUE 1 #endif @@ -111,5 +109,4 @@ extern struct agp_bridge_data *agp_backend_acquire(struct pci_dev *); extern void agp_backend_release(struct agp_bridge_data *); extern void agp_flush_chipset(struct 
agp_bridge_data *); -#endif /* __KERNEL__ */ #endif /* _AGP_BACKEND_H */ diff --git a/include/linux/cdev.h b/include/linux/cdev.h index 1e29b13d0062..fb4591977b03 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -1,6 +1,5 @@ #ifndef _LINUX_CDEV_H #define _LINUX_CDEV_H -#ifdef __KERNEL__ #include #include @@ -34,4 +33,3 @@ void cd_forget(struct inode *); extern struct backing_dev_info directly_mappable_cdev_bdi; #endif -#endif diff --git a/include/linux/coda_fs_i.h b/include/linux/coda_fs_i.h index 424fe9cf02c4..b3ef0c461578 100644 --- a/include/linux/coda_fs_i.h +++ b/include/linux/coda_fs_i.h @@ -8,7 +8,6 @@ #ifndef _LINUX_CODA_FS_I #define _LINUX_CODA_FS_I -#ifdef __KERNEL__ #include #include #include @@ -52,4 +51,3 @@ struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb); void coda_replace_fid(struct inode *, struct CodaFid *, struct CodaFid *); #endif -#endif diff --git a/include/linux/concap.h b/include/linux/concap.h index 27304651d700..977acb3d1fb2 100644 --- a/include/linux/concap.h +++ b/include/linux/concap.h @@ -8,7 +8,7 @@ #ifndef _LINUX_CONCAP_H #define _LINUX_CONCAP_H -#ifdef __KERNEL__ + #include #include @@ -110,4 +110,3 @@ extern int concap_nop(struct concap_proto *cprot); */ extern int concap_drop_skb(struct concap_proto *cprot, struct sk_buff *skb); #endif -#endif diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 4b287ad9371a..3ae65b1bf90f 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -35,8 +35,6 @@ #ifndef _CONFIGFS_H_ #define _CONFIGFS_H_ -#ifdef __KERNEL__ - #include #include #include @@ -194,6 +192,4 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys); int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target); void configfs_undepend_item(struct configfs_subsystem *subsys, struct config_item *target); -#endif /* __KERNEL__ */ - #endif /* _CONFIGFS_H_ */ diff --git a/include/linux/crc-ccitt.h 
b/include/linux/crc-ccitt.h index 90037617da8f..f52696a1ff0d 100644 --- a/include/linux/crc-ccitt.h +++ b/include/linux/crc-ccitt.h @@ -1,6 +1,5 @@ #ifndef _LINUX_CRC_CCITT_H #define _LINUX_CRC_CCITT_H -#ifdef __KERNEL__ #include @@ -13,5 +12,4 @@ static inline u16 crc_ccitt_byte(u16 crc, const u8 c) return (crc >> 8) ^ crc_ccitt_table[(crc ^ c) & 0xff]; } -#endif /* __KERNEL__ */ #endif /* _LINUX_CRC_CCITT_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index cfb1627ac51c..2a6639407c80 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -1,8 +1,6 @@ #ifndef __LINUX_DCACHE_H #define __LINUX_DCACHE_H -#ifdef __KERNEL__ - #include #include #include @@ -365,6 +363,4 @@ extern struct dentry *lookup_create(struct nameidata *nd, int is_dir); extern int sysctl_vfs_cache_pressure; -#endif /* __KERNEL__ */ - #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ad3b787479a4..0d8d419d191a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -8,8 +8,6 @@ #ifndef _LINUX_DEVICE_MAPPER_H #define _LINUX_DEVICE_MAPPER_H -#ifdef __KERNEL__ - #include struct dm_target; @@ -344,5 +342,4 @@ static inline unsigned long to_bytes(sector_t n) return (n << SECTOR_SHIFT); } -#endif /* __KERNEL__ */ #endif /* _LINUX_DEVICE_MAPPER_H */ diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index b489fc6d0b62..a701399b7fed 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -8,9 +8,6 @@ #ifndef _LINUX_EVENTFD_H #define _LINUX_EVENTFD_H - -#ifdef __KERNEL__ - #ifdef CONFIG_EVENTFD struct file *eventfd_fget(int fd); @@ -24,7 +21,5 @@ static inline int eventfd_signal(struct file *file, int n) #endif /* CONFIG_EVENTFD */ -#endif /* __KERNEL__ */ - #endif /* _LINUX_EVENTFD_H */ diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 2cad5c67397e..c415a496de3a 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h 
@@ -14,7 +14,6 @@ * option) any later version. */ -#ifdef __KERNEL__ #ifndef _FSL_DEVICE_H_ #define _FSL_DEVICE_H_ @@ -127,4 +126,3 @@ struct mpc8xx_pcmcia_ops { }; #endif /* _FSL_DEVICE_H_ */ -#endif /* __KERNEL__ */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index d4b7c4ac72e6..a89513188ce7 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -11,8 +11,6 @@ * (C) Copyright 2005 Robert Love */ -#ifdef __KERNEL__ - #include #include #include @@ -296,6 +294,4 @@ static inline void fsnotify_oldname_free(const char *old_name) #endif /* ! CONFIG_INOTIFY */ -#endif /* __KERNEL__ */ - #endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 85d11916e9ea..7244456e7e65 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -11,7 +11,6 @@ #ifndef LINUX_HWRANDOM_H_ #define LINUX_HWRANDOM_H_ -#ifdef __KERNEL__ #include #include @@ -46,5 +45,4 @@ extern int hwrng_register(struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. 
*/ extern void hwrng_unregister(struct hwrng *rng); -#endif /* __KERNEL__ */ #endif /* LINUX_HWRANDOM_H_ */ diff --git a/include/linux/i2o.h b/include/linux/i2o.h index f65e58a1d925..7d51cbca49ab 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -18,8 +18,6 @@ #ifndef _I2O_H #define _I2O_H -#ifdef __KERNEL__ /* This file to be included by kernel only */ - #include /* How many different OSM's are we allowing */ @@ -1255,5 +1253,4 @@ extern void i2o_dump_message(struct i2o_message *); extern void i2o_dump_hrt(struct i2o_controller *c); extern void i2o_debug_state(struct i2o_controller *c); -#endif /* __KERNEL__ */ #endif /* _I2O_H */ diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 0d9d7ea2c1cc..5f200bac3749 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -1,9 +1,6 @@ #ifndef _LINUX_IF_MACVLAN_H #define _LINUX_IF_MACVLAN_H -#ifdef __KERNEL__ - extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *); -#endif /* __KERNEL__ */ #endif /* _LINUX_IF_MACVLAN_H */ diff --git a/include/linux/inet.h b/include/linux/inet.h index 675a7dbe86f8..1354080cf8cf 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -42,11 +42,9 @@ #ifndef _LINUX_INET_H #define _LINUX_INET_H -#ifdef __KERNEL__ #include extern __be32 in_aton(const char *str); extern int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); -#endif #endif /* _LINUX_INET_H */ diff --git a/include/linux/isicom.h b/include/linux/isicom.h index 8f4c71759d73..bbd42197298f 100644 --- a/include/linux/isicom.h +++ b/include/linux/isicom.h @@ -1,11 +1,6 @@ #ifndef _LINUX_ISICOM_H #define _LINUX_ISICOM_H -/*#define ISICOM_DEBUG*/ -/*#define ISICOM_DEBUG_DTR_RTS*/ - -#ifdef __KERNEL__ - #define YES 1 #define NO 0 @@ -85,6 +80,4 @@ #define ISI_TXOK 0x0001 -#endif /* __KERNEL__ */ - #endif /* ISICOM_H */ diff --git 
a/include/linux/kfifo.h b/include/linux/kfifo.h index 404f4464cb1a..29f62e1733ff 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -21,8 +21,6 @@ #ifndef _LINUX_KFIFO_H #define _LINUX_KFIFO_H -#ifdef __KERNEL__ - #include #include @@ -151,7 +149,4 @@ static inline unsigned int kfifo_len(struct kfifo *fifo) return ret; } -#else -#warning "don't include kernel headers in userspace" -#endif /* __KERNEL__ */ #endif diff --git a/include/linux/kobj_map.h b/include/linux/kobj_map.h index bafe178a381f..73717ed9ea79 100644 --- a/include/linux/kobj_map.h +++ b/include/linux/kobj_map.h @@ -1,5 +1,3 @@ -#ifdef __KERNEL__ - #include typedef struct kobject *kobj_probe_t(dev_t, int *, void *); @@ -10,5 +8,3 @@ int kobj_map(struct kobj_map *, dev_t, unsigned long, struct module *, void kobj_unmap(struct kobj_map *, dev_t, unsigned long); struct kobject *kobj_lookup(struct kobj_map *, dev_t, int *); struct kobj_map *kobj_map_init(kobj_probe_t *, struct mutex *); - -#endif diff --git a/include/linux/kobject.h b/include/linux/kobject.h index caa3f411f15d..39e709f88aa0 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -16,8 +16,6 @@ #ifndef _KOBJECT_H_ #define _KOBJECT_H_ -#ifdef __KERNEL__ - #include #include #include @@ -224,5 +222,4 @@ static inline int kobject_action_type(const char *buf, size_t count, { return -EINVAL; } #endif -#endif /* __KERNEL__ */ #endif /* _KOBJECT_H_ */ diff --git a/include/linux/kref.h b/include/linux/kref.h index 5d185635786e..0cef6badd6fb 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -15,8 +15,6 @@ #ifndef _KREF_H_ #define _KREF_H_ -#ifdef __KERNEL__ - #include #include @@ -29,5 +27,4 @@ void kref_init(struct kref *kref); void kref_get(struct kref *kref); int kref_put(struct kref *kref, void (*release) (struct kref *kref)); -#endif /* __KERNEL__ */ #endif /* _KREF_H_ */ diff --git a/include/linux/list.h b/include/linux/list.h index 7627508f1b74..08cf4f651889 100644 --- a/include/linux/list.h +++ 
b/include/linux/list.h @@ -1,8 +1,6 @@ #ifndef _LINUX_LIST_H #define _LINUX_LIST_H -#ifdef __KERNEL__ - #include #include #include @@ -983,7 +981,4 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = rcu_dereference(pos->next)) -#else -#warning "don't include kernel headers in userspace" -#endif /* __KERNEL__ */ #endif diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ceb675d83a56..c463cd8a15a4 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1,7 +1,6 @@ #ifndef _LINUX_MMZONE_H #define _LINUX_MMZONE_H -#ifdef __KERNEL__ #ifndef __ASSEMBLY__ #ifndef __GENERATING_BOUNDS_H @@ -1005,5 +1004,4 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* _LINUX_MMZONE_H */ diff --git a/include/linux/mount.h b/include/linux/mount.h index b4836d58f428..4374d1adeb4b 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -10,7 +10,6 @@ */ #ifndef _LINUX_MOUNT_H #define _LINUX_MOUNT_H -#ifdef __KERNEL__ #include #include @@ -114,5 +113,4 @@ extern void mark_mounts_for_expiry(struct list_head *mounts); extern spinlock_t vfsmount_lock; extern dev_t name_to_dev_t(char *name); -#endif #endif /* _LINUX_MOUNT_H */ diff --git a/include/linux/ncp_fs_i.h b/include/linux/ncp_fs_i.h index bdb4c8ae6924..4b0bec477846 100644 --- a/include/linux/ncp_fs_i.h +++ b/include/linux/ncp_fs_i.h @@ -8,8 +8,6 @@ #ifndef _LINUX_NCP_FS_I #define _LINUX_NCP_FS_I -#ifdef __KERNEL__ - /* * This is the ncpfs part of the inode structure. This must contain * all the information we need to work with an inode after creation. 
@@ -28,6 +26,4 @@ struct ncp_inode_info { struct inode vfs_inode; }; -#endif /* __KERNEL__ */ - #endif /* _LINUX_NCP_FS_I */ diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 6dc11959770c..afe338217d91 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -1,6 +1,5 @@ #ifndef _LINUX_OF_DEVICE_H #define _LINUX_OF_DEVICE_H -#ifdef __KERNEL__ #include #include @@ -25,5 +24,4 @@ static inline void of_device_free(struct of_device *dev) of_release_dev(&dev->dev); } -#endif /* __KERNEL__ */ #endif /* _LINUX_OF_DEVICE_H */ diff --git a/include/linux/pm.h b/include/linux/pm.h index 1de72cbbe0d1..39a7ee859b67 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -21,8 +21,6 @@ #ifndef _LINUX_PM_H #define _LINUX_PM_H -#ifdef __KERNEL__ - #include #include #include @@ -225,6 +223,4 @@ extern unsigned int pm_flags; #define PM_APM 1 #define PM_ACPI 2 -#endif /* __KERNEL__ */ - #endif /* _LINUX_PM_H */ diff --git a/include/linux/pnp.h b/include/linux/pnp.h index b2f05c230f4b..2f3bcf73052c 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -6,8 +6,6 @@ #ifndef _LINUX_PNP_H #define _LINUX_PNP_H -#ifdef __KERNEL__ - #include #include #include @@ -466,6 +464,4 @@ static inline void pnp_unregister_driver(struct pnp_driver *drv) { } #define pnp_dbg(format, arg...) 
do {} while (0) #endif -#endif /* __KERNEL__ */ - #endif /* _LINUX_PNP_H */ diff --git a/include/linux/profile.h b/include/linux/profile.h index ff576d1db67d..05c1cc736937 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -1,8 +1,6 @@ #ifndef _LINUX_PROFILE_H #define _LINUX_PROFILE_H -#ifdef __KERNEL__ - #include #include #include @@ -118,6 +116,4 @@ static inline void unregister_timer_hook(int (*hook)(struct pt_regs *)) #endif /* CONFIG_PROFILING */ -#endif /* __KERNEL__ */ - #endif /* _LINUX_PROFILE_H */ diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index b3dccd68629e..b3aa05baab8a 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -33,8 +33,6 @@ #ifndef __LINUX_RCUCLASSIC_H #define __LINUX_RCUCLASSIC_H -#ifdef __KERNEL__ - #include #include #include @@ -163,5 +161,4 @@ extern long rcu_batches_completed_bh(void); #define rcu_enter_nohz() do { } while (0) #define rcu_exit_nohz() do { } while (0) -#endif /* __KERNEL__ */ #endif /* __LINUX_RCUCLASSIC_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 37a642c54871..8082d6587a0f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -33,8 +33,6 @@ #ifndef __LINUX_RCUPDATE_H #define __LINUX_RCUPDATE_H -#ifdef __KERNEL__ - #include #include #include @@ -245,5 +243,4 @@ extern long rcu_batches_completed_bh(void); extern void rcu_init(void); extern int rcu_needs_cpu(int cpu); -#endif /* __KERNEL__ */ #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index d038aa6e5ee1..8a05c7e20bc4 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -33,8 +33,6 @@ #ifndef __LINUX_RCUPREEMPT_H #define __LINUX_RCUPREEMPT_H -#ifdef __KERNEL__ - #include #include #include @@ -104,5 +102,4 @@ static inline void rcu_exit_nohz(void) #define rcu_exit_nohz() do { } while (0) #endif /* CONFIG_NO_HZ */ -#endif /* __KERNEL__ */ #endif /* __LINUX_RCUPREEMPT_H */ diff 
--git a/include/linux/rcupreempt_trace.h b/include/linux/rcupreempt_trace.h index 21cd6b2a5c42..b99ae073192a 100644 --- a/include/linux/rcupreempt_trace.h +++ b/include/linux/rcupreempt_trace.h @@ -32,7 +32,6 @@ #ifndef __LINUX_RCUPREEMPT_TRACE_H #define __LINUX_RCUPREEMPT_TRACE_H -#ifdef __KERNEL__ #include #include @@ -95,5 +94,4 @@ extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace); extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace); extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace); -#endif /* __KERNEL__ */ #endif /* __LINUX_RCUPREEMPT_TRACE_H */ diff --git a/include/linux/rio.h b/include/linux/rio.h index cfb66bbc0f27..c1c99c9643d3 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -14,8 +14,6 @@ #ifndef LINUX_RIO_H #define LINUX_RIO_H -#ifdef __KERNEL__ - #include #include #include @@ -331,5 +329,4 @@ extern void rio_close_inb_mbox(struct rio_mport *, int); extern int rio_open_outb_mbox(struct rio_mport *, void *, int, int); extern void rio_close_outb_mbox(struct rio_mport *, int); -#endif /* __KERNEL__ */ #endif /* LINUX_RIO_H */ diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 7adb2a1aac92..90987b7bcc1b 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -13,8 +13,6 @@ #ifndef LINUX_RIO_DRV_H #define LINUX_RIO_DRV_H -#ifdef __KERNEL__ - #include #include #include @@ -465,5 +463,4 @@ extern struct rio_dev *rio_get_device(u16 vid, u16 did, struct rio_dev *from); extern struct rio_dev *rio_get_asm(u16 vid, u16 did, u16 asm_vid, u16 asm_did, struct rio_dev *from); -#endif /* __KERNEL__ */ #endif /* LINUX_RIO_DRV_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 7b524b4109a0..efd348fe8ca7 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -9,8 +9,6 @@ #include -#ifdef __KERNEL__ - #include #include #include @@ -90,5 +88,4 @@ extern void up_read_non_owner(struct rw_semaphore *sem); # define up_read_non_owner(sem) 
up_read(sem) #endif -#endif /* __KERNEL__ */ #endif /* _LINUX_RWSEM_H */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 5b5369c3c209..a66304a09955 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -1,6 +1,5 @@ #ifndef _LINUX_SEQ_FILE_H #define _LINUX_SEQ_FILE_H -#ifdef __KERNEL__ #include #include @@ -69,4 +68,3 @@ extern struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos); #endif -#endif diff --git a/include/linux/slab.h b/include/linux/slab.h index 6d03c954f641..805ed4b92f9a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -9,8 +9,6 @@ #ifndef _LINUX_SLAB_H #define _LINUX_SLAB_H -#ifdef __KERNEL__ - #include #include @@ -283,5 +281,4 @@ extern const struct seq_operations slabinfo_op; ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *); #endif -#endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/smb_fs_i.h b/include/linux/smb_fs_i.h index 8516954a5141..8ccf4eca2c3d 100644 --- a/include/linux/smb_fs_i.h +++ b/include/linux/smb_fs_i.h @@ -9,7 +9,6 @@ #ifndef _LINUX_SMB_FS_I #define _LINUX_SMB_FS_I -#ifdef __KERNEL__ #include #include @@ -36,4 +35,3 @@ struct smb_inode_info { }; #endif -#endif diff --git a/include/linux/smb_fs_sb.h b/include/linux/smb_fs_sb.h index 3aa97aa4277f..8a060a7040d8 100644 --- a/include/linux/smb_fs_sb.h +++ b/include/linux/smb_fs_sb.h @@ -9,8 +9,6 @@ #ifndef _SMB_FS_SB #define _SMB_FS_SB -#ifdef __KERNEL__ - #include #include @@ -96,6 +94,4 @@ smb_unlock_server(struct smb_sb_info *server) up(&(server->sem)); } -#endif /* __KERNEL__ */ - #endif diff --git a/include/linux/svga.h b/include/linux/svga.h index 13ad0b82ac28..c59a51a2b0e7 100644 --- a/include/linux/svga.h +++ b/include/linux/svga.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SVGA_H #define _LINUX_SVGA_H -#ifdef __KERNEL__ - #include #include